From 307fa78b3355f368e0f0b23d70ba40f395f934d9 Mon Sep 17 00:00:00 2001 From: Bob Rudis Date: Tue, 30 May 2017 17:48:18 -0400 Subject: [PATCH] getting ready for new dplyr --- .gitignore | 1 + DESCRIPTION | 6 +- NEWS.md | 5 + R/dplyr.r | 31 ++--- README.Rmd | 12 +- README.md | 256 ++++++++++++++++++++--------------------- man/src_drill.Rd | 31 +---- man/src_tbls.Rd | 36 ++++++ sergeant.Rproj | 1 + tests/testthat/test-sergeant.R | 7 +- 10 files changed, 204 insertions(+), 182 deletions(-) create mode 100644 man/src_tbls.Rd diff --git a/.gitignore b/.gitignore index 807ea25..728e389 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ .Rproj.user .Rhistory .RData +.DS_Store diff --git a/DESCRIPTION b/DESCRIPTION index 580a59c..abb2233 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: sergeant Title: Tools to Transform and Query Data with the 'Apache Drill' 'REST API' and 'JDBC' Interfaces, Plus 'dplyr' and 'DBI' Interfaces -Version: 0.3.2 +Version: 0.4.0 Authors@R: c(person("Bob", "Rudis", email = "bob@rud.is", role = c("aut", "cre")), person("Edward", "Visel", email = "edward.visel@gmail.com", role = "ctb")) Description: 'Apache Drill' is a low-latency distributed query engine designed to enable @@ -15,7 +15,7 @@ Depends: dbplyr URL: http://github.com/hrbrmstr/sergeant BugReports: https://github.com/hrbrmstr/sergeant/issues -License: AGPL + file LICENSE +License: MIT + file LICENSE Encoding: UTF-8 LazyData: true Imports: @@ -32,3 +32,5 @@ Imports: Suggests: testthat RoxygenNote: 6.0.1 +Remotes: + tidyverse/dbplyr diff --git a/NEWS.md b/NEWS.md index 0181c02..193230a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,8 @@ +# sergeant 0.4.0 + +- Getting ready for new `dplyr` (thx to Edward Visel) +- Cleaned up roxygen docs so that `src_drill` is exported now. + # sergeant 0.3.2 - Finally got quoting done. I thought I had before but I guess I hadn't. diff --git a/R/dplyr.r b/R/dplyr.r index 2fd9eea..1e91220 100644 --- a/R/dplyr.r +++ b/R/dplyr.r @@ -1,16 +1,14 @@ -#' Connect to Drill (using \code{dplyr}). +#' Connect to Drill (dplyr) #' #' Use \code{src_drill()} to connect to a Drill cluster and `tbl()` to connect to a #' fully-qualified "table reference". The vast majority of Drill SQL functions have #' also been made available to the \code{dplyr} interface. If you have custom Drill #' SQL functions that need to be implemented please file an issue on GitHub. #' -#' @note This is a DBI wrapper around the Drill REST API. -#' @note TODO username/password support -#' #' @param host Drill host (will pick up the value from \code{DRILL_HOST} env var) #' @param port Drill port (will pick up the value from \code{DRILL_PORT} env var) #' @param ssl use ssl? +#' @note This is a DBI wrapper around the Drill REST API. TODO username/password support #' @export #' @examples \dontrun{ #' db <- src_drill("localhost", "8047") @@ -32,7 +30,6 @@ #' rpd = rpad(full_name, 20L), #' rpdw = rpad_with(full_name, 20L, "*")) #' } -#' @export src_drill <- function(host=Sys.getenv("DRILL_HOST", "localhost"), port=as.integer(Sys.getenv("DRILL_PORT", 8047L)), ssl=FALSE) { @@ -43,36 +40,40 @@ src_drill <- function(host=Sys.getenv("DRILL_HOST", "localhost"), } -#' @rdname src_drill -#' @keywords internal +#' src tbls +#' +#' "SHOW DATABASES" +#' +#' @rdname src_tbls +#' @param x x #' @export src_tbls.src_drill <- function(x) { tmp <- dbGetQuery(x$con, "SHOW DATABASES") paste0(unlist(tmp$SCHEMA_NAME, use.names=FALSE), collapse=", ") } -#' @rdname src_drill +#' @rdname src_tbls #' @keywords internal #' @export db_desc.src_drill <- function(x) { tmp <- dbGetQuery(x$con, "SELECT * FROM sys.version") version <- tmp$version - tmp <- dbGetQuery(x$con, "SELECT (direct_max / 1024 / 1024 /1024) AS direct_max FROM sys.memory") + tmp <- dbGetQuery(x$con, "SELECT (direct_max / 1024 / 1024 / 1024) AS direct_max FROM sys.memory") memory <- tmp$direct_max sprintf("Drill %s [%s:%d] [%dGB direct memory]", version, x$con@host, x$con@port, memory) } -#' @rdname src_drill +#' @rdname src_tbls #' @keywords internal #' @export sql_escape_ident.DrillConnection <- function(con, x) { ifelse(grepl("`", x), sql_quote(x, ' '), sql_quote(x, '`')) } -#' @rdname src_drill +#' @rdname src_tbls #' @keywords internal #' @export copy_to.src_drill <- function(dest, df) { @@ -88,7 +89,7 @@ tbl.src_drill <- function(src, from, ...) { tbl_sql("drill", src=src, from=from, ...) } -#' @rdname src_drill +#' @rdname src_tbls #' @keywords internal #' @export db_explain.DrillConnection <- function(con, sql, ...) { @@ -97,7 +98,7 @@ db_explain.DrillConnection <- function(con, sql, ...) { return(paste(explanation[[1]], collapse = "\n")) } -#' @rdname src_drill +#' @rdname src_tbls #' @keywords internal #' @export db_query_fields.DrillConnection <- function(con, sql, ...) { @@ -111,7 +112,7 @@ db_query_fields.DrillConnection <- function(con, sql, ...) { } -#' @rdname src_drill +#' @rdname src_tbls #' @keywords internal #' @export db_data_type.DrillConnection <- function(con, fields, ...) { @@ -132,7 +133,7 @@ db_data_type.DrillConnection <- function(con, fields, ...) { vapply(fields, data_type, character(1)) } -#' @rdname src_drill +#' @rdname src_tbls #' @keywords internal #' @export sql_translate_env.DrillConnection <- function(con) { diff --git a/README.Rmd b/README.Rmd index 4598edb..b06ccca 100644 --- a/README.Rmd +++ b/README.Rmd @@ -88,7 +88,7 @@ options(width=120) ```{r message=FALSE} library(sergeant) -ds <- src_drill("drill.local") +ds <- src_drill("drillex") # use localhost if running standalone on same system otherwise the host or IP of your Drill server ds db <- tbl(ds, "cp.`employee.json`") @@ -167,7 +167,7 @@ library(sergeant) # current verison packageVersion("sergeant") -dc <- drill_connection("localhost") +dc <- drill_connection("drillex") drill_active(dc) @@ -225,9 +225,11 @@ select columns[2] as city, columns[4] as lon, columns[3] as lat ```{r} library(RJDBC) -con <- drill_jdbc("drill.local:2181", "jla") -# or the following if running drill-embedded -# con <- drill_jdbc("localhost:31010", use_zk=FALSE) +# Use this if connecting to a cluster with zookeeper +# con <- drill_jdbc("drill-node:2181", "drillbits1") + +# Use the following if running drill-embedded +con <- drill_jdbc("localhost:31010", use_zk=FALSE) drill_query(con, "SELECT * FROM cp.`employee.json`") diff --git a/README.md b/README.md index 20055af..726d27a 100644 --- a/README.md +++ b/README.md @@ -73,19 +73,18 @@ devtools::install_github("hrbrmstr/sergeant") ``` r library(sergeant) -ds <- src_drill("drill.local") +ds <- src_drill("drillex") # use localhost if running standalone on same system otherwise the host or IP of your Drill server ds -#> src: Drill 1.9.0 [drill.local:8047] [32GB direct memory] -#> tbls: INFORMATION_SCHEMA, cp.default, dfs.default, dfs.pq, dfs.root, dfs.tmp, sys +#> src: DrillConnection +#> tbls: INFORMATION_SCHEMA, cp.default, dfs.d, dfs.default, dfs.h, dfs.natexp, dfs.p, dfs.root, dfs.tmp, sys db <- tbl(ds, "cp.`employee.json`") # without `collect()`: count(db, gender, marital_status) -#> Source: query [?? x 3] -#> Database: Drill 1.9.0 [drill.local:8047] [32GB direct memory] -#> Groups: gender -#> +#> # Source: lazy query [?? x 3] +#> # Database: DrillConnection +#> # Groups: gender #> marital_status gender n #> #> 1 S F 297 @@ -102,9 +101,8 @@ count(db, gender, marital_status) # LIMIT 1000 count(db, gender, marital_status) %>% collect() -#> Source: local data frame [4 x 3] -#> Groups: gender [2] -#> +#> # A tibble: 4 x 3 +#> # Groups: gender [2] #> marital_status gender n #> * #> 1 S F 297 @@ -127,18 +125,18 @@ group_by(db, position_title) %>% mutate(full_desc=ifelse(gender=="F", "Female", "Male")) %>% collect() %>% select(Title=position_title, Gender=full_desc, Count=n) -#> # A tibble: 30 × 3 +#> # A tibble: 30 x 3 #> Title Gender Count -#> * -#> 1 President Female 1 -#> 2 VP Country Manager Male 3 -#> 3 VP Country Manager Female 3 -#> 4 VP Information Systems Female 1 -#> 5 VP Human Resources Female 1 -#> 6 Store Manager Female 13 -#> 7 VP Finance Male 1 -#> 8 Store Manager Male 11 -#> 9 HQ Marketing Female 2 +#> * +#> 1 President Female 1 +#> 2 VP Country Manager Male 3 +#> 3 VP Country Manager Female 3 +#> 4 VP Information Systems Female 1 +#> 5 VP Human Resources Female 1 +#> 6 Store Manager Female 13 +#> 7 VP Finance Male 1 +#> 8 Store Manager Male 11 +#> 9 HQ Marketing Female 2 #> 10 HQ Information Systems Female 4 #> # ... with 20 more rows @@ -151,31 +149,31 @@ group_by(db, position_title) %>% # GROUP BY position_title , gender ) dcyuypuypb arrange(db, desc(employee_id)) %>% print(n=20) -#> Source: query [?? x 16] -#> Database: Drill 1.9.0 [drill.local:8047] [32GB direct memory] -#> -#> store_id gender department_id birth_date supervisor_id last_name position_title hire_date -#> -#> 1 8 M 17 1914-02-02 949 Dittmar Store Permanent Stocker 1998-01-01 -#> 2 8 F 17 1914-02-02 949 Jantzer Store Permanent Stocker 1998-01-01 -#> 3 8 F 17 1914-02-02 949 Sweet Store Permanent Stocker 1998-01-01 -#> 4 8 M 17 1914-02-02 949 Murphy Store Permanent Stocker 1998-01-01 -#> 5 8 M 17 1914-02-02 948 Lindsay Store Permanent Stocker 1998-01-01 -#> 6 8 M 17 1914-02-02 948 Burke Store Permanent Stocker 1998-01-01 -#> 7 8 M 17 1914-02-02 948 Bunosky Store Permanent Stocker 1998-01-01 -#> 8 8 F 17 1914-02-02 948 Cabrera Store Permanent Stocker 1998-01-01 -#> 9 8 F 17 1914-02-02 948 Terry Store Permanent Stocker 1998-01-01 -#> 10 8 F 17 1914-02-02 947 Case Store Permanent Stocker 1998-01-01 -#> 11 6 F 18 1976-10-05 56 Horne Store Temporary Stocker 1997-01-01 -#> 12 8 F 17 1914-02-02 947 Nutter Store Permanent Stocker 1998-01-01 -#> 13 8 F 17 1914-02-02 947 Willeford Store Permanent Stocker 1998-01-01 -#> 14 8 M 17 1914-02-02 947 Clendenen Store Permanent Stocker 1998-01-01 -#> 15 8 F 17 1914-02-02 947 Wall Store Permanent Stocker 1998-01-01 -#> 16 8 F 16 1914-02-02 949 Morrow Store Temporary Checker 1998-01-01 -#> 17 8 M 16 1914-02-02 949 Wilson Store Temporary Checker 1998-01-01 -#> 18 8 F 16 1914-02-02 949 Duncan Store Temporary Checker 1998-01-01 -#> 19 8 F 16 1914-02-02 949 Anderson Store Temporary Checker 1998-01-01 -#> 20 8 M 16 1914-02-02 949 Watson Store Temporary Checker 1998-01-01 +#> # Source: table [?? x 16] +#> # Database: DrillConnection +#> # Ordered by: desc(employee_id) +#> store_id gender department_id birth_date supervisor_id last_name position_title hire_date +#> +#> 1 18 F 18 1914-02-02 1140 Stand Store Temporary Stocker 1998-01-01 +#> 2 18 M 18 1914-02-02 1140 Burnham Store Temporary Stocker 1998-01-01 +#> 3 18 F 18 1914-02-02 1139 Doolittle Store Temporary Stocker 1998-01-01 +#> 4 18 M 18 1914-02-02 1139 Pirnie Store Temporary Stocker 1998-01-01 +#> 5 18 M 17 1914-02-02 1140 Younce Store Permanent Stocker 1998-01-01 +#> 6 18 F 17 1914-02-02 1140 Biltoft Store Permanent Stocker 1998-01-01 +#> 7 18 M 17 1914-02-02 1139 Detwiler Store Permanent Stocker 1998-01-01 +#> 8 18 F 17 1914-02-02 1139 Ciruli Store Permanent Stocker 1998-01-01 +#> 9 18 F 16 1914-02-02 1140 Bishop Store Temporary Checker 1998-01-01 +#> 10 18 F 16 1914-02-02 1140 Cutwright Store Temporary Checker 1998-01-01 +#> 11 18 F 16 1914-02-02 1139 Anderson Store Temporary Checker 1998-01-01 +#> 12 18 F 16 1914-02-02 1139 Swartwood Store Temporary Checker 1998-01-01 +#> 13 18 M 15 1914-02-02 1140 Curtsinger Store Permanent Checker 1998-01-01 +#> 14 18 F 15 1914-02-02 1140 Quick Store Permanent Checker 1998-01-01 +#> 15 18 M 15 1914-02-02 1139 Souza Store Permanent Checker 1998-01-01 +#> 16 18 M 15 1914-02-02 1139 Compagno Store Permanent Checker 1998-01-01 +#> 17 18 M 11 1961-09-24 1139 Jaramillo Store Shift Supervisor 1998-01-01 +#> 18 18 M 11 1972-05-12 17 Belsey Store Assistant Manager 1998-01-01 +#> 19 12 M 18 1914-02-02 1069 Eichorn Store Temporary Stocker 1998-01-01 +#> 20 12 F 18 1914-02-02 1069 Geiermann Store Temporary Stocker 1998-01-01 #> # ... with more rows, and 8 more variables: management_role , salary , marital_status , full_name , #> # employee_id , education_level , first_name , position_id @@ -194,18 +192,18 @@ mutate(db, position_title=tolower(position_title)) %>% group_by(supervisor_id) %>% summarise(underlings_count=n()) %>% collect() -#> # A tibble: 112 × 2 +#> # A tibble: 112 x 2 #> supervisor_id underlings_count -#> * -#> 1 0 1 -#> 2 1 7 -#> 3 5 9 -#> 4 4 2 -#> 5 2 3 -#> 6 20 2 -#> 7 21 4 -#> 8 22 7 -#> 9 6 4 +#> * +#> 1 0 1 +#> 2 1 7 +#> 3 5 9 +#> 4 4 2 +#> 5 2 3 +#> 6 20 2 +#> 7 21 4 +#> 8 22 7 +#> 9 6 4 #> 10 36 2 #> # ... with 102 more rows @@ -227,18 +225,18 @@ library(sergeant) # current verison packageVersion("sergeant") -#> [1] '0.3.1.9000' +#> [1] '0.3.2' -dc <- drill_connection("localhost") +dc <- drill_connection("drillex") drill_active(dc) #> [1] TRUE drill_version(dc) -#> [1] "1.9.0" +#> [1] "1.10.0" drill_storage(dc)$name -#> [1] "cp" "dfs" "hbase" "hdfs" "hive" "kudu" "mongo" "my" "s3" +#> [1] "cp" "dfs" "hbase" "hive" "kudu" "mongo" "s3" ``` Working with the built-in JSON data sets: @@ -264,18 +262,18 @@ drill_query(dc, "SELECT * FROM cp.`employee.json` limit 100") #> first_name = col_character(), #> position_id = col_integer() #> ) -#> # A tibble: 100 × 16 +#> # A tibble: 100 x 16 #> store_id gender department_id birth_date supervisor_id last_name position_title hire_date management_role -#> * -#> 1 0 F 1 1961-08-26 0 Nowmer President 1994-12-01 Senior Management -#> 2 0 M 1 1915-07-03 1 Whelply VP Country Manager 1994-12-01 Senior Management -#> 3 0 M 1 1969-06-20 1 Spence VP Country Manager 1998-01-01 Senior Management -#> 4 0 F 1 1951-05-10 1 Gutierrez VP Country Manager 1998-01-01 Senior Management -#> 5 0 F 2 1942-10-08 1 Damstra VP Information Systems 1994-12-01 Senior Management -#> 6 0 F 3 1949-03-27 1 Kanagaki VP Human Resources 1994-12-01 Senior Management -#> 7 9 F 11 1922-08-10 5 Brunner Store Manager 1998-01-01 Store Management -#> 8 21 F 11 1979-06-23 5 Blumberg Store Manager 1998-01-01 Store Management -#> 9 0 M 5 1949-08-26 1 Stanz VP Finance 1994-12-01 Senior Management +#> * +#> 1 0 F 1 1961-08-26 0 Nowmer President 1994-12-01 Senior Management +#> 2 0 M 1 1915-07-03 1 Whelply VP Country Manager 1994-12-01 Senior Management +#> 3 0 M 1 1969-06-20 1 Spence VP Country Manager 1998-01-01 Senior Management +#> 4 0 F 1 1951-05-10 1 Gutierrez VP Country Manager 1998-01-01 Senior Management +#> 5 0 F 2 1942-10-08 1 Damstra VP Information Systems 1994-12-01 Senior Management +#> 6 0 F 3 1949-03-27 1 Kanagaki VP Human Resources 1994-12-01 Senior Management +#> 7 9 F 11 1922-08-10 5 Brunner Store Manager 1998-01-01 Store Management +#> 8 21 F 11 1979-06-23 5 Blumberg Store Manager 1998-01-01 Store Management +#> 9 0 M 5 1949-08-26 1 Stanz VP Finance 1994-12-01 Senior Management #> 10 1 M 11 1967-06-20 5 Murraiin Store Manager 1998-01-01 Store Management #> # ... with 90 more rows, and 7 more variables: salary , marital_status , full_name , employee_id , #> # education_level , first_name , position_id @@ -285,38 +283,38 @@ drill_query(dc, "SELECT COUNT(gender) AS gender FROM cp.`employee.json` GROUP BY #> cols( #> gender = col_integer() #> ) -#> # A tibble: 2 × 1 +#> # A tibble: 2 x 1 #> gender #> * #> 1 601 #> 2 554 drill_options(dc) -#> # A tibble: 105 × 4 +#> # A tibble: 113 x 4 #> name value type kind -#> * -#> 1 planner.enable_hash_single_key TRUE SYSTEM BOOLEAN -#> 2 planner.enable_limit0_optimization FALSE SYSTEM BOOLEAN -#> 3 store.json.read_numbers_as_double FALSE SYSTEM BOOLEAN -#> 4 planner.enable_constant_folding TRUE SYSTEM BOOLEAN -#> 5 store.json.extended_types FALSE SYSTEM BOOLEAN -#> 6 planner.memory.non_blocking_operators_memory 64 SYSTEM LONG -#> 7 planner.enable_multiphase_agg TRUE SYSTEM BOOLEAN -#> 8 planner.filter.max_selectivity_estimate_factor 1 SYSTEM DOUBLE -#> 9 planner.enable_mux_exchange TRUE SYSTEM BOOLEAN -#> 10 store.parquet.use_new_reader FALSE SYSTEM BOOLEAN -#> # ... with 95 more rows +#> * +#> 1 planner.enable_hash_single_key TRUE SYSTEM BOOLEAN +#> 2 store.parquet.reader.pagereader.queuesize 2 SYSTEM LONG +#> 3 planner.enable_limit0_optimization FALSE SYSTEM BOOLEAN +#> 4 store.json.read_numbers_as_double FALSE SYSTEM BOOLEAN +#> 5 planner.enable_constant_folding TRUE SYSTEM BOOLEAN +#> 6 store.json.extended_types FALSE SYSTEM BOOLEAN +#> 7 planner.memory.non_blocking_operators_memory 64 SYSTEM LONG +#> 8 planner.enable_multiphase_agg TRUE SYSTEM BOOLEAN +#> 9 exec.query_profile.debug_mode FALSE SYSTEM BOOLEAN +#> 10 planner.filter.max_selectivity_estimate_factor 1 SYSTEM DOUBLE +#> # ... with 103 more rows drill_options(dc, "json") -#> # A tibble: 7 × 4 +#> # A tibble: 7 x 4 #> name value type kind #> #> 1 store.json.read_numbers_as_double FALSE SYSTEM BOOLEAN #> 2 store.json.extended_types FALSE SYSTEM BOOLEAN -#> 3 store.json.writer.uglify TRUE SYSTEM BOOLEAN -#> 4 store.json.reader.skip_invalid_records TRUE SYSTEM BOOLEAN -#> 5 store.json.reader.print_skipped_invalid_record_number TRUE SYSTEM BOOLEAN -#> 6 store.json.all_text_mode TRUE SYSTEM BOOLEAN +#> 3 store.json.writer.uglify FALSE SYSTEM BOOLEAN +#> 4 store.json.reader.skip_invalid_records FALSE SYSTEM BOOLEAN +#> 5 store.json.reader.print_skipped_invalid_record_number FALSE SYSTEM BOOLEAN +#> 6 store.json.all_text_mode FALSE SYSTEM BOOLEAN #> 7 store.json.writer.skip_null_fields TRUE SYSTEM BOOLEAN ``` @@ -332,7 +330,7 @@ drill_query(dc, "SELECT * FROM dfs.`/usr/local/drill/sample-data/nation.parquet` #> N_NATIONKEY = col_integer(), #> N_REGIONKEY = col_integer() #> ) -#> # A tibble: 5 × 4 +#> # A tibble: 5 x 4 #> N_COMMENT N_NAME N_NATIONKEY N_REGIONKEY #> * #> 1 haggle. carefully f ALGERIA 0 0 @@ -354,14 +352,14 @@ drill_query(dc, "SELECT * FROM dfs.`/usr/local/drill/sample-data/nations*/nation #> N_REGIONKEY = col_integer(), #> dir0 = col_character() #> ) -#> # A tibble: 5 × 5 +#> # A tibble: 5 x 5 #> N_COMMENT N_NAME N_NATIONKEY N_REGIONKEY dir0 #> * -#> 1 haggle. carefully f ALGERIA 0 0 nationsMF -#> 2 al foxes promise sly ARGENTINA 1 1 nationsMF -#> 3 y alongside of the p BRAZIL 2 1 nationsMF -#> 4 eas hang ironic, sil CANADA 3 1 nationsMF -#> 5 y above the carefull EGYPT 4 4 nationsMF +#> 1 haggle. carefully f ALGERIA 0 0 nationsSF +#> 2 al foxes promise sly ARGENTINA 1 1 nationsSF +#> 3 y alongside of the p BRAZIL 2 1 nationsSF +#> 4 eas hang ironic, sil CANADA 3 1 nationsSF +#> 5 y above the carefull EGYPT 4 4 nationsSF ``` ### A preview of the built-in support for spatial ops @@ -388,7 +386,7 @@ select columns[2] as city, columns[4] as lon, columns[3] as lat #> lon = col_double(), #> lat = col_double() #> ) -#> # A tibble: 7 × 3 +#> # A tibble: 7 x 3 #> city lon lat #> * #> 1 Burbank -121.9316 37.32328 @@ -406,45 +404,47 @@ select columns[2] as city, columns[4] as lon, columns[3] as lat library(RJDBC) #> Loading required package: rJava -con <- drill_jdbc("drill.local:2181", "jla") -#> Using [jdbc:drill:zk=drill.local:2181/drill/jla]... -# or the following if running drill-embedded -# con <- drill_jdbc("localhost:31010", use_zk=FALSE) +# Use this if connecting to a cluster with zookeeper +# con <- drill_jdbc("drill-node:2181", "drillbits1") + +# Use the following if running drill-embedded +con <- drill_jdbc("localhost:31010", use_zk=FALSE) +#> Using [jdbc:drill:drillbit=localhost:31010]... drill_query(con, "SELECT * FROM cp.`employee.json`") -#> # A tibble: 1,155 × 16 +#> # A tibble: 1,155 x 16 #> employee_id full_name first_name last_name position_id position_title store_id department_id -#> * -#> 1 1 Sheri Nowmer Sheri Nowmer 1 President 0 1 -#> 2 2 Derrick Whelply Derrick Whelply 2 VP Country Manager 0 1 -#> 3 4 Michael Spence Michael Spence 2 VP Country Manager 0 1 -#> 4 5 Maya Gutierrez Maya Gutierrez 2 VP Country Manager 0 1 -#> 5 6 Roberta Damstra Roberta Damstra 3 VP Information Systems 0 2 -#> 6 7 Rebecca Kanagaki Rebecca Kanagaki 4 VP Human Resources 0 3 -#> 7 8 Kim Brunner Kim Brunner 11 Store Manager 9 11 -#> 8 9 Brenda Blumberg Brenda Blumberg 11 Store Manager 21 11 -#> 9 10 Darren Stanz Darren Stanz 5 VP Finance 0 5 +#> * +#> 1 1 Sheri Nowmer Sheri Nowmer 1 President 0 1 +#> 2 2 Derrick Whelply Derrick Whelply 2 VP Country Manager 0 1 +#> 3 4 Michael Spence Michael Spence 2 VP Country Manager 0 1 +#> 4 5 Maya Gutierrez Maya Gutierrez 2 VP Country Manager 0 1 +#> 5 6 Roberta Damstra Roberta Damstra 3 VP Information Systems 0 2 +#> 6 7 Rebecca Kanagaki Rebecca Kanagaki 4 VP Human Resources 0 3 +#> 7 8 Kim Brunner Kim Brunner 11 Store Manager 9 11 +#> 8 9 Brenda Blumberg Brenda Blumberg 11 Store Manager 21 11 +#> 9 10 Darren Stanz Darren Stanz 5 VP Finance 0 5 #> 10 11 Jonathan Murraiin Jonathan Murraiin 11 Store Manager 1 11 -#> # ... with 1,145 more rows, and 8 more variables: birth_date , hire_date , salary , supervisor_id , +#> # ... with 1,145 more rows, and 8 more variables: birth_date , hire_date , salary , supervisor_id , #> # education_level , marital_status , gender , management_role # but it can work via JDBC function calls, too dbGetQuery(con, "SELECT * FROM cp.`employee.json`") %>% tibble::as_tibble() -#> # A tibble: 1,155 × 16 +#> # A tibble: 1,155 x 16 #> employee_id full_name first_name last_name position_id position_title store_id department_id -#> * -#> 1 1 Sheri Nowmer Sheri Nowmer 1 President 0 1 -#> 2 2 Derrick Whelply Derrick Whelply 2 VP Country Manager 0 1 -#> 3 4 Michael Spence Michael Spence 2 VP Country Manager 0 1 -#> 4 5 Maya Gutierrez Maya Gutierrez 2 VP Country Manager 0 1 -#> 5 6 Roberta Damstra Roberta Damstra 3 VP Information Systems 0 2 -#> 6 7 Rebecca Kanagaki Rebecca Kanagaki 4 VP Human Resources 0 3 -#> 7 8 Kim Brunner Kim Brunner 11 Store Manager 9 11 -#> 8 9 Brenda Blumberg Brenda Blumberg 11 Store Manager 21 11 -#> 9 10 Darren Stanz Darren Stanz 5 VP Finance 0 5 +#> * +#> 1 1 Sheri Nowmer Sheri Nowmer 1 President 0 1 +#> 2 2 Derrick Whelply Derrick Whelply 2 VP Country Manager 0 1 +#> 3 4 Michael Spence Michael Spence 2 VP Country Manager 0 1 +#> 4 5 Maya Gutierrez Maya Gutierrez 2 VP Country Manager 0 1 +#> 5 6 Roberta Damstra Roberta Damstra 3 VP Information Systems 0 2 +#> 6 7 Rebecca Kanagaki Rebecca Kanagaki 4 VP Human Resources 0 3 +#> 7 8 Kim Brunner Kim Brunner 11 Store Manager 9 11 +#> 8 9 Brenda Blumberg Brenda Blumberg 11 Store Manager 21 11 +#> 9 10 Darren Stanz Darren Stanz 5 VP Finance 0 5 #> 10 11 Jonathan Murraiin Jonathan Murraiin 11 Store Manager 1 11 -#> # ... with 1,145 more rows, and 8 more variables: birth_date , hire_date , salary , supervisor_id , +#> # ... with 1,145 more rows, and 8 more variables: birth_date , hire_date , salary , supervisor_id , #> # education_level , marital_status , gender , management_role ``` @@ -460,7 +460,7 @@ library(testthat) #> matches date() -#> [1] "Mon Jan 23 10:36:57 2017" +#> [1] "Tue May 30 17:28:25 2017" test_dir("tests/") #> testthat results ======================================================================================================== diff --git a/man/src_drill.Rd b/man/src_drill.Rd index 2e70b4e..db1c50d 100644 --- a/man/src_drill.Rd +++ b/man/src_drill.Rd @@ -2,37 +2,13 @@ % Please edit documentation in R/dplyr.r \name{src_drill} \alias{src_drill} -\alias{src_tbls.src_drill} -\alias{db_desc.src_drill} -\alias{sql_escape_ident.DrillConnection} -\alias{copy_to.src_drill} \alias{tbl.src_drill} -\alias{db_explain.DrillConnection} -\alias{db_query_fields.DrillConnection} -\alias{db_data_type.DrillConnection} -\alias{sql_translate_env.DrillConnection} -\title{Connect to Drill (using \code{dplyr}).} +\title{Connect to Drill (dplyr)} \usage{ src_drill(host = Sys.getenv("DRILL_HOST", "localhost"), port = as.integer(Sys.getenv("DRILL_PORT", 8047L)), ssl = FALSE) -\method{src_tbls}{src_drill}(x) - -\method{db_desc}{src_drill}(x) - -\method{sql_escape_ident}{DrillConnection}(con, x) - -\method{copy_to}{src_drill}(dest, df) - \method{tbl}{src_drill}(src, from, ...) - -\method{db_explain}{DrillConnection}(con, sql, ...) - -\method{db_query_fields}{DrillConnection}(con, sql, ...) - -\method{db_data_type}{DrillConnection}(con, fields, ...) - -\method{sql_translate_env}{DrillConnection}(con) } \arguments{ \item{host}{Drill host (will pick up the value from \code{DRILL_HOST} env var)} @@ -54,9 +30,7 @@ also been made available to the \code{dplyr} interface. If you have custom Drill SQL functions that need to be implemented please file an issue on GitHub. } \note{ -This is a DBI wrapper around the Drill REST API. - -TODO username/password support +This is a DBI wrapper around the Drill REST API. TODO username/password support } \examples{ \dontrun{ @@ -80,4 +54,3 @@ select(emp, full_name) \%>\% rpdw = rpad_with(full_name, 20L, "*")) } } -\keyword{internal} diff --git a/man/src_tbls.Rd b/man/src_tbls.Rd new file mode 100644 index 0000000..5f91383 --- /dev/null +++ b/man/src_tbls.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dplyr.r +\name{src_tbls.src_drill} +\alias{src_tbls.src_drill} +\alias{db_desc.src_drill} +\alias{sql_escape_ident.DrillConnection} +\alias{copy_to.src_drill} +\alias{db_explain.DrillConnection} +\alias{db_query_fields.DrillConnection} +\alias{db_data_type.DrillConnection} +\alias{sql_translate_env.DrillConnection} +\title{src tbls} +\usage{ +\method{src_tbls}{src_drill}(x) + +\method{db_desc}{src_drill}(x) + +\method{sql_escape_ident}{DrillConnection}(con, x) + +\method{copy_to}{src_drill}(dest, df) + +\method{db_explain}{DrillConnection}(con, sql, ...) + +\method{db_query_fields}{DrillConnection}(con, sql, ...) + +\method{db_data_type}{DrillConnection}(con, fields, ...) + +\method{sql_translate_env}{DrillConnection}(con) +} +\arguments{ +\item{x}{x} +} +\description{ +"SHOW DATABASES" +} +\keyword{internal} diff --git a/sergeant.Rproj b/sergeant.Rproj index 9f58d70..3437464 100644 --- a/sergeant.Rproj +++ b/sergeant.Rproj @@ -3,6 +3,7 @@ Version: 1.0 RestoreWorkspace: No SaveWorkspace: No AlwaysSaveHistory: Default +QuitChildProcessesOnExit: Default EnableCodeIndexing: Yes UseSpacesForTab: Yes diff --git a/tests/testthat/test-sergeant.R b/tests/testthat/test-sergeant.R index 856a30c..636e2bc 100644 --- a/tests/testthat/test-sergeant.R +++ b/tests/testthat/test-sergeant.R @@ -4,7 +4,7 @@ test_that("we can do something", { testthat::skip_on_cran() testthat::skip_on_travis() - ds <- src_drill("drill1") + ds <- src_drill("drillex") db <- tbl(ds, "cp.`employee.json`") count(db, gender, marital_status) %>% @@ -12,10 +12,11 @@ test_that("we can do something", { expect_that(res, is_a("data.frame")) - dc <- drill_connection("drill1") + dc <- drill_connection("drillex") expect_equal(drill_active(dc), TRUE) - con <- drill_jdbc("drill1:2181", "jla") + #con <- drill_jdbc("drill1:2181", "jla") + con <- drill_jdbc("localhost:31010", use_zk=FALSE) res <- drill_query(con, "SELECT * FROM cp.`employee.json`") expect_that(res, is_a("data.frame"))