diff --git a/.Rbuildignore b/.Rbuildignore index f31e09b..1ca9af5 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -8,5 +8,6 @@ ^NOTES\.*html$ ^\.codecov\.yml$ ^README_files$ -^doc$ +^docs$ ^CONDUCT\.md$ +^\.bash_profile$ diff --git a/DESCRIPTION b/DESCRIPTION index ec7b9db..f94b708 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,4 +1,4 @@ -Package: metis.lite +Package: metis Type: Package Title: Access and Query Amazon Athena via DBI/JDBC Version: 0.3.0 @@ -9,9 +9,9 @@ Authors@R: c( ) Maintainer: Bob Rudis Encoding: UTF-8 -Description: Methods are provides to connect to 'Amazon' 'Athena', lookup schemas/tables, - perform queries and retrieve query results. A lightweight 'RJDBC' implementation - is included along with additional helpers for 'dplyr'/'dplyr' suppprt. +Description: Methods are provided to connect to 'Amazon' 'Athena', lookup + schemas/tables, perform queries and retrieve query results using the + Athena JDBC driver found in 'metis.jars'. SystemRequirements: JDK 1.8+ License: MIT + file LICENSE Suggests: @@ -19,11 +19,12 @@ Suggests: covr Depends: R (>= 3.2.0), + metis.jars, RJDBC Imports: rJava, DBI, bit64, - dbplyr, + methods, aws.signature RoxygenNote: 6.1.1 diff --git a/NAMESPACE b/NAMESPACE index 69e7ba8..5d7ef27 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,9 +2,7 @@ export(Athena) export(athena_connect) -export(db_data_type.AthenaConnection) export(read_credentials) -export(sql_translate_env.AthenaConnection) export(use_credentials) exportClasses(AthenaConnection) exportClasses(AthenaDriver) @@ -21,7 +19,9 @@ exportMethods(fetch) import(DBI) import(RJDBC) import(bit64) -import(dbplyr) +import(metis.jars) importFrom(aws.signature,read_credentials) importFrom(aws.signature,use_credentials) +importFrom(methods,as) +importFrom(methods,callNextMethod) importFrom(rJava,.jcall) diff --git a/R/jdbc.r b/R/jdbc.r index e1c24a6..2084922 100644 --- a/R/jdbc.r +++ b/R/jdbc.r @@ -21,12 +21,13 @@ setClass( #' AthenaJDBC #' +#' @param identifier.quote how to 
quote identifiers #' @export Athena <- function(identifier.quote = '`') { JDBC( driverClass = "com.simba.athena.jdbc.Driver", - system.file("java", "AthenaJDBC42_2.0.6.jar", package = "metis.lite"), + metis.jars::metis_jar_path(), identifier.quote = identifier.quote ) -> drv @@ -44,6 +45,7 @@ Athena <- function(identifier.quote = '`') { #' - `ComplexTypeColumnLength`: The maximum data length for `ARRAY`, `MAP`, and `STRUCT` columns. Default `65535L` #' - `StringColumnLength`: The maximum data length for `STRING` columns. Default `255L` #' +#' @param drv driver #' @param provider JDBC auth provider (ideally leave default) #' @param region AWS region the Athena tables are in #' @param s3_staging_dir A write-able bucket on S3 that you have permissions for @@ -54,6 +56,7 @@ Athena <- function(identifier.quote = '`') { #' of data in logs. Set this to a temporary directory or something log4j can use. For #' `log_level` use the names ("INFO", "DEBUG", "WARN", "ERROR", "ALL", "OFF", "FATAL", "TRACE") or #' their corresponding integer values 0-6. +#' @param fetch_size Athena results fetch size #' @param ... passed on to the driver. See Details. 
#' @references [Connect with JDBC](https://docs.aws.amazon.com/athena/latest/ug/connect-with-jdbc.html); #' [Simba Athena JDBC Driver with SQL Connector Installation and Configuration Guide](https://s3.amazonaws.com/athena-downloads/drivers/JDBC/SimbaAthenaJDBC_2.0.6/docs/Simba+Athena+JDBC+Driver+Install+and+Configuration+Guide.pdf) @@ -109,6 +112,9 @@ setMethod( #' AthenaJDBC #' +#' @param jc job ref +#' @param identifier.quote how to quote identifiers +#' @param fetch_size Athena results fetch size #' @export setClass("AthenaConnection", representation("JDBCConnection", jc="jobjRef", identifier.quote="character", fetch_size="integer")) diff --git a/R/metis-lite-package.R b/R/metis-lite-package.R deleted file mode 100644 index fd0cfdd..0000000 --- a/R/metis-lite-package.R +++ /dev/null @@ -1,48 +0,0 @@ -#' Access and Query Amazon Athena via DBI/JDBC -#' -#' Methods are provides to connect to 'Amazon' 'Athena', lookup schemas/tables, -#' perform queries and retrieve query results. A lightweight 'RJDBC' implementation -#' is included along with an interface to the 'AWS' command-line utility. -#' -#' @name metis.lite -#' -#' @section IMPORTANT: -#' -#' Since R 3.5 (I don't remember this happening in R 3.4.x) signals sent from interrupting -#' Athena JDBC calls crash the R #' interpreter. You need to set the `-Xrs` option to avoid -#' signals being passed on to the JVM owner. 
That has to be done _before_ `rJava` is -#' loaded so you either need to remember to put it at the top of all scripts _or_ stick this -#' in your local #' `~/.Rprofile` and/or sitewide `Rprofile`: -#' -#' -#' if (!grepl("-Xrs", getOption("java.parameters", ""))) { -#' options( -#' "java.parameters" = c(getOption("java.parameters", default = NULL), "-Xrs") -#' ) -#' } -#' -#' @md -#' @encoding UTF-8 -#' @keywords internal -#' @docType package -#' @author Bob Rudis (bob@@rud.is) -#' @import RJDBC DBI bit64 dbplyr -#' @references [Simba Athena JDBC Driver with SQL Connector Installation and Configuration Guide](https://s3.amazonaws.com/athena-downloads/drivers/JDBC/SimbaAthenaJDBC_2.0.6/docs/Simba+Athena+JDBC+Driver+Install+and+Configuration+Guide.pdf) -NULL - - -#' Use Credentials from .aws/credentials File -#' -#' @md -#' @importFrom aws.signature use_credentials read_credentials -#' @references [aws.signature::use_credentials()] / [aws.signature::read_credentials()] -#' @name use_credentials -#' @rdname use_credentials -#' @inheritParams aws.signature::use_credentials -#' @export -NULL - -#' @name read_credentials -#' @rdname use_credentials -#' @export -NULL diff --git a/R/metis-package.R b/R/metis-package.R new file mode 100644 index 0000000..4e42307 --- /dev/null +++ b/R/metis-package.R @@ -0,0 +1,33 @@ +#' Access and Query Amazon Athena via DBI/JDBC +#' +#' Methods are provided to connect to 'Amazon' 'Athena', lookup +#' schemas/tables, perform queries and retrieve query results using the +#' Athena JDBC driver found in 'metis.jars'. 
+#' +#' @name metis +#' +#' @md +#' @encoding UTF-8 +#' @keywords internal +#' @docType package +#' @author Bob Rudis (bob@@rud.is) +#' @import RJDBC DBI bit64 metis.jars +#' @importFrom methods as callNextMethod +#' @references [Simba Athena JDBC Driver with SQL Connector Installation and Configuration Guide](https://s3.amazonaws.com/athena-downloads/drivers/JDBC/SimbaAthenaJDBC_2.0.6/docs/Simba+Athena+JDBC+Driver+Install+and+Configuration+Guide.pdf) +NULL + +#' Use Credentials from .aws/credentials File +#' +#' @md +#' @importFrom aws.signature use_credentials read_credentials +#' @references [aws.signature::use_credentials()] / [aws.signature::read_credentials()] +#' @name use_credentials +#' @rdname use_credentials +#' @inheritParams aws.signature::use_credentials +#' @export +NULL + +#' @name read_credentials +#' @rdname use_credentials +#' @export +NULL diff --git a/R/sql_translate_env.R b/R/sql_translate_env.R deleted file mode 100644 index 4467278..0000000 --- a/R/sql_translate_env.R +++ /dev/null @@ -1,124 +0,0 @@ -#' @rdname Athena -#' @keywords internal -#' @export -db_data_type.AthenaConnection <- function(con, fields, ...) 
{ - print("\n\n\ndb_data_type\n\n\n") - data_type <- function(x) { - switch( - class(x)[1], - integer64 = "BIGINT", - logical = "BOOLEAN", - integer = "INTEGER", - numeric = "DOUBLE", - factor = "CHARACTER", - character = "CHARACTER", - Date = "DATE", - POSIXct = "TIMESTAMP", - stop("Can't map type ", paste(class(x), collapse = "/"), - " to a supported database type.") - ) - } - vapply(fields, data_type, character(1)) -} - -#' @rdname Athena -#' @keywords internal -#' @export -sql_translate_env.AthenaConnection <- function(con) { - - x <- con - - dbplyr::sql_variant( - - scalar = dbplyr::sql_translator( - .parent = dbplyr::base_scalar, - `!=` = dbplyr::sql_infix("<>"), - as.integer64 = function(x) dbplyr::build_sql("CAST(", x, "AS BIGINT)"), - as.numeric = function(x) dbplyr::build_sql("CAST(", x, " AS DOUBLE)"), - as.character = function(x) dbplyr::build_sql("CAST(", x, " AS CHARACTER)"), - as.date = function(x) dbplyr::build_sql("CAST(", x, " AS DATE)"), - as.Date = function(x) dbplyr::build_sql("CAST(", x, " AS DATE)"), - as.POSIXct = function(x) dbplyr::build_sql("CAST(", x, " AS TIMESTAMP)"), - as.posixct = function(x) dbplyr::build_sql("CAST(", x, " AS TIMESTAMP)"), - as.logical = function(x) dbplyr::build_sql("CAST(", x, " AS BOOLEAN)"), - date_part = function(x, y) dbplyr::build_sql("DATE_PART(", x, ",", y ,")"), - grepl = function(x, y) dbplyr::build_sql("CONTAINS(", y, ", ", x, ")"), - gsub = function(x, y, z) dbplyr::build_sql("REGEXP_REPLACE(", z, ", ", x, ",", y ,")"), - trimws = function(x) dbplyr::build_sql("TRIM(both ' ' FROM ", x, ")"), - cbrt = dbplyr::sql_prefix("CBRT", 1), - degrees = dbplyr::sql_prefix("DEGREES", 1), - e = dbplyr::sql_prefix("E", 0), - row_number = dbplyr::sql_prefix("row_number", 0), - lshift = dbplyr::sql_prefix("LSHIFT", 2), - mod = dbplyr::sql_prefix("MOD", 2), - age = dbplyr::sql_prefix("AGE", 1), - negative = dbplyr::sql_prefix("NEGATIVE", 1), - pi = dbplyr::sql_prefix("PI", 0), - pow = dbplyr::sql_prefix("POW", 2), - 
radians = dbplyr::sql_prefix("RADIANS", 1), - rand = dbplyr::sql_prefix("RAND", 0), - rshift = dbplyr::sql_prefix("RSHIFT", 2), - trunc = dbplyr::sql_prefix("TRUNC", 2), - contains = dbplyr::sql_prefix("CONTAINS", 2), - convert_to = dbplyr::sql_prefix("CONVERT_TO", 2), - convert_from = dbplyr::sql_prefix("CONVERT_FROM", 2), - string_binary = dbplyr::sql_prefix("STRING_BINARY", 1), - binary_string = dbplyr::sql_prefix("BINARY_STRING", 1), - to_char = dbplyr::sql_prefix("TO_CHAR", 2), - to_date = dbplyr::sql_prefix("TO_DATE", 2), - to_number = dbplyr::sql_prefix("TO_NUMBER", 2), - char_to_timestamp = dbplyr::sql_prefix("TO_TIMESTAMP", 2), - double_to_timestamp = dbplyr::sql_prefix("TO_TIMESTAMP", 1), - char_length = dbplyr::sql_prefix("CHAR_LENGTH", 1), - flatten = dbplyr::sql_prefix("FLATTEN", 1), - kvgen = dbplyr::sql_prefix("KVGEN", 1), - repeated_count = dbplyr::sql_prefix("REPEATED_COUNT", 1), - repeated_contains = dbplyr::sql_prefix("REPEATED_CONTAINS", 2), - ilike = dbplyr::sql_prefix("ILIKE", 2), - init_cap = dbplyr::sql_prefix("INIT_CAP", 1), - length = dbplyr::sql_prefix("LENGTH", 1), - lower = dbplyr::sql_prefix("LOWER", 1), - tolower = dbplyr::sql_prefix("LOWER", 1), - ltrim = dbplyr::sql_prefix("LTRIM", 2), - nullif = dbplyr::sql_prefix("NULLIF", 2), - position = function(x, y) dbplyr::build_sql("POSITION(", x, " IN ", y, ")"), - regexp_replace = dbplyr::sql_prefix("REGEXP_REPLACE", 3), - rtrim = dbplyr::sql_prefix("RTRIM", 2), - rpad = dbplyr::sql_prefix("RPAD", 2), - rpad_with = dbplyr::sql_prefix("RPAD", 3), - lpad = dbplyr::sql_prefix("LPAD", 2), - lpad_with = dbplyr::sql_prefix("LPAD", 3), - strpos = dbplyr::sql_prefix("STRPOS", 2), - substr = dbplyr::sql_prefix("SUBSTR", 3), - trim = function(x, y, z) dbplyr::build_sql("TRIM(", x, " ", y, " FROM ", z, ")"), - upper = dbplyr::sql_prefix("UPPER", 1), - toupper = dbplyr::sql_prefix("UPPER", 1) - ), - - aggregate = dbplyr::sql_translator( - .parent = dbplyr::base_agg, - n = function() 
dbplyr::sql("COUNT(*)"), - cor = dbplyr::sql_prefix("CORR"), - cov = dbplyr::sql_prefix("COVAR_SAMP"), - sd = dbplyr::sql_prefix("STDDEV_SAMP"), - var = dbplyr::sql_prefix("VAR_SAMP"), - n_distinct = function(x) { - dbplyr::build_sql(dbplyr::sql("COUNT(DISTINCT "), x, dbplyr::sql(")")) - } - ), - - window = dbplyr::sql_translator( - .parent = dbplyr::base_win, - n = function() { dbplyr::win_over(dbplyr::sql("count(*)"), - partition = dbplyr::win_current_group()) }, - cor = dbplyr::win_recycled("corr"), - cov = dbplyr::win_recycled("covar_samp"), - sd = dbplyr::win_recycled("stddev_samp"), - var = dbplyr::win_recycled("var_samp"), - all = dbplyr::win_recycled("bool_and"), - any = dbplyr::win_recycled("bool_or") - ) - - ) - -} \ No newline at end of file diff --git a/R/z-dbGetQuery.R b/R/z-dbGetQuery.R index 9de4762..e76dc7f 100644 --- a/R/z-dbGetQuery.R +++ b/R/z-dbGetQuery.R @@ -24,27 +24,34 @@ list( "1111" = as.character # OTHER ) -> .jdbc_converters +#' Retrieve connection/driver/database metadata +#' +#' @param dbObj driver/connection +#' @param ... unused #' @export #' @keywords internal setMethod("dbGetInfo", "AthenaDriver", def=function(dbObj, ...) list( name = "AthenaJDBC", - driver_version = list.files(system.file("java", package="metis.lite"), "jar$")[1], - package_version = utils::packageVersion("metis.lite") + driver_version = metis.jars::simba_driver_version(), + package_version = utils::packageVersion("metis.jars") ) ) +#' Retrieve connection/driver/database metadata +#' +#' @param dbObj driver/connection +#' @param ... unused #' @export #' @keywords internal setMethod("dbGetInfo", "AthenaConnection", def=function(dbObj, ...) 
list( name = "AthenaJDBC", - driver_version = list.files(system.file("java", package="metis.lite"), "jar$")[1], - package_version = utils::packageVersion("metis.lite") + driver_version = metis.jars::simba_driver_version(), + package_version = utils::packageVersion("metis") ) ) - #' Fetch records from a previously executed query #' #' Fetch the next `n` elements (rows) from the result set and return them @@ -56,6 +63,7 @@ setMethod("dbGetInfo", "AthenaConnection", def=function(dbObj, ...) #' or `n = Inf` #' to retrieve all pending records. Some implementations may recognize other #' special values. +#' @param block block size #' @param ... Other arguments passed on to methods. #' @export setMethod( diff --git a/R/zzz.R b/R/zzz.R index 70b17a3..da0e81e 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1,11 +1,2 @@ .onLoad <- function(libname, pkgname) { - rJava::.jpackage(pkgname, jars = "*", lib.loc = libname) - rJava::.jaddClassPath(dir(file.path(getwd(), "inst/java"), full.names = TRUE)) - o <- getOption("java.parameters", "") - if (!any(grepl("-Xrs", o))) { - packageStartupMessage( - "Did not find '-Xrs' in java.parameters option. Until rJava is updated, ", - "please set this up in your/an Rprofile or at the start of scripts." - ) - } } diff --git a/README.Rmd b/README.Rmd index bcedb72..9319d1c 100644 --- a/README.Rmd +++ b/README.Rmd @@ -10,20 +10,11 @@ Access and Query Amazon Athena via DBI/JDBC ## Description -In Greek mythology, Metis was Athena's "helper" so methods are provided to help you accessing and querying Amazon Athena via DBI/JDBC and/or `dplyr`. -#' Methods are provides to connect to 'Amazon' 'Athena', lookup schemas/tables, +In Greek mythology, Metis was Athena's "helper" so... -## IMPORTANT +Methods are provided to connect to 'Amazon' 'Athena', lookup schemas/tables, +perform queries and retrieve query results via the included JDBC DBI driver. -Since R 3.5 (I don't remember this happening in R 3.4.x) signals sent from interrupting Athena JDBC calls crash the R interpreter. 
You need to set the `-Xrs` option to avoid signals being passed on to the JVM owner. That has to be done _before_ `rJava` is loaded so you either need to remember to put it at the top of all scripts _or_ stick this in your local `~/.Rprofile` and/or sitewide `Rprofile`: - -```r -if (!grepl("-Xrs", getOption("java.parameters", ""))) { - options( - "java.parameters" = c(getOption("java.parameters", default = NULL), "-Xrs") - ) -} -``` ## What's Inside The Tin? The following functions are implemented: @@ -57,11 +48,11 @@ Pulled in from other `cloudyr` pkgs: ## Installation ```{r eval=FALSE} -devtools::install_git("https://git.sr.ht/~hrbrmstr/metis-lite") +devtools::install_git("https://git.sr.ht/~hrbrmstr/metis") # OR -devtools::install_gitlab("hrbrmstr/metis-lite") +devtools::install_gitlab("hrbrmstr/metis") # OR -devtools::install_github("hrbrmstr/metis-lite") +devtools::install_github("hrbrmstr/metis") ``` ```{r message=FALSE, warning=FALSE, include=FALSE} @@ -71,22 +62,20 @@ options(width=120) ## Usage ```{r message=FALSE, warning=FALSE} -library(metis.lite) +library(metis) # current verison -packageVersion("metis.lite") +packageVersion("metis") ``` ```{r message=FALSE, warning=FALSE} library(rJava) library(RJDBC) -library(metis.lite) -library(magrittr) -library(dbplyr) -library(dplyr) +library(metis) +library(magrittr) dbConnect( - drv = metis.lite::Athena(), + drv = metis::Athena(), schema_name = "sampledb", provider = "com.simba.athena.amazonaws.auth.PropertiesFileCredentialsProvider", AwsCredentialsProviderArguments = path.expand("~/.aws/athenaCredentials.props"), @@ -100,7 +89,7 @@ dbExistsTable(con, "elb_logs", schema="sampledb") dbListFields(con, "elb_logs", "sampledb") dbGetQuery(con, "SELECT * FROM sampledb.elb_logs LIMIT 10") %>% - glimpse() + dplyr::glimpse() ``` ### Check types @@ -124,31 +113,11 @@ SELECT FROM elb_logs LIMIT 1 ") %>% - glimpse() + dplyr::glimpse() ``` -#### dplyr - -```{r} -tbl(con, sql(" -SELECT - CAST('chr' AS CHAR(4)) achar, - 
CAST('varchr' AS VARCHAR) avarchr, - CAST(SUBSTR(timestamp, 1, 10) AS DATE) AS tsday, - CAST(100.1 AS DOUBLE) AS justadbl, - CAST(127 AS TINYINT) AS asmallint, - CAST(100 AS INTEGER) AS justanint, - CAST(100000000000000000 AS BIGINT) AS abigint, - CAST(('GET' = 'GET') AS BOOLEAN) AS is_get, - ARRAY[1, 2, 3] AS arr, - ARRAY['1', '2, 3', '4'] AS arr, - MAP(ARRAY['foo', 'bar'], ARRAY[1, 2]) AS mp, - CAST(ROW(1, 2.0) AS ROW(x BIGINT, y DOUBLE)) AS rw, - CAST('{\"a\":1}' AS JSON) js -FROM elb_logs -LIMIT 1 -")) %>% - glimpse() +```{r cloc} +cloc::cloc_pkg_md() ``` ## Code of Conduct diff --git a/README.md b/README.md index 82f5a56..6bfc09c 100644 --- a/README.md +++ b/README.md @@ -5,27 +5,11 @@ Access and Query Amazon Athena via DBI/JDBC ## Description -In Greek mythology, Metis was Athena’s “helper” so methods are provided -to help you accessing and querying Amazon Athena via DBI/JDBC and/or -`dplyr`. \#’ Methods are provides to connect to ‘Amazon’ ‘Athena’, -lookup schemas/tables, +In Greek mythology, Metis was Athena’s “helper” so… -## IMPORTANT - -Since R 3.5 (I don’t remember this happening in R 3.4.x) signals sent -from interrupting Athena JDBC calls crash the R interpreter. You need to -set the `-Xrs` option to avoid signals being passed on to the JVM owner. -That has to be done *before* `rJava` is loaded so you either need to -remember to put it at the top of all scripts *or* stick this in your -local `~/.Rprofile` and/or sitewide `Rprofile`: - -``` r -if (!grepl("-Xrs", getOption("java.parameters", ""))) { - options( - "java.parameters" = c(getOption("java.parameters", default = NULL), "-Xrs") - ) -} -``` +Methods are provided to connect to ‘Amazon’ ‘Athena’, lookup +schemas/tables, perform queries and retrieve query results via the +included JDBC DBI driver. ## What’s Inside The Tin? 
@@ -60,20 +44,20 @@ Pulled in from other `cloudyr` pkgs: ## Installation ``` r -devtools::install_git("https://git.sr.ht/~hrbrmstr/metis-lite") +devtools::install_git("https://git.sr.ht/~hrbrmstr/metis") # OR -devtools::install_gitlab("hrbrmstr/metis-lite") +devtools::install_gitlab("hrbrmstr/metis") # OR -devtools::install_github("hrbrmstr/metis-lite") +devtools::install_github("hrbrmstr/metis") ``` ## Usage ``` r -library(metis.lite) +library(metis) # current verison -packageVersion("metis.lite") +packageVersion("metis") ``` ## [1] '0.3.0' @@ -81,10 +65,8 @@ packageVersion("metis.lite") ``` r library(rJava) library(RJDBC) -library(metis.lite) -library(magrittr) -library(dbplyr) -library(dplyr) +library(metis) +library(magrittr) dbConnect( - drv = metis.lite::Athena(), + drv = metis::Athena(), @@ -116,26 +98,26 @@ dbListFields(con, "elb_logs", "sampledb") ``` r dbGetQuery(con, "SELECT * FROM sampledb.elb_logs LIMIT 10") %>% - glimpse() + dplyr::glimpse() ``` ## Observations: 10 ## Variables: 16 - ## $ timestamp "2014-09-29T18:18:51.826955Z", "2014-09-29T18:18:51.920462Z", "2014-09-29T18:18:52.2725… + ## $ timestamp "2014-09-29T03:24:38.169500Z", "2014-09-29T03:25:09.029469Z", "2014-09-29T03:25:39.8676… ## $ elbname "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo",… - ## $ requestip "255.48.150.122", "249.213.227.93", "245.108.120.229", "241.112.203.216", "241.43.107.2… - ## $ requestport 62096, 62096, 62096, 62096, 56454, 33254, 18918, 64352, 1651, 56454 - ## $ backendip "244.238.214.120", "248.99.214.228", "243.3.190.175", "246.235.181.255", "241.112.203.2… - ## $ backendport 8888, 8888, 8888, 8888, 8888, 8888, 8888, 8888, 8888, 8888 - ## $ requestprocessingtime 9.0e-05, 9.7e-05, 8.7e-05, 9.4e-05, 7.6e-05, 8.3e-05, 6.3e-05, 5.4e-05, 8.2e-05, 8.7e-05 - ## $ backendprocessingtime 0.007410, 0.256533, 0.442659, 0.016772, 0.035036, 0.029892, 0.034148, 0.014858, 0.01518… - ## $ clientresponsetime 0.000055, 0.000075, 0.000131, 0.000078, 0.000057, 0.000043, 
0.000033, 0.000043, 0.00007… - ## $ elbresponsecode "302", "302", "200", "200", "200", "200", "200", "200", "200", "200" - ## $ backendresponsecode "200", "200", "200", "200", "200", "200", "200", "200", "200", "200" + ## $ requestip "253.89.30.138", "248.64.121.231", "245.21.209.210", "244.77.57.59", "244.185.170.87", … + ## $ requestport 20159, 20159, 20159, 20159, 20159, 20159, 20159, 20159, 20159, 20159 + ## $ backendip "253.89.30.138", "244.77.57.59", "240.105.192.251", "253.89.30.138", "248.64.121.231", … + ## $ backendport 8888, 8888, 8888, 8899, 8888, 8888, 8888, 8888, 8888, 8888 + ## $ requestprocessingtime 7.5e-05, 9.1e-05, 9.0e-05, 9.5e-05, 8.9e-05, 9.3e-05, 8.7e-05, 9.2e-05, 9.0e-05, 9.1e-05 + ## $ backendprocessingtime 0.047465, 0.044693, 0.045687, 0.051089, 0.045445, 0.045845, 0.046027, 0.045039, 0.05010… + ## $ clientresponsetime 6.5e-05, 7.2e-05, 6.4e-05, 7.0e-05, 5.4e-05, 6.7e-05, 5.7e-05, 4.6e-05, 8.7e-05, 4.9e-05 + ## $ elbresponsecode "200", "200", "200", "200", "200", "200", "200", "200", "200", "200" + ## $ backendresponsecode "200", "200", "400", "200", "404", "200", "403", "404", "200", "200" ## $ receivedbytes 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - ## $ sentbytes 0, 0, 58402, 152213, 20766, 32370, 3408, 3884, 84245, 3831 + ## $ sentbytes 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 ## $ requestverb "GET", "GET", "GET", "GET", "GET", "GET", "GET", "GET", "GET", "GET" - ## $ url "http://www.abcxyz.com:80/", "http://www.abcxyz.com:80/accounts/login/?next=/", "http:/… + ## $ url "http://www.abcxyz.com:80/jobbrowser/?format=json&state=running&user=248nnm5", "http://… ## $ protocol "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HT… ### Check types @@ -159,7 +141,7 @@ SELECT FROM elb_logs LIMIT 1 ") %>% - glimpse() + dplyr::glimpse() ``` ## Observations: 1 @@ -178,46 +160,14 @@ LIMIT 1 ## $ rw "{x=1, y=2.0}" ## $ js "\"{\\\"a\\\":1}\"" -#### dplyr - ``` r -tbl(con, sql(" -SELECT - CAST('chr' AS CHAR(4)) achar, - CAST('varchr' AS 
VARCHAR) avarchr, - CAST(SUBSTR(timestamp, 1, 10) AS DATE) AS tsday, - CAST(100.1 AS DOUBLE) AS justadbl, - CAST(127 AS TINYINT) AS asmallint, - CAST(100 AS INTEGER) AS justanint, - CAST(100000000000000000 AS BIGINT) AS abigint, - CAST(('GET' = 'GET') AS BOOLEAN) AS is_get, - ARRAY[1, 2, 3] AS arr, - ARRAY['1', '2, 3', '4'] AS arr, - MAP(ARRAY['foo', 'bar'], ARRAY[1, 2]) AS mp, - CAST(ROW(1, 2.0) AS ROW(x BIGINT, y DOUBLE)) AS rw, - CAST('{\"a\":1}' AS JSON) js -FROM elb_logs -LIMIT 1 -")) %>% - glimpse() +cloc::cloc_pkg_md() ``` - ## Observations: ?? - ## Variables: 13 - ## Database: AthenaConnection - ## $ achar "chr " - ## $ avarchr "varchr" - ## $ tsday 2014-09-27 - ## $ justadbl 100.1 - ## $ asmallint 127 - ## $ justanint 100 - ## $ abigint 100000000000000000 - ## $ is_get TRUE - ## $ arr "1, 2, 3" - ## $ arr "1, 2, 3, 4" - ## $ mp "{bar=2, foo=1}" - ## $ rw "{x=1, y=2.0}" - ## $ js "\"{\\\"a\\\":1}\"" +| Lang | \# Files | (%) | LoC | (%) | Blank lines | (%) | \# Lines | (%) | +| :--- | -------: | ---: | --: | ---: | ----------: | ---: | -------: | ---: | +| R | 8 | 0.89 | 232 | 0.85 | 77 | 0.71 | 160 | 0.76 | +| Rmd | 1 | 0.11 | 42 | 0.15 | 32 | 0.29 | 51 | 0.24 | ## Code of Conduct diff --git a/inst/java/AthenaJDBC42_2.0.6.jar b/inst/java/AthenaJDBC42_2.0.6.jar deleted file mode 100644 index 1e1f6c7..0000000 Binary files a/inst/java/AthenaJDBC42_2.0.6.jar and /dev/null differ diff --git a/inst/java/log4j.properties b/inst/java/log4j.properties deleted file mode 100644 index 3485ec5..0000000 --- a/inst/java/log4j.properties +++ /dev/null @@ -1 +0,0 @@ -log4j.rootLogger=WARN diff --git a/man/Athena.Rd b/man/Athena.Rd index f919af0..13b8c9d 100644 --- a/man/Athena.Rd +++ b/man/Athena.Rd @@ -1,18 +1,14 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/jdbc.r, R/sql_translate_env.R +% Please edit documentation in R/jdbc.r \name{Athena} \alias{Athena} -\alias{db_data_type.AthenaConnection} -\alias{sql_translate_env.AthenaConnection} 
\title{AthenaJDBC} \usage{ Athena(identifier.quote = "`") - -db_data_type.AthenaConnection(con, fields, ...) - -sql_translate_env.AthenaConnection(con) +} +\arguments{ +\item{identifier.quote}{how to quote identifiers} } \description{ AthenaJDBC } -\keyword{internal} diff --git a/man/AthenaConnection-class.Rd b/man/AthenaConnection-class.Rd index d788751..7fbca60 100644 --- a/man/AthenaConnection-class.Rd +++ b/man/AthenaConnection-class.Rd @@ -4,6 +4,13 @@ \name{AthenaConnection-class} \alias{AthenaConnection-class} \title{AthenaJDBC} +\arguments{ +\item{jc}{job ref} + +\item{identifier.quote}{how to quote identifiers} + +\item{fetch_size}{Athena results fetch size} +} \description{ AthenaJDBC } diff --git a/man/dbConnect-AthenaDriver-method.Rd b/man/dbConnect-AthenaDriver-method.Rd index 001daec..2a28101 100644 --- a/man/dbConnect-AthenaDriver-method.Rd +++ b/man/dbConnect-AthenaDriver-method.Rd @@ -14,6 +14,8 @@ socket_timeout = 10000, log_path = "", log_level = 0, ...) } \arguments{ +\item{drv}{driver} + \item{provider}{JDBC auth provider (ideally leave default)} \item{region}{AWS region the Athena tables are in} @@ -22,6 +24,8 @@ \item{schema_name}{LOL if only this actually worked with Amazon's hacked Presto driver} +\item{fetch_size}{Athena results fetch size} + \item{max_error_retries, connection_timeout, socket_timeout}{technical connection info that you should only muck with if you know what you're doing.} \item{log_path, log_level}{The Athena JDBC driver can (shockingly) provide a decent bit diff --git a/man/dbGetInfo-AthenaConnection-method.Rd b/man/dbGetInfo-AthenaConnection-method.Rd new file mode 100644 index 0000000..78ddd40 --- /dev/null +++ b/man/dbGetInfo-AthenaConnection-method.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/z-dbGetQuery.R +\docType{methods} +\name{dbGetInfo,AthenaConnection-method} +\alias{dbGetInfo,AthenaConnection-method} +\title{Retrieve connection/driver/database metadata} 
+\usage{ +\S4method{dbGetInfo}{AthenaConnection}(dbObj, ...) +} +\arguments{ +\item{dbObj}{driver/connection} + +\item{...}{unused} +} +\description{ +Retrieve connection/driver/database metadata +} +\keyword{internal} diff --git a/man/dbGetInfo-AthenaDriver-method.Rd b/man/dbGetInfo-AthenaDriver-method.Rd new file mode 100644 index 0000000..56f0b5e --- /dev/null +++ b/man/dbGetInfo-AthenaDriver-method.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/z-dbGetQuery.R +\docType{methods} +\name{dbGetInfo,AthenaDriver-method} +\alias{dbGetInfo,AthenaDriver-method} +\title{Retrieve connection/driver/database metadata} +\usage{ +\S4method{dbGetInfo}{AthenaDriver}(dbObj, ...) +} +\arguments{ +\item{dbObj}{driver/connection} + +\item{...}{unused} +} +\description{ +Retrieve connection/driver/database metadata +} +\keyword{internal} diff --git a/man/fetch-AthenaResult-numeric-method.Rd b/man/fetch-AthenaResult-numeric-method.Rd index cde0b0d..f9dc213 100644 --- a/man/fetch-AthenaResult-numeric-method.Rd +++ b/man/fetch-AthenaResult-numeric-method.Rd @@ -16,6 +16,8 @@ or `n = Inf` to retrieve all pending records. Some implementations may recognize other special values.} +\item{block}{block size} + \item{...}{Other arguments passed on to methods.} } \description{ diff --git a/man/metis.Rd b/man/metis.Rd new file mode 100644 index 0000000..d705391 --- /dev/null +++ b/man/metis.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/metis-package.R +\docType{package} +\encoding{UTF-8} +\name{metis} +\alias{metis} +\alias{metis-package} +\title{Access and Query Amazon Athena via DBI/JDBC} +\description{ +Methods are provided to connect to 'Amazon' 'Athena', lookup +schemas/tables, perform queries and retrieve query results using the +Athena JDBC driver found in 'metis.jars'. 
+} +\references{ +\href{https://s3.amazonaws.com/athena-downloads/drivers/JDBC/SimbaAthenaJDBC_2.0.6/docs/Simba+Athena+JDBC+Driver+Install+and+Configuration+Guide.pdf}{Simba Athena JDBC Driver with SQL Connector Installation and Configuration Guide} +} +\author{ +Bob Rudis (bob@rud.is) +} +\keyword{internal} diff --git a/man/metis.lite.Rd b/man/metis.lite.Rd deleted file mode 100644 index 633a08f..0000000 --- a/man/metis.lite.Rd +++ /dev/null @@ -1,35 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/metis-lite-package.R -\docType{package} -\encoding{UTF-8} -\name{metis.lite} -\alias{metis.lite} -\alias{metis.lite-package} -\title{Access and Query Amazon Athena via DBI/JDBC} -\description{ -Methods are provides to connect to 'Amazon' 'Athena', lookup schemas/tables, -perform queries and retrieve query results. A lightweight 'RJDBC' implementation -is included along with an interface to the 'AWS' command-line utility. -} -\section{IMPORTANT}{ - - -Since R 3.5 (I don't remember this happening in R 3.4.x) signals sent from interrupting -Athena JDBC calls crash the R #' interpreter. You need to set the \code{-Xrs} option to avoid -signals being passed on to the JVM owner. 
That has to be done \emph{before} \code{rJava} is -loaded so you either need to remember to put it at the top of all scripts \emph{or} stick this -in your local #' \code{~/.Rprofile} and/or sitewide \code{Rprofile}:\preformatted{if (!grepl("-Xrs", getOption("java.parameters", ""))) { - options( - "java.parameters" = c(getOption("java.parameters", default = NULL), "-Xrs") - ) -} -} -} - -\references{ -\href{https://s3.amazonaws.com/athena-downloads/drivers/JDBC/SimbaAthenaJDBC_2.0.6/docs/Simba+Athena+JDBC+Driver+Install+and+Configuration+Guide.pdf}{Simba Athena JDBC Driver with SQL Connector Installation and Configuration Guide} -} -\author{ -Bob Rudis (bob@rud.is) -} -\keyword{internal} diff --git a/man/use_credentials.Rd b/man/use_credentials.Rd index b6a2060..8d9fe15 100644 --- a/man/use_credentials.Rd +++ b/man/use_credentials.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/metis-lite-package.R +% Please edit documentation in R/metis-package.R \name{use_credentials} \alias{use_credentials} \alias{read_credentials} diff --git a/metis-lite.Rproj b/metis.Rproj similarity index 100% rename from metis-lite.Rproj rename to metis.Rproj diff --git a/tests/test-all.R b/tests/test-all.R index fb8feef..9bebc31 100644 --- a/tests/test-all.R +++ b/tests/test-all.R @@ -1,2 +1,2 @@ library(testthat) -test_check("metis-lite") +test_check("metis") diff --git a/tests/testthat/test-metis-lite.R b/tests/testthat/test-metis-lite.R deleted file mode 100644 index ab6f62f..0000000 --- a/tests/testthat/test-metis-lite.R +++ /dev/null @@ -1,6 +0,0 @@ -context("basic functionality") -test_that("we can do something", { - - #expect_that(some_function(), is_a("data.frame")) - -}) diff --git a/tests/testthat/test-metis.R b/tests/testthat/test-metis.R new file mode 100644 index 0000000..96f56fe --- /dev/null +++ b/tests/testthat/test-metis.R @@ -0,0 +1,28 @@ +context("Driver & queries work") + +skip_on_cran() + +drv <- metis::Athena() + 
+expect_is(drv, "AthenaDriver") + +dbConnect( + drv = drv, + schema_name = "sampledb", + provider = "com.simba.athena.amazonaws.auth.PropertiesFileCredentialsProvider", + AwsCredentialsProviderArguments = path.expand("~/.aws/athenaCredentials.props"), + s3_staging_dir = "s3://aws-athena-query-results-569593279821-us-east-1", +) -> con + +expect_is(con, "AthenaConnection") + +expect_equal(dbListTables(con, schema="sampledb"), "elb_logs") + +expect_true(dbExistsTable(con, "elb_logs", schema="sampledb")) + +expect_true("url" %in% dbListFields(con, "elb_logs", "sampledb")) + +expect_is( + dbGetQuery(con, "SELECT * FROM sampledb.elb_logs LIMIT 10"), + "data.frame" +)