diff --git a/DESCRIPTION b/DESCRIPTION index b4ba24b..74ca400 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: metis Type: Package Title: Helpers for Accessing and Querying Amazon Athena -Version: 0.3.0 +Version: 0.4.0 Date: 2018-03-19 Authors@R: c( person("Bob", "Rudis", email = "bob@rud.is", role = c("aut", "cre"), @@ -31,6 +31,6 @@ Imports: readr, aws.signature, uuid, - sys, + reticulate, jsonlite -RoxygenNote: 6.0.1.9000 +RoxygenNote: 6.1.1 diff --git a/NAMESPACE b/NAMESPACE index e743d02..1f7afe0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand +S3method(sql_translate_env,AthenaConnection) export(Athena) export(athena_connect) export(read_credentials) diff --git a/NEWS.md b/NEWS.md index 31a459f..789cc4f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,11 @@ -0.2.0 +0.4.0 ========= +- Added `sql_translate_env.AthenaConnection()` + +0.2.0 +========= + - Updated authentication provider to be `com.amazonaws.athena.jdbc.shaded.com.amazonaws.auth.DefaultAWSCredentialsProviderChain` (via @dabdine) - Now supports additional DBI/RJDBC methods including: `dbExistsTable()`, `dbListFields()`, `dbListTables()`, `dbReadTable()` diff --git a/R/awscli-util.R b/R/awscli-util.R deleted file mode 100644 index 905fd63..0000000 --- a/R/awscli-util.R +++ /dev/null @@ -1,26 +0,0 @@ -.aws_bin <- function() { - unname(Sys.which('aws')) -} - -.athenacli <- function(...) { - - args <- c("athena") - - in_args <- list(...) 
- if (length(in_args) == 0) in_args <- "help" - - args <- c(args, unlist(in_args, use.names=FALSE)) - - res <- sys::exec_internal(.aws_bin(), args = args, error = FALSE) - - if (length(res$stdout) > 0) { - - out <- rawToChar(res$stdout) - - if ("help" %in% args) cat(out, sep="") - - invisible(out) - - } - -} diff --git a/R/list-query-executions.R b/R/list-query-executions.R deleted file mode 100644 index 115a3cd..0000000 --- a/R/list-query-executions.R +++ /dev/null @@ -1,13 +0,0 @@ -list_query_executions <- function(max_items=10, starting_token=NULL, page_size=NULL) { - - - args <- c("list-query-executions", sprintf("--max-items=%s", as.integer(max_items))) - - if (!is.null(starting_token)) args <- c(args, sprintf("--starting-token=%s", starting_token)) - if (!is.null(page_size)) args <- c(args, sprintf("--page-size=%s", as.integer(page_size))) - - res <- .athenacli(args) - - jsonlite::fromJSON() - -} \ No newline at end of file diff --git a/R/sql_translate_env.R b/R/sql_translate_env.R new file mode 100644 index 0000000..0de8bef --- /dev/null +++ b/R/sql_translate_env.R @@ -0,0 +1,101 @@ +#' @rdname Athena +#' @keywords internal +#' @export +sql_translate_env.AthenaConnection <- function(con) { + + x <- con + + dbplyr::sql_variant( + + scalar = dbplyr::sql_translator( + .parent = dbplyr::base_scalar, + `!=` = dbplyr::sql_infix("<>"), + as.integer64 = function(x) dbplyr::build_sql("CAST(", x, " AS BIGINT)"), + as.numeric = function(x) dbplyr::build_sql("CAST(", x, " AS DOUBLE)"), + as.character = function(x) dbplyr::build_sql("CAST(", x, " AS VARCHAR)"), + as.date = function(x) dbplyr::build_sql("CAST(", x, " AS DATE)"), + as.Date = function(x) dbplyr::build_sql("CAST(", x, " AS DATE)"), + as.POSIXct = function(x) dbplyr::build_sql("CAST(", x, " AS TIMESTAMP)"), + as.posixct = function(x) dbplyr::build_sql("CAST(", x, " AS TIMESTAMP)"), + as.logical = function(x) dbplyr::build_sql("CAST(", x, " AS BOOLEAN)"), + date_part = function(x, y) 
dbplyr::build_sql("DATE_PART(", x, ",", y ,")"), + grepl = function(x, y) dbplyr::build_sql("CONTAINS(", y, ", ", x, ")"), + gsub = function(x, y, z) dbplyr::build_sql("REGEXP_REPLACE(", z, ", ", x, ",", y ,")"), + trimws = function(x) dbplyr::build_sql("TRIM(both ' ' FROM ", x, ")"), + cbrt = dbplyr::sql_prefix("CBRT", 1), + degrees = dbplyr::sql_prefix("DEGREES", 1), + e = dbplyr::sql_prefix("E", 0), + row_number = dbplyr::sql_prefix("row_number", 0), + lshift = dbplyr::sql_prefix("LSHIFT", 2), + mod = dbplyr::sql_prefix("MOD", 2), + age = dbplyr::sql_prefix("AGE", 1), + negative = dbplyr::sql_prefix("NEGATIVE", 1), + pi = dbplyr::sql_prefix("PI", 0), + pow = dbplyr::sql_prefix("POW", 2), + radians = dbplyr::sql_prefix("RADIANS", 1), + rand = dbplyr::sql_prefix("RAND", 0), + rshift = dbplyr::sql_prefix("RSHIFT", 2), + trunc = dbplyr::sql_prefix("TRUNC", 2), + contains = dbplyr::sql_prefix("CONTAINS", 2), + convert_to = dbplyr::sql_prefix("CONVERT_TO", 2), + convert_from = dbplyr::sql_prefix("CONVERT_FROM", 2), + string_binary = dbplyr::sql_prefix("STRING_BINARY", 1), + binary_string = dbplyr::sql_prefix("BINARY_STRING", 1), + to_char = dbplyr::sql_prefix("TO_CHAR", 2), + to_date = dbplyr::sql_prefix("TO_DATE", 2), + to_number = dbplyr::sql_prefix("TO_NUMBER", 2), + char_to_timestamp = dbplyr::sql_prefix("TO_TIMESTAMP", 2), + double_to_timestamp = dbplyr::sql_prefix("TO_TIMESTAMP", 1), + char_length = dbplyr::sql_prefix("CHAR_LENGTH", 1), + flatten = dbplyr::sql_prefix("FLATTEN", 1), + kvgen = dbplyr::sql_prefix("KVGEN", 1), + repeated_count = dbplyr::sql_prefix("REPEATED_COUNT", 1), + repeated_contains = dbplyr::sql_prefix("REPEATED_CONTAINS", 2), + ilike = dbplyr::sql_prefix("ILIKE", 2), + init_cap = dbplyr::sql_prefix("INIT_CAP", 1), + length = dbplyr::sql_prefix("LENGTH", 1), + lower = dbplyr::sql_prefix("LOWER", 1), + tolower = dbplyr::sql_prefix("LOWER", 1), + ltrim = dbplyr::sql_prefix("LTRIM", 2), + nullif = dbplyr::sql_prefix("NULLIF", 2), + position = 
function(x, y) dbplyr::build_sql("POSITION(", x, " IN ", y, ")"), + regexp_replace = dbplyr::sql_prefix("REGEXP_REPLACE", 3), + rtrim = dbplyr::sql_prefix("RTRIM", 2), + rpad = dbplyr::sql_prefix("RPAD", 2), + rpad_with = dbplyr::sql_prefix("RPAD", 3), + lpad = dbplyr::sql_prefix("LPAD", 2), + lpad_with = dbplyr::sql_prefix("LPAD", 3), + strpos = dbplyr::sql_prefix("STRPOS", 2), + substr = dbplyr::sql_prefix("SUBSTR", 3), + trim = function(x, y, z) dbplyr::build_sql("TRIM(", x, " ", y, " FROM ", z, ")"), + upper = dbplyr::sql_prefix("UPPER", 1), + toupper = dbplyr::sql_prefix("UPPER", 1) + ), + + aggregate = dbplyr::sql_translator( + .parent = dbplyr::base_agg, + n = function() dbplyr::sql("COUNT(*)"), + cor = dbplyr::sql_prefix("CORR"), + cov = dbplyr::sql_prefix("COVAR_SAMP"), + sd = dbplyr::sql_prefix("STDDEV_SAMP"), + var = dbplyr::sql_prefix("VAR_SAMP"), + n_distinct = function(x) { + dbplyr::build_sql(dbplyr::sql("COUNT(DISTINCT "), x, dbplyr::sql(")")) + } + ), + + window = dbplyr::sql_translator( + .parent = dbplyr::base_win, + n = function() { dbplyr::win_over(dbplyr::sql("count(*)"), + partition = dbplyr::win_current_group()) }, + cor = dbplyr::win_recycled("corr"), + cov = dbplyr::win_recycled("covar_samp"), + sd = dbplyr::win_recycled("stddev_samp"), + var = dbplyr::win_recycled("var_samp"), + all = dbplyr::win_recycled("bool_and"), + any = dbplyr::win_recycled("bool_or") + ) + + ) + +} \ No newline at end of file diff --git a/man/Athena.Rd b/man/Athena.Rd index 11c7752..dea61e7 100644 --- a/man/Athena.Rd +++ b/man/Athena.Rd @@ -1,11 +1,15 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/jdbc.r +% Please edit documentation in R/jdbc.r, R/sql_translate_env.R \name{Athena} \alias{Athena} +\alias{sql_translate_env.AthenaConnection} \title{AthenaJDBC} \usage{ Athena(identifier.quote = "`") + +\method{sql_translate_env}{AthenaConnection}(con) } \description{ AthenaJDBC } +\keyword{internal} diff --git a/man/athena_connect.Rd 
b/man/athena_connect.Rd index 8a1040b..8fe7594 100644 --- a/man/athena_connect.Rd +++ b/man/athena_connect.Rd @@ -6,9 +6,10 @@ \usage{ athena_connect(default_schema = "default", region = c("us-east-1", "us-east-2", "us-west-2"), - s3_staging_dir = Sys.getenv("AWS_S3_STAGING_DIR"), max_error_retries = 10, - connection_timeout = 10000, socket_timeout = 10000, log_path = "", - log_level = c("OFF", "FATAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE")) + s3_staging_dir = Sys.getenv("AWS_S3_STAGING_DIR"), + max_error_retries = 10, connection_timeout = 10000, + socket_timeout = 10000, log_path = "", log_level = c("OFF", + "FATAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE")) } \arguments{ \item{default_schema}{default schema (you'll still need to fully qualify non-default schema table names)} diff --git a/man/dbConnect-AthenaDriver-method.Rd b/man/dbConnect-AthenaDriver-method.Rd index 01c0b6d..0db47cd 100644 --- a/man/dbConnect-AthenaDriver-method.Rd +++ b/man/dbConnect-AthenaDriver-method.Rd @@ -7,10 +7,11 @@ \usage{ \S4method{dbConnect}{AthenaDriver}(drv, provider = "com.simba.athena.amazonaws.auth.DefaultAWSCredentialsProviderChain", - region = "us-east-1", s3_staging_dir = Sys.getenv("AWS_S3_STAGING_DIR"), + region = "us-east-1", + s3_staging_dir = Sys.getenv("AWS_S3_STAGING_DIR"), schema_name = "default", max_error_retries = 10, - connection_timeout = 10000, socket_timeout = 10000, log_path, log_level, - ...) + connection_timeout = 10000, socket_timeout = 10000, log_path, + log_level, ...) 
} \arguments{ \item{provider}{JDBC auth provider (ideally leave default)} diff --git a/man/dbExistsTable-AthenaConnection-character-method.Rd b/man/dbExistsTable-AthenaConnection-character-method.Rd index c871f61..24895cd 100644 --- a/man/dbExistsTable-AthenaConnection-character-method.Rd +++ b/man/dbExistsTable-AthenaConnection-character-method.Rd @@ -5,7 +5,8 @@ \alias{dbExistsTable,AthenaConnection,character-method} \title{AthenaJDBC} \usage{ -\S4method{dbExistsTable}{AthenaConnection,character}(conn, name, schema, ...) +\S4method{dbExistsTable}{AthenaConnection,character}(conn, name, schema, + ...) } \arguments{ \item{conn}{Athena connection} diff --git a/man/dbListFields-AthenaConnection-character-method.Rd b/man/dbListFields-AthenaConnection-character-method.Rd index 4194f5a..656fb2f 100644 --- a/man/dbListFields-AthenaConnection-character-method.Rd +++ b/man/dbListFields-AthenaConnection-character-method.Rd @@ -5,7 +5,8 @@ \alias{dbListFields,AthenaConnection,character-method} \title{AthenaJDBC} \usage{ -\S4method{dbListFields}{AthenaConnection,character}(conn, name, schema, ...) +\S4method{dbListFields}{AthenaConnection,character}(conn, name, schema, + ...) } \arguments{ \item{conn}{Athena connection} diff --git a/man/dbListTables-AthenaConnection-method.Rd b/man/dbListTables-AthenaConnection-method.Rd index 59b7c98..74976c8 100644 --- a/man/dbListTables-AthenaConnection-method.Rd +++ b/man/dbListTables-AthenaConnection-method.Rd @@ -5,7 +5,8 @@ \alias{dbListTables,AthenaConnection-method} \title{AthenaJDBC} \usage{ -\S4method{dbListTables}{AthenaConnection}(conn, pattern = "*", schema, ...) +\S4method{dbListTables}{AthenaConnection}(conn, pattern = "*", schema, + ...) } \arguments{ \item{conn}{Athena connection}