Browse Source

update

master
boB Rudis 1 year ago
parent
commit
3cb5c76989
No known key found for this signature in database GPG Key ID: 1D7529BE14E2BBA9
12 changed files with 129 additions and 52 deletions
  1. +3
    -3
      DESCRIPTION
  2. +1
    -0
      NAMESPACE
  3. +5
    -0
      NEWS.md
  4. +0
    -26
      R/awscli-util.R
  5. +0
    -13
      R/list-query-executions.R
  6. +101
    -0
      R/sql_translate_env.R
  7. +5
    -1
      man/Athena.Rd
  8. +4
    -3
      man/athena_connect.Rd
  9. +4
    -3
      man/dbConnect-AthenaDriver-method.Rd
  10. +2
    -1
      man/dbExistsTable-AthenaConnection-character-method.Rd
  11. +2
    -1
      man/dbListFields-AthenaConnection-character-method.Rd
  12. +2
    -1
      man/dbListTables-AthenaConnection-method.Rd

+ 3
- 3
DESCRIPTION View File

@@ -1,7 +1,7 @@
Package: metis
Type: Package
Title: Helpers for Accessing and Querying Amazon Athena
Version: 0.3.0
Version: 0.4.0
Date: 2018-03-19
Authors@R: c(
person("Bob", "Rudis", email = "bob@rud.is", role = c("aut", "cre"),
@@ -31,6 +31,6 @@ Imports:
readr,
aws.signature,
uuid,
sys,
reticulate,
jsonlite
RoxygenNote: 6.0.1.9000
RoxygenNote: 6.1.1

+ 1
- 0
NAMESPACE View File

@@ -1,5 +1,6 @@
# Generated by roxygen2: do not edit by hand

S3method(sql_translate_env,AthenaConnection)
export(Athena)
export(athena_connect)
export(read_credentials)


+ 5
- 0
NEWS.md View File

@@ -1,6 +1,11 @@
0.4.0
=========

- Added `sql_translate_env.AthenaConnection()`

0.2.0
=========

- Updated authentication provider to be `com.amazonaws.athena.jdbc.shaded.com.amazonaws.auth.DefaultAWSCredentialsProviderChain` (via @dabdine)
- Now supports additional DBI/RJDBC methods including: `dbExistsTable()`,
`dbListFields()`, `dbListTables()`, `dbReadTable()`


+ 0
- 26
R/awscli-util.R View File

@@ -1,26 +0,0 @@
# Locate the `aws` command-line executable.
#
# Looks the program up with `Sys.which()` and strips the name attribute so the
# caller receives a bare, length-one character vector holding whatever path
# `Sys.which()` reported.
.aws_bin <- function() {
  aws_path <- Sys.which("aws")
  names(aws_path) <- NULL
  aws_path
}

# Run `aws athena <...>` and capture its standard output.
#
# @param ... Arguments appended after the `athena` sub-command. They are
#   flattened with `unlist()`, so vectors and lists are both accepted. When
#   nothing is supplied, `"help"` is used instead.
# @return Invisibly, the captured stdout as a single character string, or
#   `NULL` (invisibly) when the process wrote nothing to stdout. If `"help"`
#   is among the arguments the text is also printed with `cat()`.
.athenacli <- function(...) {

  extra <- list(...)
  if (length(extra) == 0) {
    extra <- "help"
  }

  cli_args <- c("athena", unlist(extra, use.names = FALSE))

  # `error = FALSE` is forwarded to sys::exec_internal(); only stdout is
  # inspected below, the exit status and stderr are not looked at.
  proc <- sys::exec_internal(.aws_bin(), args = cli_args, error = FALSE)

  # Nothing on stdout: hand back an invisible NULL.
  if (length(proc$stdout) == 0) {
    return(invisible(NULL))
  }

  txt <- rawToChar(proc$stdout)

  if ("help" %in% cli_args) {
    cat(txt, sep = "")
  }

  invisible(txt)

}

+ 0
- 13
R/list-query-executions.R View File

@@ -1,13 +0,0 @@
# List Athena query executions through the AWS CLI.
#
# Builds the argument vector for `aws athena list-query-executions`, runs it
# via `.athenacli()` and parses the captured output as JSON.
#
# @param max_items Value for `--max-items`; coerced with `as.integer()`.
# @param starting_token Optional value for `--starting-token`; omitted from
#   the command line when `NULL`.
# @param page_size Optional value for `--page-size`; coerced with
#   `as.integer()` and omitted from the command line when `NULL`.
# @return The result of `jsonlite::fromJSON()` applied to the CLI output.
list_query_executions <- function(max_items=10, starting_token=NULL, page_size=NULL) {

  args <- c(
    "list-query-executions",
    sprintf("--max-items=%s", as.integer(max_items))
  )

  if (!is.null(starting_token)) {
    args <- c(args, sprintf("--starting-token=%s", starting_token))
  }
  if (!is.null(page_size)) {
    args <- c(args, sprintf("--page-size=%s", as.integer(page_size)))
  }

  res <- .athenacli(args)

  # `.athenacli()` yields NULL when the process wrote nothing to stdout; there
  # is nothing to parse in that case, so fail with an explicit message.
  if (is.null(res)) {
    stop(
      "`aws athena list-query-executions` returned no output",
      call. = FALSE
    )
  }

  # The captured CLI output must be handed to the parser; calling
  # `fromJSON()` with no argument always errors.
  jsonlite::fromJSON(res)

}

+ 101
- 0
R/sql_translate_env.R View File

@@ -0,0 +1,101 @@
#' @rdname Athena
#' @keywords internal
#' @export
sql_translate_env.AthenaConnection <- function(con) {

  # Builds the dbplyr translation environment for Athena connections: a
  # `dbplyr::sql_variant()` made of three translators (scalar, aggregate and
  # window), each extending the matching dbplyr base translator.
  #
  # `con` is only used for S3 dispatch; the translation table does not depend
  # on the connection object.
  #
  # NOTE(review): a number of the scalar entries (e.g. FLATTEN, KVGEN,
  # CONVERT_TO/CONVERT_FROM, CAST ... AS CHARACTER) look like they may come
  # from a different SQL dialect -- confirm Athena accepts them.

  dbplyr::sql_variant(

    scalar = dbplyr::sql_translator(
      .parent = dbplyr::base_scalar,
      `!=` = dbplyr::sql_infix("<>"),

      # Type casts. Every fragment needs the space before `AS` so the emitted
      # SQL reads `CAST(<expr> AS <type>)`.
      as.integer64 = function(x) dbplyr::build_sql("CAST(", x, " AS BIGINT)"),
      as.numeric = function(x) dbplyr::build_sql("CAST(", x, " AS DOUBLE)"),
      as.character = function(x) dbplyr::build_sql("CAST(", x, " AS CHARACTER)"),
      as.date = function(x) dbplyr::build_sql("CAST(", x, " AS DATE)"),
      as.Date = function(x) dbplyr::build_sql("CAST(", x, " AS DATE)"),
      as.POSIXct = function(x) dbplyr::build_sql("CAST(", x, " AS TIMESTAMP)"),
      as.posixct = function(x) dbplyr::build_sql("CAST(", x, " AS TIMESTAMP)"),
      as.logical = function(x) dbplyr::build_sql("CAST(", x, " AS BOOLEAN)"),

      # Hand-built translations. Note the argument reordering for grepl/gsub:
      # R puts the pattern first, the SQL call puts the subject first.
      date_part = function(x, y) dbplyr::build_sql("DATE_PART(", x, ",", y, ")"),
      grepl = function(x, y) dbplyr::build_sql("CONTAINS(", y, ", ", x, ")"),
      gsub = function(x, y, z) {
        dbplyr::build_sql("REGEXP_REPLACE(", z, ", ", x, ",", y, ")")
      },
      trimws = function(x) dbplyr::build_sql("TRIM(both ' ' FROM ", x, ")"),

      # Straight pass-through to SQL functions with a fixed argument count.
      cbrt = dbplyr::sql_prefix("CBRT", 1),
      degrees = dbplyr::sql_prefix("DEGREES", 1),
      e = dbplyr::sql_prefix("E", 0),
      row_number = dbplyr::sql_prefix("row_number", 0),
      lshift = dbplyr::sql_prefix("LSHIFT", 2),
      mod = dbplyr::sql_prefix("MOD", 2),
      age = dbplyr::sql_prefix("AGE", 1),
      negative = dbplyr::sql_prefix("NEGATIVE", 1),
      pi = dbplyr::sql_prefix("PI", 0),
      pow = dbplyr::sql_prefix("POW", 2),
      radians = dbplyr::sql_prefix("RADIANS", 1),
      rand = dbplyr::sql_prefix("RAND", 0),
      rshift = dbplyr::sql_prefix("RSHIFT", 2),
      trunc = dbplyr::sql_prefix("TRUNC", 2),
      contains = dbplyr::sql_prefix("CONTAINS", 2),
      convert_to = dbplyr::sql_prefix("CONVERT_TO", 2),
      convert_from = dbplyr::sql_prefix("CONVERT_FROM", 2),
      string_binary = dbplyr::sql_prefix("STRING_BINARY", 1),
      binary_string = dbplyr::sql_prefix("BINARY_STRING", 1),
      to_char = dbplyr::sql_prefix("TO_CHAR", 2),
      to_date = dbplyr::sql_prefix("TO_DATE", 2),
      to_number = dbplyr::sql_prefix("TO_NUMBER", 2),
      char_to_timestamp = dbplyr::sql_prefix("TO_TIMESTAMP", 2),
      double_to_timestamp = dbplyr::sql_prefix("TO_TIMESTAMP", 1),
      char_length = dbplyr::sql_prefix("CHAR_LENGTH", 1),
      flatten = dbplyr::sql_prefix("FLATTEN", 1),
      kvgen = dbplyr::sql_prefix("KVGEN", 1),
      repeated_count = dbplyr::sql_prefix("REPEATED_COUNT", 1),
      repeated_contains = dbplyr::sql_prefix("REPEATED_CONTAINS", 2),
      ilike = dbplyr::sql_prefix("ILIKE", 2),
      init_cap = dbplyr::sql_prefix("INIT_CAP", 1),
      length = dbplyr::sql_prefix("LENGTH", 1),
      lower = dbplyr::sql_prefix("LOWER", 1),
      tolower = dbplyr::sql_prefix("LOWER", 1),
      ltrim = dbplyr::sql_prefix("LTRIM", 2),
      nullif = dbplyr::sql_prefix("NULLIF", 2),
      position = function(x, y) dbplyr::build_sql("POSITION(", x, " IN ", y, ")"),
      regexp_replace = dbplyr::sql_prefix("REGEXP_REPLACE", 3),
      rtrim = dbplyr::sql_prefix("RTRIM", 2),
      rpad = dbplyr::sql_prefix("RPAD", 2),
      rpad_with = dbplyr::sql_prefix("RPAD", 3),
      lpad = dbplyr::sql_prefix("LPAD", 2),
      lpad_with = dbplyr::sql_prefix("LPAD", 3),
      strpos = dbplyr::sql_prefix("STRPOS", 2),
      substr = dbplyr::sql_prefix("SUBSTR", 3),
      trim = function(x, y, z) {
        dbplyr::build_sql("TRIM(", x, " ", y, " FROM ", z, ")")
      },
      upper = dbplyr::sql_prefix("UPPER", 1),
      toupper = dbplyr::sql_prefix("UPPER", 1)
    ),

    aggregate = dbplyr::sql_translator(
      .parent = dbplyr::base_agg,
      n = function() dbplyr::sql("COUNT(*)"),
      cor = dbplyr::sql_prefix("CORR"),
      cov = dbplyr::sql_prefix("COVAR_SAMP"),
      sd = dbplyr::sql_prefix("STDDEV_SAMP"),
      var = dbplyr::sql_prefix("VAR_SAMP"),
      n_distinct = function(x) {
        dbplyr::build_sql(dbplyr::sql("COUNT(DISTINCT "), x, dbplyr::sql(")"))
      }
    ),

    window = dbplyr::sql_translator(
      .parent = dbplyr::base_win,
      n = function() {
        dbplyr::win_over(
          dbplyr::sql("count(*)"),
          partition = dbplyr::win_current_group()
        )
      },
      cor = dbplyr::win_recycled("corr"),
      cov = dbplyr::win_recycled("covar_samp"),
      sd = dbplyr::win_recycled("stddev_samp"),
      var = dbplyr::win_recycled("var_samp"),
      all = dbplyr::win_recycled("bool_and"),
      any = dbplyr::win_recycled("bool_or")
    )

  )

}

+ 5
- 1
man/Athena.Rd View File

@@ -1,11 +1,15 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/jdbc.r
% Please edit documentation in R/jdbc.r, R/sql_translate_env.R
\name{Athena}
\alias{Athena}
\alias{sql_translate_env.AthenaConnection}
\title{AthenaJDBC}
\usage{
Athena(identifier.quote = "`")

\method{sql_translate_env}{AthenaConnection}(con)
}
\description{
AthenaJDBC
}
\keyword{internal}

+ 4
- 3
man/athena_connect.Rd View File

@@ -6,9 +6,10 @@
\usage{
athena_connect(default_schema = "default", region = c("us-east-1",
"us-east-2", "us-west-2"),
s3_staging_dir = Sys.getenv("AWS_S3_STAGING_DIR"), max_error_retries = 10,
connection_timeout = 10000, socket_timeout = 10000, log_path = "",
log_level = c("OFF", "FATAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE"))
s3_staging_dir = Sys.getenv("AWS_S3_STAGING_DIR"),
max_error_retries = 10, connection_timeout = 10000,
socket_timeout = 10000, log_path = "", log_level = c("OFF",
"FATAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE"))
}
\arguments{
\item{default_schema}{default schema (you'll still need to fully qualify non-default schema table names)}


+ 4
- 3
man/dbConnect-AthenaDriver-method.Rd View File

@@ -7,10 +7,11 @@
\usage{
\S4method{dbConnect}{AthenaDriver}(drv,
provider = "com.simba.athena.amazonaws.auth.DefaultAWSCredentialsProviderChain",
region = "us-east-1", s3_staging_dir = Sys.getenv("AWS_S3_STAGING_DIR"),
region = "us-east-1",
s3_staging_dir = Sys.getenv("AWS_S3_STAGING_DIR"),
schema_name = "default", max_error_retries = 10,
connection_timeout = 10000, socket_timeout = 10000, log_path, log_level,
...)
connection_timeout = 10000, socket_timeout = 10000, log_path,
log_level, ...)
}
\arguments{
\item{provider}{JDBC auth provider (ideally leave default)}


+ 2
- 1
man/dbExistsTable-AthenaConnection-character-method.Rd View File

@@ -5,7 +5,8 @@
\alias{dbExistsTable,AthenaConnection,character-method}
\title{AthenaJDBC}
\usage{
\S4method{dbExistsTable}{AthenaConnection,character}(conn, name, schema, ...)
\S4method{dbExistsTable}{AthenaConnection,character}(conn, name, schema,
...)
}
\arguments{
\item{conn}{Athena connection}


+ 2
- 1
man/dbListFields-AthenaConnection-character-method.Rd View File

@@ -5,7 +5,8 @@
\alias{dbListFields,AthenaConnection,character-method}
\title{AthenaJDBC}
\usage{
\S4method{dbListFields}{AthenaConnection,character}(conn, name, schema, ...)
\S4method{dbListFields}{AthenaConnection,character}(conn, name, schema,
...)
}
\arguments{
\item{conn}{Athena connection}


+ 2
- 1
man/dbListTables-AthenaConnection-method.Rd View File

@@ -5,7 +5,8 @@
\alias{dbListTables,AthenaConnection-method}
\title{AthenaJDBC}
\usage{
\S4method{dbListTables}{AthenaConnection}(conn, pattern = "*", schema, ...)
\S4method{dbListTables}{AthenaConnection}(conn, pattern = "*", schema,
...)
}
\arguments{
\item{conn}{Athena connection}


Loading…
Cancel
Save