Browse Source

Update to AthenaJDBC42_2.0.2.jar

master
boB Rudis 6 years ago
parent
commit
0a3e157026
No known key found for this signature in database GPG Key ID: 1D7529BE14E2BBA9
  1. 2
      DESCRIPTION
  2. 66
      R/jdbc.r
  3. 34
      R/metis.r
  4. 4
      R/zzz.R
  5. 10
      README.Rmd
  6. 30
      README.md
  7. BIN
      inst/java/AthenaJDBC41-1.1.0.jar
  8. BIN
      inst/java/AthenaJDBC42_2.0.2.jar
  9. 1
      inst/java/log4j.properties
  10. 13
      man/athena_connect.Rd
  11. 13
      man/dbConnect-AthenaDriver-method.Rd

2
DESCRIPTION

@ -6,6 +6,7 @@ Date: 2018-03-19
Authors@R: c( Authors@R: c(
person("Bob", "Rudis", email = "bob@rud.is", role = c("aut", "cre"), person("Bob", "Rudis", email = "bob@rud.is", role = c("aut", "cre"),
comment = c(ORCID = "0000-0001-5670-2640")), comment = c(ORCID = "0000-0001-5670-2640")),
person("Derek", "Abdine", comment = "Authentication driver update"),
person("Zachary", "Kurtz", email = "zdkurtz@gmail.com", role = "ctb") person("Zachary", "Kurtz", email = "zdkurtz@gmail.com", role = "ctb")
) )
Maintainer: Bob Rudis <bob@rud.is> Maintainer: Bob Rudis <bob@rud.is>
@ -15,6 +16,7 @@ Description: Methods are provided to connect to 'Amazon' 'Athena', lookup schema
is included along with an interface to the 'AWS' command-line utility. is included along with an interface to the 'AWS' command-line utility.
URL: https://github.com/hrbrmstr/metis URL: https://github.com/hrbrmstr/metis
BugReports: https://github.com/hrbrmstr/metis/issues BugReports: https://github.com/hrbrmstr/metis/issues
SystemRequirements: JDK 1.8+
License: AGPL License: AGPL
Suggests: Suggests:
testthat, testthat,

66
R/jdbc.r

@ -1,3 +1,8 @@
# Lookup table translating Athena JDBC driver log-level names to the integer
# codes 0-6. Indexing with a name that is not listed yields NA.
.ll_trans <- c(
  OFF = 0L,
  FATAL = 1L,
  ERROR = 2L,
  WARNING = 3L,
  INFO = 4L,
  DEBUG = 5L,
  TRACE = 6L
)
#' AthenaJDBC #' AthenaJDBC
#' #'
#' @export #' @export
@ -18,8 +23,8 @@ setClass(
Athena <- function(identifier.quote = '`') { Athena <- function(identifier.quote = '`') {
JDBC( JDBC(
driverClass = "com.amazonaws.athena.jdbc.AthenaDriver", driverClass = "com.simba.athena.jdbc.Driver",
system.file("java", "AthenaJDBC41-1.1.0.jar", package = "metis"), system.file("java", "AthenaJDBC42_2.0.2.jar", package = "metis"),
identifier.quote = identifier.quote identifier.quote = identifier.quote
) -> drv ) -> drv
@ -33,10 +38,12 @@ Athena <- function(identifier.quote = '`') {
#' @param region AWS region the Athena tables are in #' @param region AWS region the Athena tables are in
#' @param s3_staging_dir A write-able bucket on S3 that you have permissions for #' @param s3_staging_dir A write-able bucket on S3 that you have permissions for
#' @param schema_name LOL if only this actually worked with Amazon's hacked Presto driver #' @param schema_name LOL if only this actually worked with Amazon's hacked Presto driver
#' @param max_error_retries,connection_timeout,socket_timeout,retry_base_delay,retry_max_backoff_time #' @param max_error_retries,connection_timeout,socket_timeout
#' technical connection info that you should only muck with if you know what you're doing. #' technical connection info that you should only muck with if you know what you're doing.
#' @param log_path,log_level The Athena JDBC driver can (shockingly) provide a decent bit #' @param log_path,log_level The Athena JDBC driver can (shockingly) provide a decent bit
#' of data in logs. Set this to a temporary directory or somethign log4j can use. #' of data in logs. Set this to a temporary directory or something log4j can use. For
#' `log_level` use the names ("OFF", "FATAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE") or
#' their corresponding integer values 0-6.
#' @param ... unused #' @param ... unused
#' @references <https://docs.aws.amazon.com/athena/latest/ug/connect-with-jdbc.html> #' @references <https://docs.aws.amazon.com/athena/latest/ug/connect-with-jdbc.html>
#' @export #' @export
@ -45,37 +52,42 @@ setMethod(
"dbConnect", "dbConnect",
"AthenaDriver", "AthenaDriver",
def = function(drv, def = function(
provider = "com.amazonaws.athena.jdbc.shaded.com.amazonaws.auth.DefaultAWSCredentialsProviderChain", drv,
region = "us-east-1", provider = "com.simba.athena.amazonaws.auth.DefaultAWSCredentialsProviderChain",
s3_staging_dir = Sys.getenv("AWS_S3_STAGING_DIR"), region = "us-east-1",
schema_name = "default", s3_staging_dir = Sys.getenv("AWS_S3_STAGING_DIR"),
max_error_retries = 10, schema_name = "default",
connection_timeout = 10000, max_error_retries = 10,
socket_timeout = 10000, connection_timeout = 10000,
retry_base_delay = 100, socket_timeout = 10000,
retry_max_backoff_time = 1000, # retry_base_delay = 100,
log_path, # retry_max_backoff_time = 1000,
log_level, log_path,
...) { log_level,
...) {
conn_string = sprintf( conn_string = sprintf(
'jdbc:awsathena://athena.%s.amazonaws.com:443/%s', region, schema_name 'jdbc:awsathena://athena.%s.amazonaws.com:443/%s', region, schema_name
) )
if (!(log_level %in% 0:6)) log_level <- .ll_trans[log_level]
callNextMethod( callNextMethod(
drv, drv,
conn_string, conn_string,
s3_staging_dir = s3_staging_dir, S3OutputLocation = s3_staging_dir,
schema_name = schema_name, Schema = schema_name,
max_error_retries = max_error_retries, MaxErrorRetry = max_error_retries,
connection_timeout = connection_timeout, ConnectTimeout = connection_timeout,
socket_timeout = socket_timeout, SocketTimeout = socket_timeout,
retry_base_delay = retry_base_delay, # retry_base_delay = retry_base_delay,
retry_max_backoff_time = retry_max_backoff_time, # retry_max_backoff_time = retry_max_backoff_time,
log_path = log_path, LogPath = log_path,
log_level = log_level, LogLevel = log_level,
aws_credentials_provider_class = provider, AwsCredentialsProviderClass = provider,
... ...
) -> jc ) -> jc

34
R/metis.r

@ -9,10 +9,11 @@
#' @param max_error_retries the maximum number of retries that the JDBC client attempts to make a request to Athena. #' @param max_error_retries the maximum number of retries that the JDBC client attempts to make a request to Athena.
#' @param connection_timeout the maximum amount of time, in milliseconds, to make a successful connection to Athena before an attempt is terminated. #' @param connection_timeout the maximum amount of time, in milliseconds, to make a successful connection to Athena before an attempt is terminated.
#' @param socket_timeout the maximum amount of time, in milliseconds, to wait for a socket in order to send data to Athena. #' @param socket_timeout the maximum amount of time, in milliseconds, to wait for a socket in order to send data to Athena.
#' @param retry_base_delay minimum delay amount, in milliseconds, between retrying attempts to connect Athena. # @param retry_base_delay minimum delay amount, in milliseconds, between retrying attempts to connect Athena.
#' @param retry_max_backoff_time maximum delay amount, in milliseconds, between retrying attempts to connect Athena. # @param retry_max_backoff_time maximum delay amount, in milliseconds, between retrying attempts to connect Athena.
#' @param log_path local path of the Athena JDBC driver logs. If no log path is provided, then no log files are created. #' @param log_path local path of the Athena JDBC driver logs. If no log path is provided, then no log files are created.
#' @param log_level log level of the Athena JDBC driver logs. #' @param log_level log level of the Athena JDBC driver logs. Use names
#' "OFF", "FATAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE".
#' @export #' @export
#' @examples \dontrun{ #' @examples \dontrun{
#' use_credentials("personal") #' use_credentials("personal")
@ -27,21 +28,22 @@
#' dbGetQuery(ath, "SELECT * FROM sampledb.elb_logs LIMIT 1") #' dbGetQuery(ath, "SELECT * FROM sampledb.elb_logs LIMIT 1")
#' #'
#' } #' }
athena_connect <- function(default_schema = "default", athena_connect <- function(
region = c("us-east-1", "us-east-2", "us-west-2"), default_schema = "default",
s3_staging_dir = Sys.getenv("AWS_S3_STAGING_DIR"), region = c("us-east-1", "us-east-2", "us-west-2"),
max_error_retries = 10, s3_staging_dir = Sys.getenv("AWS_S3_STAGING_DIR"),
connection_timeout = 10000, max_error_retries = 10,
socket_timeout = 10000, connection_timeout = 10000,
retry_base_delay = 100, socket_timeout = 10000,
retry_max_backoff_time = 1000, # retry_base_delay = 100,
log_path = "", # retry_max_backoff_time = 1000,
log_level = c("INFO", "DEBUG", "WARN", "ERROR", "ALL", "OFF", "FATAL", "TRACE")) { log_path = "",
log_level = c("OFF", "FATAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE")) {
athena_jdbc <- Athena() athena_jdbc <- Athena()
region <- match.arg(region, c("us-east-1", "us-east-2", "us-west-2")) region <- match.arg(region, c("us-east-1", "us-east-2", "us-west-2"))
log_level <- match.arg(log_level, c("INFO", "DEBUG", "WARN", "ERROR", "ALL", "OFF", "FATAL", "TRACE")) log_level <- match.arg(log_level, c("OFF", "FATAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE"))
# if (!simple) { # if (!simple) {
dbConnect( dbConnect(
@ -52,8 +54,8 @@ athena_connect <- function(default_schema = "default",
max_error_retries = max_error_retries, max_error_retries = max_error_retries,
connection_timeout = connection_timeout, connection_timeout = connection_timeout,
socket_timeout = socket_timeout, socket_timeout = socket_timeout,
retry_base_delay = retry_base_delay, # retry_base_delay = retry_base_delay,
retry_max_backoff_time = retry_max_backoff_time, # retry_max_backoff_time = retry_max_backoff_time,
log_path = log_path, log_path = log_path,
log_level = log_level log_level = log_level
) -> con ) -> con

4
R/zzz.R

@ -0,0 +1,4 @@
# Package load hook: sets up rJava for this package and adds the bundled
# files under the package's `java/` directory to the Java class path.
#
# libname: library directory the package is being loaded from
# pkgname: name of the package being loaded
.onLoad <- function(libname, pkgname) {
  rJava::.jpackage(pkgname, jars = "*", lib.loc = libname)

  # Locate the `java/` directory from the package itself instead of the
  # current working directory, so loading works no matter where the R
  # session was started. `system.file()` returns "" when the directory
  # cannot be found; in that case there is nothing extra to add.
  java_dir <- system.file("java", package = pkgname, lib.loc = libname)
  if (nzchar(java_dir)) {
    rJava::.jaddClassPath(dir(java_dir, full.names = TRUE))
  }
}

10
README.Rmd

@ -1,7 +1,8 @@
--- ---
output: rmarkdown::github_document output: rmarkdown::github_document
editor_options:
chunk_output_type: console
--- ---
![](https://upload.wikimedia.org/wikipedia/commons/thumb/5/53/Winged_goddess_Louvre_F32.jpg/300px-Winged_goddess_Louvre_F32.jpg)
# `metis` # `metis`
@ -15,10 +16,11 @@ In Greek mythology, Metis was Athena's "helper".
Still fairly beta-quality level but getting there. Still fairly beta-quality level but getting there.
The goal will be to get around enough of the "gotchas" that are preventing raw RJDBC Athena The goal will be to get around enough of the "gotchas" that are preventing raw RJDBC Athena connections from "just working" with `dplyr` v0.6.0+ and also get around the [`fetchSize` problem](https://www.reddit.com/r/aws/comments/6aq22b/fetchsize_limit/) without having to not use `dbGetQuery()`.
connections from "just working" with `dplyr` v0.6.0+ and also get around the [`fetchSize` problem](https://www.reddit.com/r/aws/comments/6aq22b/fetchsize_limit/) without having to not use `dbGetQuery()`.
The `AthenaJDBC41-1.1.0.jar` JAR file is included out of convenience but that will likely move to a separate package as this gets closer to prime time if this goes on CRAN. The `AthenaJDBC42_2.0.2.jar` JAR file is included out of convenience but that will likely move to a separate package as this gets closer to prime time if this goes on CRAN.
NOTE that the updated driver *REQUIRES JDK 1.8+*.
See the **Usage** section for an example. See the **Usage** section for an example.

30
README.md

@ -1,6 +1,4 @@
![](https://upload.wikimedia.org/wikipedia/commons/thumb/5/53/Winged_goddess_Louvre_F32.jpg/300px-Winged_goddess_Louvre_F32.jpg)
# `metis` # `metis`
Helpers for Accessing and Querying Amazon Athena Helpers for Accessing and Querying Amazon Athena
@ -19,10 +17,12 @@ v0.6.0+ and also get around the [`fetchSize`
problem](https://www.reddit.com/r/aws/comments/6aq22b/fetchsize_limit/) problem](https://www.reddit.com/r/aws/comments/6aq22b/fetchsize_limit/)
without having to not use `dbGetQuery()`. without having to not use `dbGetQuery()`.
The `AthenaJDBC41-1.1.0.jar` JAR file is included out of convenience but The `AthenaJDBC42_2.0.2.jar` JAR file is included out of convenience but
that will likely move to a separate package as this gets closer to prime that will likely move to a separate package as this gets closer to prime
time if this goes on CRAN. time if this goes on CRAN.
NOTE that the updated driver *REQUIRES JDK 1.8+*.
See the **Usage** section for an example. See the **Usage** section for an example.
## What’s Inside The Tin? ## What’s Inside The Tin?
@ -111,21 +111,21 @@ dbGetQuery(ath, "SELECT * FROM sampledb.elb_logs LIMIT 10") %>%
## Observations: 10 ## Observations: 10
## Variables: 16 ## Variables: 16
## $ timestamp <dttm> 2014-09-30 01:03:00, 2014-09-30 01:03:01, 2014-09-30 01:03:01, 2014-09-30 01:03:01, ... ## $ timestamp <dttm> 2014-09-30 00:00:25, 2014-09-30 00:00:57, 2014-09-30 00:01:06, 2014-09-30 00:01:29, ...
## $ elbname <chr> "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo... ## $ elbname <chr> "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo...
## $ requestip <chr> "253.90.22.60", "253.51.141.83", "245.59.222.144", "241.35.85.250", "246.245.70.48", ... ## $ requestip <chr> "246.247.182.239", "250.128.76.75", "243.157.244.21", "255.172.234.242", "245.27.105....
## $ requestport <dbl> 4095, 14668, 29796, 38607, 32750, 10182, 64948, 51279, 13331, 2700 ## $ requestport <dbl> 33998, 33998, 33998, 33998, 33998, 33998, 33998, 14346, 33998, 33998
## $ backendip <chr> "250.133.18.39", "248.214.120.18", "250.38.70.52", "249.45.101.192", "249.28.120.9", ... ## $ backendip <chr> "251.173.42.143", "254.201.134.52", "240.175.197.76", "255.212.79.68", "250.102.227.5...
## $ backendport <dbl> 8888, 443, 8899, 8888, 8888, 8888, 8888, 8888, 8888, 8000 ## $ backendport <dbl> 8888, 8888, 8888, 8888, 8888, 8888, 8888, 8000, 8888, 8888
## $ requestprocessingtime <dbl> 7.3e-05, 8.9e-05, 4.5e-05, 4.3e-05, 7.6e-05, 7.3e-05, 7.7e-05, 4.6e-05, 4.9e-05, 5.3e-05 ## $ requestprocessingtime <dbl> 0.000091, 0.000092, 0.000105, 0.000091, 0.000091, 0.000091, 0.000090, 0.000077, 0.000...
## $ backendprocessingtime <dbl> 0.561864, 0.021517, 0.019530, 0.018937, 0.022727, 0.390384, 0.017017, 0.016437, 0.019... ## $ backendprocessingtime <dbl> 0.048114, 0.055741, 0.008005, 0.037602, 0.039396, 0.053371, 0.040238, 0.192458, 0.027...
## $ clientresponsetime <dbl> 9.0e-05, 7.0e-05, 3.0e-05, 3.3e-05, 3.2e-05, 8.4e-05, 5.2e-05, 7.1e-05, 6.9e-05, 5.4e-05 ## $ clientresponsetime <dbl> 6.2e-05, 5.0e-05, 4.8e-05, 6.1e-05, 4.7e-05, 6.2e-05, 5.5e-05, 8.3e-05, 5.7e-05, 8.5e-05
## $ elbresponsecode <int> 200, 304, 304, 304, 200, 200, 304, 304, 200, 304 ## $ elbresponsecode <int> 200, 200, 302, 200, 200, 200, 200, 500, 200, 200
## $ backendresponsecode <int> 200, 200, 403, 200, 200, 400, 200, 200, 200, 200 ## $ backendresponsecode <int> 404, 200, 200, 200, 200, 200, 400, 500, 200, 200
## $ receivedbytes <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ## $ receivedbytes <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
## $ sentbytes <dbl> 58402, 0, 0, 0, 152213, 58402, 0, 0, 152213, 0 ## $ sentbytes <dbl> 2, 2, 0, 2, 2, 2, 2, 28098, 2, 2
## $ requestverb <chr> "GET", "GET", "GET", "GET", "GET", "GET", "GET", "GET", "GET", "GET" ## $ requestverb <chr> "GET", "GET", "GET", "GET", "GET", "GET", "GET", "GET", "GET", "GET"
## $ url <chr> "http://www.abcxyz.com:80/", "http://www.abcxyz.com:80/static/css/hue3.css", "http://... ## $ url <chr> "http://www.abcxyz.com:80/jobbrowser/?format=json&state=running&user=l29ezwd", "http:...
## $ protocol <chr> "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "... ## $ protocol <chr> "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "...
## Code of Conduct ## Code of Conduct

BIN
inst/java/AthenaJDBC41-1.1.0.jar

Binary file not shown.

BIN
inst/java/AthenaJDBC42_2.0.2.jar

Binary file not shown.

1
inst/java/log4j.properties

@ -0,0 +1 @@
log4j.rootLogger=WARN

13
man/athena_connect.Rd

@ -7,10 +7,8 @@
athena_connect(default_schema = "default", region = c("us-east-1", athena_connect(default_schema = "default", region = c("us-east-1",
"us-east-2", "us-west-2"), "us-east-2", "us-west-2"),
s3_staging_dir = Sys.getenv("AWS_S3_STAGING_DIR"), max_error_retries = 10, s3_staging_dir = Sys.getenv("AWS_S3_STAGING_DIR"), max_error_retries = 10,
connection_timeout = 10000, socket_timeout = 10000, connection_timeout = 10000, socket_timeout = 10000, log_path = "",
retry_base_delay = 100, retry_max_backoff_time = 1000, log_path = "", log_level = c("OFF", "FATAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE"))
log_level = c("INFO", "DEBUG", "WARN", "ERROR", "ALL", "OFF", "FATAL",
"TRACE"))
} }
\arguments{ \arguments{
\item{default_schema}{default schema (you'll still need to fully qualify non-default schema table names)} \item{default_schema}{default schema (you'll still need to fully qualify non-default schema table names)}
@ -25,13 +23,10 @@ athena_connect(default_schema = "default", region = c("us-east-1",
\item{socket_timeout}{the maximum amount of time, in milliseconds, to wait for a socket in order to send data to Athena.} \item{socket_timeout}{the maximum amount of time, in milliseconds, to wait for a socket in order to send data to Athena.}
\item{retry_base_delay}{minimum delay amount, in milliseconds, between retrying attempts to connect Athena.}
\item{retry_max_backoff_time}{maximum delay amount, in milliseconds, between retrying attempts to connect Athena.}
\item{log_path}{local path of the Athena JDBC driver logs. If no log path is provided, then no log files are created.} \item{log_path}{local path of the Athena JDBC driver logs. If no log path is provided, then no log files are created.}
\item{log_level}{log level of the Athena JDBC driver logs.} \item{log_level}{log level of the Athena JDBC driver logs. Use names
"OFF", "FATAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE".}
} }
\description{ \description{
Handles the up-front JDBC config Handles the up-front JDBC config

13
man/dbConnect-AthenaDriver-method.Rd

@ -6,12 +6,11 @@
\title{AthenaJDBC} \title{AthenaJDBC}
\usage{ \usage{
\S4method{dbConnect}{AthenaDriver}(drv, \S4method{dbConnect}{AthenaDriver}(drv,
provider = "com.amazonaws.athena.jdbc.shaded.com.amazonaws.auth.DefaultAWSCredentialsProviderChain", provider = "com.simba.athena.amazonaws.auth.DefaultAWSCredentialsProviderChain",
region = "us-east-1", s3_staging_dir = Sys.getenv("AWS_S3_STAGING_DIR"), region = "us-east-1", s3_staging_dir = Sys.getenv("AWS_S3_STAGING_DIR"),
schema_name = "default", max_error_retries = 10, schema_name = "default", max_error_retries = 10,
connection_timeout = 10000, socket_timeout = 10000, connection_timeout = 10000, socket_timeout = 10000, log_path, log_level,
retry_base_delay = 100, retry_max_backoff_time = 1000, log_path, ...)
log_level, ...)
} }
\arguments{ \arguments{
\item{provider}{JDBC auth provider (ideally leave default)} \item{provider}{JDBC auth provider (ideally leave default)}
@ -22,10 +21,12 @@
\item{schema_name}{LOL if only this actually worked with Amazon's hacked Presto driver} \item{schema_name}{LOL if only this actually worked with Amazon's hacked Presto driver}
\item{max_error_retries, connection_timeout, socket_timeout, retry_base_delay, retry_max_backoff_time}{technical connection info that you should only muck with if you know what you're doing.} \item{max_error_retries, connection_timeout, socket_timeout}{technical connection info that you should only muck with if you know what you're doing.}
\item{log_path, log_level}{The Athena JDBC driver can (shockingly) provide a decent bit \item{log_path, log_level}{The Athena JDBC driver can (shockingly) provide a decent bit
of data in logs. Set this to a temporary directory or somethign log4j can use.} of data in logs. Set this to a temporary directory or something log4j can use. For
`log_level` use the names ("OFF", "FATAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE") or
their corresponding integer values 0-6.}
\item{...}{unused} \item{...}{unused}
} }

Loading…
Cancel
Save