From ef7bed292226142e3e5177217b26dee1e4fe5868 Mon Sep 17 00:00:00 2001
From: boB Rudis 
Date: Sun, 17 Feb 2019 21:48:57 -0500
Subject: [PATCH] vignette

---
 .gitignore                                 |   1 +
 DESCRIPTION                                |   6 +-
 R/jdbc.r                                   |  67 +++++++++++--------
 R/metis.r                                  |  45 +++++++------
 README.Rmd                                 |  35 ++++++----
 README.md                                  |  39 +++++------
 man/athena_connect.Rd                      |  27 +++++---
 man/dbConnect-AthenaDriver-method.Rd       |  46 ++++++++-----
 tests/testthat/test-metis.R                |   8 +--
 vignettes/.gitignore                       |   2 +
 vignettes/athena-connection-parameters.Rmd | 102 +++++++++++++++++++++++++++++
 11 files changed, 270 insertions(+), 108 deletions(-)
 create mode 100644 vignettes/.gitignore
 create mode 100644 vignettes/athena-connection-parameters.Rmd

diff --git a/.gitignore b/.gitignore
index cce1f17..393f133 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
+inst/doc
 .DS_Store
 .Rproj.user
 .Rhistory
diff --git a/DESCRIPTION b/DESCRIPTION
index 85c9b56..58c9f75 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -16,7 +16,9 @@ SystemRequirements: JDK 1.8+
 License: MIT + file LICENSE
 Suggests:
     testthat,
-    covr
+    covr,
+    knitr,
+    rmarkdown
 Depends:
     R (>= 3.2.0),
     metis.jars,
@@ -30,4 +32,4 @@ Imports:
 RoxygenNote: 6.1.1
 Remotes:
     hrbrmstr/metis.jars
-
+VignetteBuilder: knitr
diff --git a/R/jdbc.r b/R/jdbc.r
index 2084922..a87fc99 100644
--- a/R/jdbc.r
+++ b/R/jdbc.r
@@ -39,22 +39,34 @@ Athena <- function(identifier.quote = '`') {
 #'
 #' Connect to Athena
 #'
-#' @section Driver Configuration Options:
+#' Mandatory JDBC connection parameters are also named function
+#' parameters. You can use `...` to supply additional/optional
+#' parameters.
+#'
+#' @section Highlighted Extra Driver Configuration Options:
+#'
+#' These are taken from the second item in References. See that resource
+#' for more information.
 #'
 #' - `BinaryColumnLength`: The maximum data length for `BINARY` columns. Default `32767L`
 #' - `ComplexTypeColumnLength`: The maximum data length for `ARRAY`, `MAP`, and `STRUCT` columns. Default `65535L`
 #' - `StringColumnLength`: The maximum data length for `STRING` columns. Default `255L`
 #'
 #' @param drv driver
-#' @param provider JDBC auth provider (ideally leave default)
-#' @param region AWS region the Athena tables are in
-#' @param s3_staging_dir A write-able bucket on S3 that you have permissions for
-#' @param schema_name LOL if only this actually worked with Amazon's hacked Presto driver
-#' @param max_error_retries,connection_timeout,socket_timeout
+#' @param Schema The name of the database schema to use when a schema is not explicitly
+#' specified in a query. You can still issue queries on other schemas by explicitly
+#' specifying the schema in the query.
+#' @param AwsRegion AWS region the Athena tables are in
+#' @param AwsCredentialsProviderClass JDBC auth provider; you can add a
+#' length-1 character vector named parameter `AwsCredentialsProviderArguments`
+#' to the `dbConnect()` call to use alternate auth providers. Use a
+#' comma-separated list of String arguments.
+#' @param S3OutputLocation A write-able bucket on S3 that you have permissions for
+#' @param MaxErrorRetry,ConnectTimeout,SocketTimeout
 #' technical connection info that you should only muck with if you know what you're doing.
-#' @param log_path,log_level The Athena JDBC driver can (shockingly) provide a decent bit
+#' @param LogPath,LogLevel The Athena JDBC driver can (shockingly) provide a decent bit
 #' of data in logs. Set this to a temporary directory or something log4j can use. For
-#' `log_level` use the names ("INFO", "DEBUG", "WARN", "ERROR", "ALL", "OFF", "FATAL", "TRACE") or
+#' `LogLevel` use the names ("INFO", "DEBUG", "WARN", "ERROR", "ALL", "OFF", "FATAL", "TRACE") or
 #' their corresponding integer values 0-6.
 #' @param fetch_size Athena results fetch size
 #' @param ... passed on to the driver. See Details.
@@ -68,35 +80,36 @@ setMethod(

   def = function(

     drv,
-    provider = "com.simba.athena.amazonaws.auth.DefaultAWSCredentialsProviderChain",
-    region = "us-east-1",
-    s3_staging_dir = Sys.getenv("AWS_S3_STAGING_DIR"),
-    schema_name = "default",
+    Schema = "default",
+    AwsRegion = "us-east-1",
+    AwsCredentialsProviderClass = "com.simba.athena.amazonaws.auth.DefaultAWSCredentialsProviderChain",
+    S3OutputLocation = Sys.getenv("AWS_S3_STAGING_DIR", unset = ""),
+    MaxErrorRetry = 10,
+    ConnectTimeout = 10000,
+    SocketTimeout = 10000,
+    LogPath = "",
+    LogLevel = 0,
     fetch_size = 1000L,
-    max_error_retries = 10,
-    connection_timeout = 10000,
-    socket_timeout = 10000,
-    log_path = "",
-    log_level = 0,
     ...) {

     conn_string = sprintf(
-      'jdbc:awsathena://athena.%s.amazonaws.com:443/%s', region, schema_name
+      'jdbc:awsathena://athena.%s.amazonaws.com:443/%s', AwsRegion, Schema
     )

-    if (!(log_level %in% 0:6)) log_level <- .ll_trans[log_level]
+    if (!(LogLevel %in% 0:6)) LogLevel <- .ll_trans[LogLevel]

     callNextMethod(
       drv,
       conn_string,
-      S3OutputLocation = s3_staging_dir,
-      Schema = schema_name,
-      MaxErrorRetry = max_error_retries,
-      ConnectTimeout = connection_timeout,
-      SocketTimeout = socket_timeout,
-      LogPath = log_path,
-      LogLevel = log_level,
-      AwsCredentialsProviderClass = provider,
+      S3OutputLocation = S3OutputLocation,
+      Schema = Schema,
+      AwsRegion = AwsRegion,
+      MaxErrorRetry = MaxErrorRetry,
+      ConnectTimeout = ConnectTimeout,
+      SocketTimeout = SocketTimeout,
+      LogPath = LogPath,
+      LogLevel = LogLevel,
+      AwsCredentialsProviderClass = AwsCredentialsProviderClass,
       ...
     ) -> jc
diff --git a/R/metis.r b/R/metis.r
index 0f9f15f..c7809b3 100644
--- a/R/metis.r
+++ b/R/metis.r
@@ -3,17 +3,24 @@
 #' Handles the up-front JDBC config
 #'
 #' @md
-#' @param default_schema default schema (you'll still need to fully qualify non-default schema table names)
+#' @param default_schema the name of the database schema to use when a schema is
+#' not explicitly specified in a query. You can still issue queries on other
+#' schemas by explicitly specifying the schema in the query.
+#' @param provider JDBC auth provider (defaults to `com.simba.athena.amazonaws.auth.DefaultAWSCredentialsProviderChain`)
 #' @param region AWS region (Ref: <http://docs.aws.amazon.com/general/latest/gr/rande.html#athena>)
-#' @param s3_staging_dir the Amazon S3 location to which your query output is written. The JDBC driver then asks Athena to read the results and provide rows of data back to the user.
-#' @param max_error_retries the maximum number of retries that the JDBC client attempts to make a request to Athena.
-#' @param connection_timeout the maximum amount of time, in milliseconds, to make a successful connection to Athena before an attempt is terminated.
-#' @param socket_timeout the maximum amount of time, in milliseconds, to wait for a socket in order to send data to Athena.
-# @param retry_base_delay minimum delay amount, in milliseconds, between retrying attempts to connect Athena.
-# @param retry_max_backoff_time maximum delay amount, in milliseconds, between retrying attempts to connect Athena.
-#' @param log_path local path of the Athena JDBC driver logs. If no log path is provided, then no log files are created.
+#' @param s3_staging_dir the Amazon S3 location to which your query output is written.
+#' The JDBC driver then asks Athena to read the results and provide rows
+#' of data back to the user.
+#' @param max_error_retries the maximum number of retries that the JDBC client
+#' attempts to make a request to Athena.
+#' @param connection_timeout the maximum amount of time, in milliseconds, to
+#' make a successful connection to Athena before an attempt is terminated.
+#' @param socket_timeout the maximum amount of time, in milliseconds, to wait
+#' for a socket in order to send data to Athena.
+#' @param log_path local path of the Athena JDBC driver logs. If no log path is
+#' provided, then no log files are created.
 #' @param log_level log level of the Athena JDBC driver logs. Use names
-#' "OFF", "FATAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE".
+#' "OFF", "FATAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE".
 #' @param ... passed on to the driver
 #' @export
 #' @references [Connect with JDBC](https://docs.aws.amazon.com/athena/latest/ug/connect-with-jdbc.html);
@@ -35,6 +42,7 @@
 #' }
 athena_connect <- function(
   default_schema = "default",
+  provider = "com.simba.athena.amazonaws.auth.DefaultAWSCredentialsProviderChain",
   region = c("us-east-1", "us-east-2", "us-west-2"),
   s3_staging_dir = Sys.getenv("AWS_S3_STAGING_DIR"),
   max_error_retries = 10,
@@ -52,17 +60,16 @@ athena_connect <- function(

   dbConnect(
     athena_jdbc,
-    schema_name = default_schema,
-    region = region,
-    s3_staging_dir = s3_staging_dir,
-    max_error_retries = max_error_retries,
-    connection_timeout = connection_timeout,
-    socket_timeout = socket_timeout,
-    log_path = log_path,
-    log_level = log_level,
+    Schema = default_schema,
+    AwsRegion = region,
+    S3OutputLocation = s3_staging_dir,
+    MaxErrorRetry = max_error_retries,
+    ConnectTimeout = connection_timeout,
+    SocketTimeout = socket_timeout,
+    LogPath = log_path,
+    LogLevel = log_level,
+    AwsCredentialsProviderClass = provider,
     ...
) -> con - con - } diff --git a/README.Rmd b/README.Rmd index 9319d1c..19dcbd4 100644 --- a/README.Rmd +++ b/README.Rmd @@ -3,6 +3,20 @@ output: rmarkdown::github_document editor_options: chunk_output_type: console --- +```{r include=FALSE} +knitr::opts_chunk$set( + echo = TRUE, + message = FALSE, + warning = FALSE, + fig.retina = 2 +) + +Sys.setenv( + AWS_S3_STAGING_DIR = "s3://aws-athena-query-results-569593279821-us-east-1" +) + +options(width=120) +``` # metis @@ -55,31 +69,28 @@ devtools::install_gitlab("hrbrmstr/metis") devtools::install_github("hrbrmstr/metis") ``` -```{r message=FALSE, warning=FALSE, include=FALSE} -options(width=120) -``` - ## Usage -```{r message=FALSE, warning=FALSE} +```{r} library(metis) # current verison packageVersion("metis") ``` -```{r message=FALSE, warning=FALSE} +```{r cache=FALSE} library(rJava) library(RJDBC) library(metis) -library(magrittr) +library(magrittr) # for piping b/c I'm addicted +``` +```{r} dbConnect( - drv = metis::Athena(), - schema_name = "sampledb", - provider = "com.simba.athena.amazonaws.auth.PropertiesFileCredentialsProvider", - AwsCredentialsProviderArguments = path.expand("~/.aws/athenaCredentials.props"), - s3_staging_dir = "s3://aws-athena-query-results-569593279821-us-east-1", + metis::Athena(), + Schema = "sampledb", + AwsCredentialsProviderClass = "com.simba.athena.amazonaws.auth.PropertiesFileCredentialsProvider", + AwsCredentialsProviderArguments = path.expand("~/.aws/athenaCredentials.props") ) -> con dbListTables(con, schema="sampledb") diff --git a/README.md b/README.md index 6bfc09c..4d0456f 100644 --- a/README.md +++ b/README.md @@ -66,14 +66,15 @@ packageVersion("metis") library(rJava) library(RJDBC) library(metis) -library(magrittr) +library(magrittr) # for piping b/c I'm addicted +``` +``` r dbConnect( - drv = metis.lite::Athena(), - schema_name = "sampledb", - provider = "com.simba.athena.amazonaws.auth.PropertiesFileCredentialsProvider", - AwsCredentialsProviderArguments = path.expand("~/.aws/athenaCredentials.props"), - s3_staging_dir = "s3://aws-athena-query-results-569593279821-us-east-1", + metis::Athena(), + Schema = "sampledb", + AwsCredentialsProviderClass = "com.simba.athena.amazonaws.auth.PropertiesFileCredentialsProvider", + AwsCredentialsProviderArguments = path.expand("~/.aws/athenaCredentials.props") ) -> con dbListTables(con, schema="sampledb") @@ -103,21 +104,21 @@ dbGetQuery(con, "SELECT * FROM sampledb.elb_logs LIMIT 10") %>% ## Observations: 10 ## Variables: 16 - ## $ timestamp "2014-09-29T03:24:38.169500Z", "2014-09-29T03:25:09.029469Z", "2014-09-29T03:25:39.8676… + ## $ timestamp "2014-09-27T00:00:25.424956Z", "2014-09-27T00:00:56.439218Z", "2014-09-27T00:01:27.4417… ## $ elbname "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo",… - ## $ requestip "253.89.30.138", "248.64.121.231", "245.21.209.210", "244.77.57.59", "244.185.170.87", … - ## $ requestport 20159, 20159, 20159, 20159, 20159, 20159, 20159, 20159, 20159, 20159 - ## $ backendip "253.89.30.138", "244.77.57.59", "240.105.192.251", "253.89.30.138", "248.64.121.231", … - ## $ backendport 8888, 8888, 8888, 8899, 8888, 8888, 8888, 8888, 8888, 8888 - ## $ requestprocessingtime 7.5e-05, 9.1e-05, 9.0e-05, 9.5e-05, 8.9e-05, 9.3e-05, 8.7e-05, 9.2e-05, 9.0e-05, 9.1e-05 - ## $ backendprocessingtime 0.047465, 0.044693, 0.045687, 0.051089, 0.045445, 0.045845, 0.046027, 0.045039, 0.05010… - ## $ clientresponsetime 6.5e-05, 7.2e-05, 6.4e-05, 7.0e-05, 5.4e-05, 6.7e-05, 5.7e-05, 4.6e-05, 8.7e-05, 4.9e-05 + ## $ 
requestip "241.230.198.83", "252.26.60.51", "250.244.20.109", "247.59.58.167", "254.64.224.54", "… + ## $ requestport 27026, 27026, 27026, 27026, 27026, 27026, 27026, 27026, 27026, 27026 + ## $ backendip "251.192.40.76", "249.89.116.3", "251.111.156.171", "251.139.91.156", "251.111.156.171"… + ## $ backendport 443, 8888, 8888, 8888, 8000, 8888, 8888, 8888, 8888, 8888 + ## $ requestprocessingtime 9.1e-05, 9.4e-05, 8.4e-05, 9.7e-05, 9.1e-05, 9.3e-05, 9.4e-05, 8.3e-05, 9.0e-05, 9.0e-05 + ## $ backendprocessingtime 0.046598, 0.038973, 0.047054, 0.039845, 0.061461, 0.037791, 0.047035, 0.048792, 0.04572… + ## $ clientresponsetime 4.9e-05, 4.7e-05, 4.9e-05, 4.9e-05, 4.0e-05, 7.7e-05, 7.5e-05, 7.3e-05, 4.0e-05, 6.7e-05 ## $ elbresponsecode "200", "200", "200", "200", "200", "200", "200", "200", "200", "200" - ## $ backendresponsecode "200", "200", "400", "200", "404", "200", "403", "404", "200", "200" + ## $ backendresponsecode "200", "200", "200", "200", "200", "400", "400", "200", "200", "200" ## $ receivedbytes 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ## $ sentbytes 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 ## $ requestverb "GET", "GET", "GET", "GET", "GET", "GET", "GET", "GET", "GET", "GET" - ## $ url "http://www.abcxyz.com:80/jobbrowser/?format=json&state=running&user=248nnm5", "http://… + ## $ url "http://www.abcxyz.com:80/jobbrowser/?format=json&state=running&user=20g578y", "http://… ## $ protocol "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HT… ### Check types @@ -148,7 +149,7 @@ LIMIT 1 ## Variables: 13 ## $ achar "chr " ## $ avarchr "varchr" - ## $ tsday 2014-09-26 + ## $ tsday 2014-09-29 ## $ justadbl 100.1 ## $ asmallint 127 ## $ justanint 100 @@ -166,8 +167,8 @@ cloc::cloc_pkg_md() | Lang | \# Files | (%) | LoC | (%) | Blank lines | (%) | \# Lines | (%) | | :--- | -------: | ---: | --: | ---: | ----------: | ---: | -------: | ---: | -| R | 8 | 0.89 | 232 | 0.85 | 77 | 0.71 | 160 | 0.76 | -| Rmd | 1 | 0.11 | 42 | 0.15 | 32 | 0.29 | 51 | 0.24 | +| R | 8 | 0.89 | 250 | 0.83 | 83 | 0.72 | 194 | 0.79 | +| Rmd | 1 | 0.11 | 50 | 0.17 | 32 | 0.28 | 53 | 0.21 | ## Code of Conduct diff --git a/man/athena_connect.Rd b/man/athena_connect.Rd index fbed4e5..f5948e8 100644 --- a/man/athena_connect.Rd +++ b/man/athena_connect.Rd @@ -4,27 +4,38 @@ \alias{athena_connect} \title{Simplified Athena JDBC connection helper} \usage{ -athena_connect(default_schema = "default", region = c("us-east-1", - "us-east-2", "us-west-2"), +athena_connect(default_schema = "default", + provider = "com.simba.athena.amazonaws.auth.DefaultAWSCredentialsProviderChain", + region = c("us-east-1", "us-east-2", "us-west-2"), s3_staging_dir = Sys.getenv("AWS_S3_STAGING_DIR"), max_error_retries = 10, connection_timeout = 10000, socket_timeout = 10000, log_path = "", log_level = c("OFF", "FATAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE"), ...) } \arguments{ -\item{default_schema}{default schema (you'll still need to fully qualify non-default schema table names)} +\item{default_schema}{the name of the database schema to use when a schema is +not explicitly specified in a query. You can still issue queries on other +schemas by explicitly specifying the schema in the query.} + +\item{provider}{JDBC auth provider (defaults to \code{com.simba.athena.amazonaws.auth.DefaultAWSCredentialsProviderChain})} \item{region}{AWS region (Ref: \url{http://docs.aws.amazon.com/general/latest/gr/rande.html#athena})} -\item{s3_staging_dir}{the Amazon S3 location to which your query output is written. 
The JDBC driver then asks Athena to read the results and provide rows of data back to the user.}
+\item{s3_staging_dir}{the Amazon S3 location to which your query output is written.
+The JDBC driver then asks Athena to read the results and provide rows
+of data back to the user.}
 
-\item{max_error_retries}{the maximum number of retries that the JDBC client attempts to make a request to Athena.}
+\item{max_error_retries}{the maximum number of retries that the JDBC client
+attempts to make a request to Athena.}
 
-\item{connection_timeout}{the maximum amount of time, in milliseconds, to make a successful connection to Athena before an attempt is terminated.}
+\item{connection_timeout}{the maximum amount of time, in milliseconds, to
+make a successful connection to Athena before an attempt is terminated.}
 
-\item{socket_timeout}{the maximum amount of time, in milliseconds, to wait for a socket in order to send data to Athena.}
+\item{socket_timeout}{the maximum amount of time, in milliseconds, to wait
+for a socket in order to send data to Athena.}
 
-\item{log_path}{local path of the Athena JDBC driver logs. If no log path is provided, then no log files are created.}
+\item{log_path}{local path of the Athena JDBC driver logs. If no log path is
+provided, then no log files are created.}
 
 \item{log_level}{log level of the Athena JDBC driver logs. Use names
 "OFF", "FATAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE".}
diff --git a/man/dbConnect-AthenaDriver-method.Rd b/man/dbConnect-AthenaDriver-method.Rd
index 2a28101..1589624 100644
--- a/man/dbConnect-AthenaDriver-method.Rd
+++ b/man/dbConnect-AthenaDriver-method.Rd
@@ -5,41 +5,53 @@
 \alias{dbConnect,AthenaDriver-method}
 \title{AthenaJDBC}
 \usage{
-\S4method{dbConnect}{AthenaDriver}(drv,
-  provider = "com.simba.athena.amazonaws.auth.DefaultAWSCredentialsProviderChain",
-  region = "us-east-1",
-  s3_staging_dir = Sys.getenv("AWS_S3_STAGING_DIR"),
-  schema_name = "default", fetch_size = 1000L,
-  max_error_retries = 10, connection_timeout = 10000,
-  socket_timeout = 10000, log_path = "", log_level = 0, ...)
+\S4method{dbConnect}{AthenaDriver}(drv, Schema = "default",
+  AwsRegion = "us-east-1",
+  AwsCredentialsProviderClass = "com.simba.athena.amazonaws.auth.DefaultAWSCredentialsProviderChain",
+  S3OutputLocation = Sys.getenv("AWS_S3_STAGING_DIR", unset = ""),
+  MaxErrorRetry = 10, ConnectTimeout = 10000, SocketTimeout = 10000,
+  LogPath = "", LogLevel = 0, fetch_size = 1000L, ...)
 }
 \arguments{
 \item{drv}{driver}
 
-\item{provider}{JDBC auth provider (ideally leave default)}
+\item{Schema}{The name of the database schema to use when a schema is not explicitly
+specified in a query. You can still issue queries on other schemas by explicitly
+specifying the schema in the query.}
 
-\item{region}{AWS region the Athena tables are in}
+\item{AwsRegion}{AWS region the Athena tables are in}
 
-\item{s3_staging_dir}{A write-able bucket on S3 that you have permissions for}
+\item{AwsCredentialsProviderClass}{JDBC auth provider; you can add a
+length-1 character vector named parameter `AwsCredentialsProviderArguments`
+to the `dbConnect()` call to use alternate auth providers. Use a
+comma-separated list of String arguments.}
 
-\item{schema_name}{LOL if only this actually worked with Amazon's hacked Presto driver}
+\item{S3OutputLocation}{A write-able bucket on S3 that you have permissions for}
 
-\item{fetch_size}{Athena results fetch size}
-
-\item{max_error_retries, connection_timeout, socket_timeout}{technical connection info that you should only muck with if you know what you're doing.}
+\item{MaxErrorRetry, ConnectTimeout, SocketTimeout}{technical connection info that you should only muck with if you know what you're doing.}
 
-\item{log_path, log_level}{The Athena JDBC driver can (shockingly) provide a decent bit
+\item{LogPath, LogLevel}{The Athena JDBC driver can (shockingly) provide a decent bit
 of data in logs. Set this to a temporary directory or something log4j can use. For
-`log_level` use the names ("INFO", "DEBUG", "WARN", "ERROR", "ALL", "OFF", "FATAL", "TRACE") or
+`LogLevel` use the names ("INFO", "DEBUG", "WARN", "ERROR", "ALL", "OFF", "FATAL", "TRACE") or
 their corresponding integer values 0-6.}
 
+\item{fetch_size}{Athena results fetch size}
+
 \item{...}{passed on to the driver. See Details.}
 }
 \description{
 Connect to Athena
 }
-\section{Driver Configuration Options}{
+\details{
+Mandatory JDBC connection parameters are also named function
+parameters. You can use `...` to supply additional/optional
+parameters.
+}
+\section{Highlighted Extra Driver Configuration Options}{
+
+These are taken from the second item in References. See that resource
+for more information.
 
 - `BinaryColumnLength`: The maximum data length for `BINARY` columns. Default `32767L`
 - `ComplexTypeColumnLength`: The maximum data length for `ARRAY`, `MAP`, and `STRUCT` columns. Default `65535L`
diff --git a/tests/testthat/test-metis.R b/tests/testthat/test-metis.R
index 96f56fe..8f838ac 100644
--- a/tests/testthat/test-metis.R
+++ b/tests/testthat/test-metis.R
@@ -6,12 +6,12 @@
 drv <- metis::Athena()
 expect_is(drv, "AthenaDriver")
 
-dbConnect(
+metis::dbConnect(
   drv = drv,
-  schema_name = "sampledb",
-  provider = "com.simba.athena.amazonaws.auth.PropertiesFileCredentialsProvider",
+  Schema = "sampledb",
+  AwsCredentialsProviderClass = "com.simba.athena.amazonaws.auth.PropertiesFileCredentialsProvider",
   AwsCredentialsProviderArguments = path.expand("~/.aws/athenaCredentials.props"),
-  s3_staging_dir = "s3://aws-athena-query-results-569593279821-us-east-1",
+  S3OutputLocation = "s3://aws-athena-query-results-569593279821-us-east-1",
 ) -> con
 
 expect_is(con, "AthenaConnection")
diff --git a/vignettes/.gitignore b/vignettes/.gitignore
new file mode 100644
index 0000000..097b241
--- /dev/null
+++ b/vignettes/.gitignore
@@ -0,0 +1,2 @@
+*.html
+*.R
diff --git a/vignettes/athena-connection-parameters.Rmd b/vignettes/athena-connection-parameters.Rmd
new file mode 100644
index 0000000..235b0c3
--- /dev/null
+++ b/vignettes/athena-connection-parameters.Rmd
@@ -0,0 +1,102 @@
+---
+title: "Athena Connection Parameters"
+author: "Bob Rudis"
+date: "`r Sys.Date()`"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{Athena Connection Parameters}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r setup, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+```
+
+```{r echo=FALSE}
+structure(list(property = c("AwsCredentialsProviderArguments",
+"AwsCredentialsProviderClass", "AwsRegion", "BinaryColumnLength",
+"ComplexTypeColumnLength", "ConnectionTest", "ConnectTimeout",
+"IdP_Host", "IdP_Port", "LogLevel", "LogPath",
"MaxCatalogNameLength", +"MaxColumnNameLength", "MaxErrorRetry", "MaxQueryExecutionPollingInterval", +"MaxSchemaNameLength", "MaxTableNameLength", "MetadataRetrievalMethod", +"NonProxyHosts", "Password", "PreemptiveBasicProxyAuth", "preferred_role", +"Profile", "ProxyDomain", "ProxyHost", "ProxyPort", "ProxyPWD", +"ProxyUID", "ProxyWorkstation", "RowsToFetchPerBlock", "S3OutputEncKMSKey", +"S3OutputEncOption", "S3OutputLocation", "Schema", "SocketTimeout", +"SSL_Insecure", "StringColumnLength", "UseArraySupport", "UseAwsLogger", +"User", "UseResultsetStreaming"), default = c("None", "None", +"None", "32767", "65535", "1", "10", "None", "443", "0", "The current working directory.", +"0", "0", "10", "100", "256", "0", "Auto", "None", "None", "0", +"None", "None", "None", "None", "None", "None", "None", "None", +"10000 for result set streaming, 1000 for pagination", "None", +"None", "None", "\"default\"", "50", "\"false\"", "255", "1", +"0", "None", "1"), type = c("String", "String", "String", "Integer", +"Integer", "Integer", "Integer", "String", "String", "Integer", +"String", "Integer", "Integer", "Integer", "Integer", "Integer", +"Integer", "String", "String", "String", "Integer", "String", +"String", "String", "String", "Integer", "String", "String", +"String", "Integer", "String", "String", "String", "String", +"Integer", "String", "Integer", "Integer", "Integer", "String", +"Integer"), required = c("Yes, if User and Password are not provided, and if AwsCredentialsProviderClass does not have a default constructor.", +"Yes,if User and Password are not provided, or if you are authenticating through AD FS.", +"Yes", "No", "No", "No", "No", "Yes, if authenticating through AD FS.", +"No", "No", "No", "No", "No", "No", "No", "No", "No", "No", "No", +"Yes, if using IAM credentials or the AD FS provider for authentication.", +"No", "No", "No", "No", "No", "No", "Yes, if connecting through a proxy server that requires authentication.", +"Yes, if connecting through a proxy server that requires authentication.", +"No", "No", "Yes, if using SSE_KMS or CSE_KMS encryption.", "No", +"Yes", "No", "No", "No", "No", "No", "No", "Yes, if using IAM credentials or the AD FS provider for authentication.", +"No"), info = c("A comma-separated list of String arguments for the constructor of the AwsCredentialsProviderClass.", +"If you are authenticating through the AD FS credentials provider, then set this property to the FQCN of the AD FS credentials provider. You can set this property in the connection URL or in an AWS profile. If you are authenticating through a class that implements the AWSCredentialsProvider interface, then set this property to the FQCN of the AWSCredentialsProvider interface.", +"The AWS region of the Athena and AWS Glue instance that you want to connect to.", +"The maximum data length for BINARY columns.", "The maximum data length for ARRAY, MAP, and STRUCT columns.", +"This property determines whether the driver verifies the connection by sending a “SELECT 1” query when establishing a connection with Athena. 1: The driver verifies connection by sending a simple “SELECT 1” query to Athena; 0: The driver does not send any query to Athena to verify the connection.", +"The amount of time, in seconds, that the driver waits when establishing a connection before timing out the connection. A value of 0 indicates that the driver never times out the connection.", +"The host name of the AD FS service that you use to authenticate the connection. 
The host name cannot include any slashes (/).", +"The number of the port that the AD FS service host uses to listen for requests. The port number to specify may differ depending on the AD FS server configuration. If you are not sure which port to specify, contact your system administrator.", +"Use this property to enable or disable logging in the driver and to specify the amount of detail included in log files. When logging is enabled, the driver produces the following log files in the location specified in the LogPath property: 1: An AthenaJDBC_driver.log file that logs driver activity that is not specific to a connection; 2: An AthenaJDBC_connection_[Number].log file for each connection made to the database, where [Number] is a number that distinguishes each log file from the others. This file logs driver activity that is specific to the connection.", +"The full path to the folder where the driver saves log files when logging is enabled.", +"The maximum number of characters that catalog names can contain. To indicate that there is no maximum length or that the length is unknown, set this option to 0.", +"The maximum number of characters that column names can contain. To indicate that there is no maximum length or that the length is unknown, set this option to 0.", +"The maximum number of times that the driver resubmits a failed request that can be retried, such as a 5xx error from the Athena server.", +"The maximum amount of time, in milliseconds, that the driver waits between attempts when polling the Athena server for query results. You cannot specify an interval that is less than 5ms.", +"The maximum number of characters that schema names can contain. To indicate that there is no maximum length or that the length is unknown, set this option to 0.", +"The maximum number of characters that table names can contain. To indicate that there is no maximum length or that the length is unknown, set this option to 0.", +"This property determines how the metadata would be retrieved from Athena for different JDBC API calls like getTables, getColumns. Following are the valid values: \"Auto\": During connection time driver will automatically determine whether to use AWS Glue or Query to get metadata for the specified Athena region. If AWS Glue is supported in the region and Athena has been upgraded to use AWS Glue, driver will use AWS Glue to get the metadata. If AWS Glue is not supported in the region or Athena hasn’t been upgraded to use AWS Glue, driver will query Athena to get the metadata; \"Glue\": Driver will use AWS Glue to get the metadata regardless of whether AWS Glue is supported or used in the region; \"Query\": Driver will use Query to get the metadata regardless of whether AWS Glue is supported or used in that region.", +"A list of hosts that the driver can access without connecting through the proxy server, when a proxy connection is enabled. When specifying multiple hosts, each host must be separated by a vertical bar (|). You can specify patterns using asterisks (*) as wildcard characters.", +"If you are using IAM credentials for authentication, then set this property to the secret key provided by your AWS account. If you are authenticating through the AD FS credentials provider, then set this property to the password that you use to access the AD FS server.", +"This property specifies whether the driver pre-emptively authenticates against the proxy server using basic authentication, when a proxy connection is enabled. 
1: The driver pre-emptively authenticates the connection using basic authentication; 0: The driver does not pre-emptively authenticate the connection using basic authentication.", +"However, by default, the driver assumes the first role from the list returned in the SAML response from the identity provider.", +"The name of the AWS profile to use, containing any additional connection properties not specified in the connection URL. For example, when configuring the driver to authenticate through AD FS, you can use this property to specify a profile that contains the required AD FS service information. The driver checks the AWS credentials file for the specified profile. The default location for this file is ~/.aws/credentials. You can change this default behavior by setting the AWS_CREDENTIAL_PROFILES_FILE environment variable to the full path and name of a different credentials file. For more information about profiles, see \"Working with AWS Credentials\" in the AWS SDK for Java Developer Guide: https://docs.aws.amazon.com/sdk-for- java/v1/developer-guide/credentials.html.", +"The Windows domain name of the server that you want to authenticate through, when authenticating a proxy connection using the NTLM protocol.", +"The IP address or host name of your proxy server.", "The listening port of your proxy server.", +"The password that you use to access the proxy server.", "The user name that you use to access the proxy server.", +"The Windows workstation name of the server that you want to authenticate through, when authenticating a proxy connection using the NTLM protocol.", +"The maximum number of rows to fetch per stream if using the result set streaming API. The maximum number of rows to fetch per page if using pagination.", +"The KMS key ARN or ID to use when encrypting query results using SSE_KMS or CSE_KMS encryption.", +"The encryption protocol that the driver uses to encrypt your query results before storing them on Amazon S3. \"SSE_S3\": The driver uses server-side encryption with an Amazon S3-managed key; \"SSE_KMS\": The driver uses server-side encryption with an AWS KMS-managed key; \"CSE_KMS\": The driver uses client-side encryption with an AWS KMS-managed key.", +"The path of the Amazon S3 location where you want to store query results, prefixed by s3://.", +"The name of the database schema to use when a schema is not explicitly specified in a query. You can still issue queries on other schemas by explicitly specifying the schema in the query.", +"The amount of time, in seconds, that the driver waits for data to be transferred over an established, open connection before timing out the connection. A value of 0 (NOT recommended) indicates that the driver never times out the connection.", +"This property indicates whether the server certificate of the AD FS host should be verified.", +"The maximum data length for STRING columns.", "This property specifies whether the driver supports getting the ResultSet data as an array.", +"This property specifies whether the driver records the log output from any AWS API calls.", +"If you are using IAM credentials for authentication, then set this property to the access key provided by your AWS account.", +"This property specifies whether the driver uses the AWS result set streaming API for result set fetching." 
+)), class = c("spec_tbl_df", "tbl_df", "tbl", "data.frame"), row.names = c(NA, +-41L), spec = structure(list(cols = list(property = structure(list(), class = c("collector_character", +"collector")), default = structure(list(), class = c("collector_character", +"collector")), type = structure(list(), class = c("collector_character", +"collector")), required = structure(list(), class = c("collector_character", +"collector")), info = structure(list(), class = c("collector_character", +"collector"))), default = structure(list(), class = c("collector_guess", +"collector")), skip = 1), class = "col_spec")) -> docs +``` + +```{r echo=FALSE} +knitr::kable(docs, "markdown", ) +```