boB Rudis 11 months ago
parent
commit
ef7bed2922
No known key found for this signature in database
11 changed files with 270 additions and 108 deletions
  1. 1
    0
      .gitignore
  2. 4
    2
      DESCRIPTION
  3. 40
    27
      R/jdbc.r
  4. 26
    19
      R/metis.r
  5. 23
    12
      README.Rmd
  6. 20
    19
      README.md
  7. 19
    8
      man/athena_connect.Rd
  8. 29
    17
      man/dbConnect-AthenaDriver-method.Rd
  9. 4
    4
      tests/testthat/test-metis.R
  10. 2
    0
      vignettes/.gitignore
  11. 102
    0
      vignettes/athena-connection-parameters.Rmd

+ 1
- 0
.gitignore View File

@@ -1,3 +1,4 @@
1
+inst/doc
1 2
 .DS_Store
2 3
 .Rproj.user
3 4
 .Rhistory

+ 4
- 2
DESCRIPTION View File

@@ -16,7 +16,9 @@ SystemRequirements: JDK 1.8+
16 16
 License: MIT + file LICENSE
17 17
 Suggests:
18 18
     testthat,
19
-    covr
19
+    covr,
20
+    knitr,
21
+    rmarkdown
20 22
 Depends:
21 23
     R (>= 3.2.0),
22 24
     metis.jars,
@@ -30,4 +32,4 @@ Imports:
30 32
 RoxygenNote: 6.1.1
31 33
 Remotes:
32 34
     hrbrmstr/metis.jars
33
-
35
+VignetteBuilder: knitr

+ 40
- 27
R/jdbc.r View File

@@ -39,22 +39,34 @@ Athena <- function(identifier.quote = '`') {
39 39
 #'
40 40
 #' Connect to Athena
41 41
 #'
42
-#' @section Driver Configuration Options:
42
+#' Mandatory JDBC connection parameters are also named function
43
+#' parameters. You can use `...` to supply additional/optional
44
+#' parameters.
45
+#'
46
+#' @section Highlighted Extra Driver Configuration Options:
47
+#'
48
+#' These are taken from the second item in References. See that resource
49
+#' for more information.
43 50
 #'
44 51
 #' - `BinaryColumnLength`: <int> The maximum data length for `BINARY` columns. Default `32767L`
45 52
 #' - `ComplexTypeColumnLength`: <int> The maximum data length for `ARRAY`, `MAP`, and `STRUCT` columns. Default `65535L`
46 53
 #' - `StringColumnLength`: <int> The maximum data length for `STRING` columns. Default `255L`
47 54
 #'
48 55
 #' @param drv driver
49
-#' @param provider JDBC auth provider (ideally leave default)
50
-#' @param region AWS region the Athena tables are in
51
-#' @param s3_staging_dir A write-able bucket on S3 that you have permissions for
52
-#' @param schema_name LOL if only this actually worked with Amazon's hacked Presto driver
53
-#' @param max_error_retries,connection_timeout,socket_timeout
56
+#' @param Schema The name of the database schema to use when a schema is not explicitly
57
+#'        specified in a query. You can still issue queries on other schemas by explicitly
58
+#'        specifying the schema in the query.
59
+#' @param AwsRegion AWS region the Athena tables are in
60
+#' @param AwsCredentialsProviderClass JDBC auth provider; You can add a
61
+#'        length-1 character vector named parameter `AwsCredentialsProviderArguments`
62
+#'        to the `dbConnect()`  call to use alternate auth providers. Use a
63
+#'        comma-separated list of String arguments.
64
+#' @param S3OutputLocation A write-able bucket on S3 that you have permissions for
65
+#' @param MaxErrorRetry,ConnectTimeout,SocketTimeout
54 66
 #'     technical connection info that you should only muck with if you know what you're doing.
55
-#' @param log_path,log_level The Athena JDBC driver can (shockingly) provide a decent bit
67
+#' @param LogPath,LogLevel The Athena JDBC driver can (shockingly) provide a decent bit
56 68
 #'     of data in logs. Set this to a temporary directory or something log4j can use. For
57
-#'     `log_level` use the names ("INFO", "DEBUG", "WARN", "ERROR", "ALL", "OFF", "FATAL", "TRACE") or
69
+#'     `LogLevel` use the names ("INFO", "DEBUG", "WARN", "ERROR", "ALL", "OFF", "FATAL", "TRACE") or
58 70
 #'     their corresponding integer values 0-6.
59 71
 #' @param fetch_size Athena results fetch size
60 72
 #' @param ... passed on to the driver. See Details.
@@ -68,35 +80,36 @@ setMethod(
68 80
 
69 81
   def = function(
70 82
     drv,
71
-    provider = "com.simba.athena.amazonaws.auth.DefaultAWSCredentialsProviderChain",
72
-    region = "us-east-1",
73
-    s3_staging_dir = Sys.getenv("AWS_S3_STAGING_DIR"),
74
-    schema_name = "default",
83
+    Schema = "default",
84
+    AwsRegion = "us-east-1",
85
+    AwsCredentialsProviderClass = "com.simba.athena.amazonaws.auth.DefaultAWSCredentialsProviderChain",
86
+    S3OutputLocation = Sys.getenv("AWS_S3_STAGING_DIR", unset = ""),
87
+    MaxErrorRetry = 10,
88
+    ConnectTimeout = 10000,
89
+    SocketTimeout = 10000,
90
+    LogPath = "",
91
+    LogLevel = 0,
75 92
     fetch_size = 1000L,
76
-    max_error_retries = 10,
77
-    connection_timeout = 10000,
78
-    socket_timeout = 10000,
79
-    log_path = "",
80
-    log_level = 0,
81 93
     ...) {
82 94
 
83 95
     conn_string = sprintf(
84
-      'jdbc:awsathena://athena.%s.amazonaws.com:443/%s', region, schema_name
96
+      'jdbc:awsathena://athena.%s.amazonaws.com:443/%s', AwsRegion, Schema
85 97
     )
86 98
 
87
-    if (!(log_level %in% 0:6)) log_level <- .ll_trans[log_level]
99
+    if (!(LogLevel %in% 0:6)) LogLevel <- .ll_trans[LogLevel]
88 100
 
89 101
     callNextMethod(
90 102
       drv,
91 103
       conn_string,
92
-      S3OutputLocation = s3_staging_dir,
93
-      Schema = schema_name,
94
-      MaxErrorRetry = max_error_retries,
95
-      ConnectTimeout = connection_timeout,
96
-      SocketTimeout = socket_timeout,
97
-      LogPath = log_path,
98
-      LogLevel = log_level,
99
-      AwsCredentialsProviderClass = provider,
104
+      S3OutputLocation = S3OutputLocation,
105
+      Schema = Schema,
106
+      AwsRegion = AwsRegion,
107
+      MaxErrorRetry = MaxErrorRetry,
108
+      ConnectTimeout = ConnectTimeout,
109
+      SocketTimeout = SocketTimeout,
110
+      LogPath = LogPath,
111
+      LogLevel = LogLevel,
112
+      AwsCredentialsProviderClass = AwsCredentialsProviderClass,
100 113
       ...
101 114
     ) -> jc
102 115
 

+ 26
- 19
R/metis.r View File

@@ -3,17 +3,24 @@
3 3
 #' Handles the up-front JDBC config
4 4
 #'
5 5
 #' @md
6
-#' @param default_schema default schema (you'll still need to fully qualify non-default schema table names)
6
+#' @param default_schema the name of the database schema to use when a schema is
7
+#'        not explicitly specified in a query. You can still issue queries on other
8
+#'        schemas by explicitly specifying the schema in the query.
9
+#' @param provider JDBC auth provider (defaults to `com.simba.athena.amazonaws.auth.DefaultAWSCredentialsProviderChain`)
7 10
 #' @param region AWS region (Ref: <http://docs.aws.amazon.com/general/latest/gr/rande.html#athena>)
8
-#' @param s3_staging_dir the Amazon S3 location to which your query output is written. The JDBC driver then asks Athena to read the results and provide rows of data back to the user.
9
-#' @param max_error_retries the maximum number of retries that the JDBC client attempts to make a request to Athena.
10
-#' @param connection_timeout the maximum amount of time, in milliseconds, to make a successful connection to Athena before an attempt is terminated.
11
-#' @param socket_timeout the maximum amount of time, in milliseconds, to wait for a socket in order to send data to Athena.
12
-# @param retry_base_delay minimum delay amount, in milliseconds, between retrying attempts to connect Athena.
13
-# @param retry_max_backoff_time maximum delay amount, in milliseconds, between retrying attempts to connect Athena.
14
-#' @param log_path local path of the Athena JDBC driver logs. If no log path is provided, then no log files are created.
11
+#' @param s3_staging_dir the Amazon S3 location to which your query output is written.
12
+#'        The JDBC driver then asks Athena to read the results and provide rows
13
+#'        of data back to the user.
14
+#' @param max_error_retries the maximum number of retries that the JDBC client
15
+#'        attempts to make a request to Athena.
16
+#' @param connection_timeout the maximum amount of time, in milliseconds, to
17
+#'        make a successful connection to Athena before an attempt is terminated.
18
+#' @param socket_timeout the maximum amount of time, in milliseconds, to wait
19
+#'        for a socket in order to send data to Athena.
20
+#' @param log_path local path of the Athena JDBC driver logs. If no log path is
21
+#'        provided, then no log files are created.
15 22
 #' @param log_level log level of the Athena JDBC driver logs. Use  names
16
-#'     "OFF", "FATAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE".
23
+#'        "OFF", "FATAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE".
17 24
 #' @param ... passed on to the driver
18 25
 #' @export
19 26
 #' @references [Connect with JDBC](https://docs.aws.amazon.com/athena/latest/ug/connect-with-jdbc.html);
@@ -35,6 +42,7 @@
35 42
 #' }
36 43
 athena_connect <- function(
37 44
   default_schema = "default",
45
+  provider = "com.simba.athena.amazonaws.auth.DefaultAWSCredentialsProviderChain",
38 46
   region = c("us-east-1", "us-east-2", "us-west-2"),
39 47
   s3_staging_dir = Sys.getenv("AWS_S3_STAGING_DIR"),
40 48
   max_error_retries = 10,
@@ -52,17 +60,16 @@ athena_connect <- function(
52 60
 
53 61
   dbConnect(
54 62
     athena_jdbc,
55
-    schema_name = default_schema,
56
-    region = region,
57
-    s3_staging_dir = s3_staging_dir,
58
-    max_error_retries = max_error_retries,
59
-    connection_timeout = connection_timeout,
60
-    socket_timeout = socket_timeout,
61
-    log_path = log_path,
62
-    log_level = log_level,
63
+    Schema = default_schema,
64
+    AwsRegion = region,
65
+    S3OutputLocation = s3_staging_dir,
66
+    MaxErrorRetry = max_error_retries,
67
+    ConnectTimeout = connection_timeout,
68
+    SocketTimeout = socket_timeout,
69
+    LogPath = log_path,
70
+    LogLevel = log_level,
71
+    AwsCredentialsProviderClass= provider,
63 72
     ...
64 73
   ) -> con
65 74
 
66
-  con
67
-
68 75
 }

+ 23
- 12
README.Rmd View File

@@ -3,6 +3,20 @@ output: rmarkdown::github_document
3 3
 editor_options: 
4 4
   chunk_output_type: console
5 5
 ---
6
+```{r include=FALSE}
7
+knitr::opts_chunk$set(
8
+  echo = TRUE,
9
+  message = FALSE,
10
+  warning = FALSE,
11
+  fig.retina = 2
12
+)
13
+
14
+Sys.setenv(
15
+  AWS_S3_STAGING_DIR = "s3://aws-athena-query-results-569593279821-us-east-1"
16
+)
17
+
18
+options(width=120)
19
+```
6 20
 
7 21
 # metis
8 22
 
@@ -55,31 +69,28 @@ devtools::install_gitlab("hrbrmstr/metis")
55 69
 devtools::install_github("hrbrmstr/metis")
56 70
 ```
57 71
 
58
-```{r message=FALSE, warning=FALSE, include=FALSE}
59
-options(width=120)
60
-```
61
-
62 72
 ## Usage
63 73
 
64
-```{r message=FALSE, warning=FALSE}
74
+```{r}
65 75
 library(metis)
66 76
 
67 77
 # current version
68 78
 packageVersion("metis")
69 79
 ```
70 80
 
71
-```{r message=FALSE, warning=FALSE}
81
+```{r cache=FALSE}
72 82
 library(rJava)
73 83
 library(RJDBC)
74 84
 library(metis)
75
-library(magrittr) 
85
+library(magrittr) # for piping b/c I'm addicted
86
+```
76 87
 
88
+```{r}
77 89
 dbConnect(
78
-  drv = metis::Athena(),
79
-  schema_name = "sampledb",
80
-  provider = "com.simba.athena.amazonaws.auth.PropertiesFileCredentialsProvider",
81
-  AwsCredentialsProviderArguments = path.expand("~/.aws/athenaCredentials.props"),
82
-  s3_staging_dir = "s3://aws-athena-query-results-569593279821-us-east-1",
90
+  metis::Athena(),
91
+  Schema = "sampledb",
92
+  AwsCredentialsProviderClass = "com.simba.athena.amazonaws.auth.PropertiesFileCredentialsProvider",
93
+  AwsCredentialsProviderArguments = path.expand("~/.aws/athenaCredentials.props")
83 94
 ) -> con
84 95
 
85 96
 dbListTables(con, schema="sampledb")

+ 20
- 19
README.md View File

@@ -66,14 +66,15 @@ packageVersion("metis")
66 66
 library(rJava)
67 67
 library(RJDBC)
68 68
 library(metis)
69
-library(magrittr) 
69
+library(magrittr) # for piping b/c I'm addicted
70
+```
70 71
 
72
+``` r
71 73
 dbConnect(
72
-  drv = metis.lite::Athena(),
73
-  schema_name = "sampledb",
74
-  provider = "com.simba.athena.amazonaws.auth.PropertiesFileCredentialsProvider",
75
-  AwsCredentialsProviderArguments = path.expand("~/.aws/athenaCredentials.props"),
76
-  s3_staging_dir = "s3://aws-athena-query-results-569593279821-us-east-1",
74
+  metis::Athena(),
75
+  Schema = "sampledb",
76
+  AwsCredentialsProviderClass = "com.simba.athena.amazonaws.auth.PropertiesFileCredentialsProvider",
77
+  AwsCredentialsProviderArguments = path.expand("~/.aws/athenaCredentials.props")
77 78
 ) -> con
78 79
 
79 80
 dbListTables(con, schema="sampledb")
@@ -103,21 +104,21 @@ dbGetQuery(con, "SELECT * FROM sampledb.elb_logs LIMIT 10") %>%
103 104
 
104 105
     ## Observations: 10
105 106
     ## Variables: 16
106
-    ## $ timestamp             <chr> "2014-09-29T03:24:38.169500Z", "2014-09-29T03:25:09.029469Z", "2014-09-29T03:25:39.8676
107
+    ## $ timestamp             <chr> "2014-09-27T00:00:25.424956Z", "2014-09-27T00:00:56.439218Z", "2014-09-27T00:01:27.4417
107 108
     ## $ elbname               <chr> "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo",…
108
-    ## $ requestip             <chr> "253.89.30.138", "248.64.121.231", "245.21.209.210", "244.77.57.59", "244.185.170.87", 
109
-    ## $ requestport           <int> 20159, 20159, 20159, 20159, 20159, 20159, 20159, 20159, 20159, 20159
110
-    ## $ backendip             <chr> "253.89.30.138", "244.77.57.59", "240.105.192.251", "253.89.30.138", "248.64.121.231", 
111
-    ## $ backendport           <int> 8888, 8888, 8888, 8899, 8888, 8888, 8888, 8888, 8888, 8888
112
-    ## $ requestprocessingtime <dbl> 7.5e-05, 9.1e-05, 9.0e-05, 9.5e-05, 8.9e-05, 9.3e-05, 8.7e-05, 9.2e-05, 9.0e-05, 9.1e-05
113
-    ## $ backendprocessingtime <dbl> 0.047465, 0.044693, 0.045687, 0.051089, 0.045445, 0.045845, 0.046027, 0.045039, 0.05010
114
-    ## $ clientresponsetime    <dbl> 6.5e-05, 7.2e-05, 6.4e-05, 7.0e-05, 5.4e-05, 6.7e-05, 5.7e-05, 4.6e-05, 8.7e-05, 4.9e-05
109
+    ## $ requestip             <chr> "241.230.198.83", "252.26.60.51", "250.244.20.109", "247.59.58.167", "254.64.224.54", "
110
+    ## $ requestport           <int> 27026, 27026, 27026, 27026, 27026, 27026, 27026, 27026, 27026, 27026
111
+    ## $ backendip             <chr> "251.192.40.76", "249.89.116.3", "251.111.156.171", "251.139.91.156", "251.111.156.171"
112
+    ## $ backendport           <int> 443, 8888, 8888, 8888, 8000, 8888, 8888, 8888, 8888, 8888
113
+    ## $ requestprocessingtime <dbl> 9.1e-05, 9.4e-05, 8.4e-05, 9.7e-05, 9.1e-05, 9.3e-05, 9.4e-05, 8.3e-05, 9.0e-05, 9.0e-05
114
+    ## $ backendprocessingtime <dbl> 0.046598, 0.038973, 0.047054, 0.039845, 0.061461, 0.037791, 0.047035, 0.048792, 0.04572
115
+    ## $ clientresponsetime    <dbl> 4.9e-05, 4.7e-05, 4.9e-05, 4.9e-05, 4.0e-05, 7.7e-05, 7.5e-05, 7.3e-05, 4.0e-05, 6.7e-05
115 116
     ## $ elbresponsecode       <chr> "200", "200", "200", "200", "200", "200", "200", "200", "200", "200"
116
-    ## $ backendresponsecode   <chr> "200", "200", "400", "200", "404", "200", "403", "404", "200", "200"
117
+    ## $ backendresponsecode   <chr> "200", "200", "200", "200", "200", "400", "400", "200", "200", "200"
117 118
     ## $ receivedbytes         <S3: integer64> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
118 119
     ## $ sentbytes             <S3: integer64> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
119 120
     ## $ requestverb           <chr> "GET", "GET", "GET", "GET", "GET", "GET", "GET", "GET", "GET", "GET"
120
-    ## $ url                   <chr> "http://www.abcxyz.com:80/jobbrowser/?format=json&state=running&user=248nnm5", "http://…
121
+    ## $ url                   <chr> "http://www.abcxyz.com:80/jobbrowser/?format=json&state=running&user=20g578y", "http://…
121 122
     ## $ protocol              <chr> "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HT…
122 123
 
123 124
 ### Check types
@@ -148,7 +149,7 @@ LIMIT 1
148 149
     ## Variables: 13
149 150
     ## $ achar     <chr> "chr "
150 151
     ## $ avarchr   <chr> "varchr"
151
-    ## $ tsday     <date> 2014-09-26
152
+    ## $ tsday     <date> 2014-09-29
152 153
     ## $ justadbl  <dbl> 100.1
153 154
     ## $ asmallint <int> 127
154 155
     ## $ justanint <int> 100
@@ -166,8 +167,8 @@ cloc::cloc_pkg_md()
166 167
 
167 168
 | Lang | \# Files |  (%) | LoC |  (%) | Blank lines |  (%) | \# Lines |  (%) |
168 169
 | :--- | -------: | ---: | --: | ---: | ----------: | ---: | -------: | ---: |
169
-| R    |        8 | 0.89 | 232 | 0.85 |          77 | 0.71 |      160 | 0.76 |
170
-| Rmd  |        1 | 0.11 |  42 | 0.15 |          32 | 0.29 |       51 | 0.24 |
170
+| R    |        8 | 0.89 | 250 | 0.83 |          83 | 0.72 |      194 | 0.79 |
171
+| Rmd  |        1 | 0.11 |  50 | 0.17 |          32 | 0.28 |       53 | 0.21 |
171 172
 
172 173
 ## Code of Conduct
173 174
 

+ 19
- 8
man/athena_connect.Rd View File

@@ -4,27 +4,38 @@
4 4
 \alias{athena_connect}
5 5
 \title{Simplified Athena JDBC connection helper}
6 6
 \usage{
7
-athena_connect(default_schema = "default", region = c("us-east-1",
8
-  "us-east-2", "us-west-2"),
7
+athena_connect(default_schema = "default",
8
+  provider = "com.simba.athena.amazonaws.auth.DefaultAWSCredentialsProviderChain",
9
+  region = c("us-east-1", "us-east-2", "us-west-2"),
9 10
   s3_staging_dir = Sys.getenv("AWS_S3_STAGING_DIR"),
10 11
   max_error_retries = 10, connection_timeout = 10000,
11 12
   socket_timeout = 10000, log_path = "", log_level = c("OFF",
12 13
   "FATAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE"), ...)
13 14
 }
14 15
 \arguments{
15
-\item{default_schema}{default schema (you'll still need to fully qualify non-default schema table names)}
16
+\item{default_schema}{the name of the database schema to use when a schema is
17
+not explicitly specified in a query. You can still issue queries on other
18
+schemas by explicitly specifying the schema in the query.}
19
+
20
+\item{provider}{JDBC auth provider (defaults to \code{com.simba.athena.amazonaws.auth.DefaultAWSCredentialsProviderChain})}
16 21
 
17 22
 \item{region}{AWS region (Ref: \url{http://docs.aws.amazon.com/general/latest/gr/rande.html#athena})}
18 23
 
19
-\item{s3_staging_dir}{the Amazon S3 location to which your query output is written. The JDBC driver then asks Athena to read the results and provide rows of data back to the user.}
24
+\item{s3_staging_dir}{the Amazon S3 location to which your query output is written.
25
+The JDBC driver then asks Athena to read the results and provide rows
26
+of data back to the user.}
20 27
 
21
-\item{max_error_retries}{the maximum number of retries that the JDBC client attempts to make a request to Athena.}
28
+\item{max_error_retries}{the maximum number of retries that the JDBC client
29
+attempts to make a request to Athena.}
22 30
 
23
-\item{connection_timeout}{the maximum amount of time, in milliseconds, to make a successful connection to Athena before an attempt is terminated.}
31
+\item{connection_timeout}{the maximum amount of time, in milliseconds, to
32
+make a successful connection to Athena before an attempt is terminated.}
24 33
 
25
-\item{socket_timeout}{the maximum amount of time, in milliseconds, to wait for a socket in order to send data to Athena.}
34
+\item{socket_timeout}{the maximum amount of time, in milliseconds, to wait
35
+for a socket in order to send data to Athena.}
26 36
 
27
-\item{log_path}{local path of the Athena JDBC driver logs. If no log path is provided, then no log files are created.}
37
+\item{log_path}{local path of the Athena JDBC driver logs. If no log path is
38
+provided, then no log files are created.}
28 39
 
29 40
 \item{log_level}{log level of the Athena JDBC driver logs. Use  names
30 41
 "OFF", "FATAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE".}

+ 29
- 17
man/dbConnect-AthenaDriver-method.Rd View File

@@ -5,41 +5,53 @@
5 5
 \alias{dbConnect,AthenaDriver-method}
6 6
 \title{AthenaJDBC}
7 7
 \usage{
8
-\S4method{dbConnect}{AthenaDriver}(drv,
9
-  provider = "com.simba.athena.amazonaws.auth.DefaultAWSCredentialsProviderChain",
10
-  region = "us-east-1",
11
-  s3_staging_dir = Sys.getenv("AWS_S3_STAGING_DIR"),
12
-  schema_name = "default", fetch_size = 1000L,
13
-  max_error_retries = 10, connection_timeout = 10000,
14
-  socket_timeout = 10000, log_path = "", log_level = 0, ...)
8
+\S4method{dbConnect}{AthenaDriver}(drv, Schema = "default",
9
+  AwsRegion = "us-east-1",
10
+  AwsCredentialsProviderClass = "com.simba.athena.amazonaws.auth.DefaultAWSCredentialsProviderChain",
11
+  S3OutputLocation = Sys.getenv("AWS_S3_STAGING_DIR", unset = ""),
12
+  MaxErrorRetry = 10, ConnectTimeout = 10000, SocketTimeout = 10000,
13
+  LogPath = "", LogLevel = 0, fetch_size = 1000L, ...)
15 14
 }
16 15
 \arguments{
17 16
 \item{drv}{driver}
18 17
 
19
-\item{provider}{JDBC auth provider (ideally leave default)}
18
+\item{Schema}{The name of the database schema to use when a schema is not explicitly
19
+specified in a query. You can still issue queries on other schemas by explicitly
20
+specifying the schema in the query.}
20 21
 
21
-\item{region}{AWS region the Athena tables are in}
22
+\item{AwsRegion}{AWS region the Athena tables are in}
22 23
 
23
-\item{s3_staging_dir}{A write-able bucket on S3 that you have permissions for}
24
+\item{AwsCredentialsProviderClass}{JDBC auth provider; You can add a
25
+length-1 character vector named parameter `AwsCredentialsProviderArguments`
26
+to the `dbConnect()`  call to use alternate auth providers. Use a
27
+comma-separated list of String arguments.}
24 28
 
25
-\item{schema_name}{LOL if only this actually worked with Amazon's hacked Presto driver}
29
+\item{S3OutputLocation}{A write-able bucket on S3 that you have permissions for}
26 30
 
27
-\item{fetch_size}{Athena results fetch size}
28
-
29
-\item{max_error_retries, connection_timeout, socket_timeout}{technical connection info that you should only muck with if you know what you're doing.}
31
+\item{MaxErrorRetry, ConnectTimeout, SocketTimeout}{technical connection info that you should only muck with if you know what you're doing.}
30 32
 
31
-\item{log_path, log_level}{The Athena JDBC driver can (shockingly) provide a decent bit
33
+\item{LogPath, LogLevel}{The Athena JDBC driver can (shockingly) provide a decent bit
32 34
 of data in logs. Set this to a temporary directory or something log4j can use. For
33
-`log_level` use the names ("INFO", "DEBUG", "WARN", "ERROR", "ALL", "OFF", "FATAL", "TRACE") or
35
+`LogLevel` use the names ("INFO", "DEBUG", "WARN", "ERROR", "ALL", "OFF", "FATAL", "TRACE") or
34 36
 their corresponding integer values 0-6.}
35 37
 
38
+\item{fetch_size}{Athena results fetch size}
39
+
36 40
 \item{...}{passed on to the driver. See Details.}
37 41
 }
38 42
 \description{
39 43
 Connect to Athena
40 44
 }
41
-\section{Driver Configuration Options}{
45
+\details{
46
+Mandatory JDBC connection parameters are also named function
47
+parameters. You can use `...` to supply additional/optional
48
+parameters.
49
+}
50
+\section{Highlighted Extra Driver Configuration Options}{
51
+
42 52
 
53
+These are taken from the second item in References. See that resource
54
+for more information.
43 55
 
44 56
 - `BinaryColumnLength`: <int> The maximum data length for `BINARY` columns. Default `32767L`
45 57
 - `ComplexTypeColumnLength`: <int> The maximum data length for `ARRAY`, `MAP`, and `STRUCT` columns. Default `65535L`

+ 4
- 4
tests/testthat/test-metis.R View File

@@ -6,12 +6,12 @@ drv <- metis::Athena()
6 6
 
7 7
 expect_is(drv, "AthenaDriver")
8 8
 
9
-dbConnect(
9
+metis::dbConnect(
10 10
   drv = drv,
11
-  schema_name = "sampledb",
12
-  provider = "com.simba.athena.amazonaws.auth.PropertiesFileCredentialsProvider",
11
+  Schema = "sampledb",
12
+  AwsCredentialsProviderClass = "com.simba.athena.amazonaws.auth.PropertiesFileCredentialsProvider",
13 13
   AwsCredentialsProviderArguments = path.expand("~/.aws/athenaCredentials.props"),
14
-  s3_staging_dir = "s3://aws-athena-query-results-569593279821-us-east-1",
14
+  S3OutputLocation = "s3://aws-athena-query-results-569593279821-us-east-1",
15 15
 ) -> con
16 16
 
17 17
 expect_is(con, "AthenaConnection")

+ 2
- 0
vignettes/.gitignore View File

@@ -0,0 +1,2 @@
1
+*.html
2
+*.R

+ 102
- 0
vignettes/athena-connection-parameters.Rmd View File

@@ -0,0 +1,102 @@
1
+---
2
+title: "Athena Connection Parameters"
3
+author: "Bob Rudis"
4
+date: "`r Sys.Date()`"
5
+output: rmarkdown::html_vignette
6
+vignette: >
7
+  %\VignetteIndexEntry{Athena Connection Parameters}
8
+  %\VignetteEngine{knitr::rmarkdown}
9
+  %\VignetteEncoding{UTF-8}
10
+---
11
+
12
+```{r setup, include = FALSE}
13
+knitr::opts_chunk$set(
14
+  collapse = TRUE,
15
+  comment = "#>"
16
+)
17
+```
18
+
19
+```{r echo=FALSE}
20
+structure(list(property = c("AwsCredentialsProviderArguments", 
21
+"AwsCredentialsProviderClass", "AwsRegion", "BinaryColumnLength", 
22
+"ComplexTypeColumnLength", "ConnectionTest", "ConnectTimeout", 
23
+"IdP_Host", "IdP_Port", "LogLevel", "LogPath", "MaxCatalogNameLength", 
24
+"MaxColumnNameLength", "MaxErrorRetry", "MaxQueryExecutionPollingInterval", 
25
+"MaxSchemaNameLength", "MaxTableNameLength", "MetadataRetrievalMethod", 
26
+"NonProxyHosts", "Password", "PreemptiveBasicProxyAuth", "preferred_role", 
27
+"Profile", "ProxyDomain", "ProxyHost", "ProxyPort", "ProxyPWD", 
28
+"ProxyUID", "ProxyWorkstation", "RowsToFetchPerBlock", "S3OutputEncKMSKey", 
29
+"S3OutputEncOption", "S3OutputLocation", "Schema", "SocketTimeout", 
30
+"SSL_Insecure", "StringColumnLength", "UseArraySupport", "UseAwsLogger", 
31
+"User", "UseResultsetStreaming"), default = c("None", "None", 
32
+"None", "32767", "65535", "1", "10", "None", "443", "0", "The current working directory.", 
33
+"0", "0", "10", "100", "256", "0", "Auto", "None", "None", "0", 
34
+"None", "None", "None", "None", "None", "None", "None", "None", 
35
+"10000 for result set streaming, 1000 for pagination", "None", 
36
+"None", "None", "\"default\"", "50", "\"false\"", "255", "1", 
37
+"0", "None", "1"), type = c("String", "String", "String", "Integer", 
38
+"Integer", "Integer", "Integer", "String", "String", "Integer", 
39
+"String", "Integer", "Integer", "Integer", "Integer", "Integer", 
40
+"Integer", "String", "String", "String", "Integer", "String", 
41
+"String", "String", "String", "Integer", "String", "String", 
42
+"String", "Integer", "String", "String", "String", "String", 
43
+"Integer", "String", "Integer", "Integer", "Integer", "String", 
44
+"Integer"), required = c("Yes, if User and Password are not provided, and if AwsCredentialsProviderClass does not have a default constructor.", 
45
+"Yes,if User and Password are not provided, or if you are authenticating through AD FS.", 
46
+"Yes", "No", "No", "No", "No", "Yes, if authenticating through AD FS.", 
47
+"No", "No", "No", "No", "No", "No", "No", "No", "No", "No", "No", 
48
+"Yes, if using IAM credentials or the AD FS provider for authentication.", 
49
+"No", "No", "No", "No", "No", "No", "Yes, if connecting through a proxy server that requires authentication.", 
50
+"Yes, if connecting through a proxy server that requires authentication.", 
51
+"No", "No", "Yes, if using SSE_KMS or CSE_KMS encryption.", "No", 
52
+"Yes", "No", "No", "No", "No", "No", "No", "Yes, if using IAM credentials or the AD FS provider for authentication.", 
53
+"No"), info = c("A comma-separated list of String arguments for the constructor of the AwsCredentialsProviderClass.", 
54
+"If you are authenticating through the AD FS credentials provider, then set this property to the FQCN of the AD FS credentials provider. You can set this property in the connection URL or in an AWS profile. If you are authenticating through a class that implements the AWSCredentialsProvider interface, then set this property to the FQCN of the AWSCredentialsProvider interface.", 
55
+"The AWS region of the Athena and AWS Glue instance that you want to connect to.", 
56
+"The maximum data length for BINARY columns.", "The maximum data length for ARRAY, MAP, and STRUCT columns.", 
57
+"This property determines whether the driver verifies the connection by sending a “SELECT 1” query when establishing a connection with Athena. 1: The driver verifies connection by sending a simple “SELECT 1” query to Athena; 0: The driver does not send any query to Athena to verify the connection.", 
58
+"The amount of time, in seconds, that the driver waits when establishing a connection before timing out the connection. A value of 0 indicates that the driver never times out the connection.", 
59
+"The host name of the AD FS service that you use to authenticate the connection. The host name cannot include any slashes (/).", 
60
+"The number of the port that the AD FS service host uses to listen for requests. The port number to specify may differ depending on the AD FS server configuration. If you are not sure which port to specify, contact your system administrator.", 
61
+"Use this property to enable or disable logging in the driver and to specify the amount of detail included in log files. When logging is enabled, the driver produces the following log files in the location specified in the LogPath property: 1: An AthenaJDBC_driver.log file that logs driver activity that is not specific to a connection; 2: An AthenaJDBC_connection_[Number].log file for each connection made to the database, where [Number] is a number that distinguishes each log file from the others. This file logs driver activity that is specific to the connection.", 
62
+"The full path to the folder where the driver saves log files when logging is enabled.", 
63
+"The maximum number of characters that catalog names can contain. To indicate that there is no maximum length or that the length is unknown, set this option to 0.", 
64
+"The maximum number of characters that column names can contain. To indicate that there is no maximum length or that the length is unknown, set this option to 0.", 
65
+"The maximum number of times that the driver resubmits a failed request that can be retried, such as a 5xx error from the Athena server.", 
66
+"The maximum amount of time, in milliseconds, that the driver waits between attempts when polling the Athena server for query results. You cannot specify an interval that is less than 5ms.", 
67
+"The maximum number of characters that schema names can contain. To indicate that there is no maximum length or that the length is unknown, set this option to 0.", 
68
+"The maximum number of characters that table names can contain. To indicate that there is no maximum length or that the length is unknown, set this option to 0.", 
69
+"This property determines how the metadata would be retrieved from Athena for different JDBC API calls like getTables, getColumns. Following are the valid values: \"Auto\": During connection time driver will automatically determine whether to use AWS Glue or Query to get metadata for the specified Athena region. If AWS Glue is supported in the region and Athena has been upgraded to use AWS Glue, driver will use AWS Glue to get the metadata. If AWS Glue is not supported in the region or Athena hasn’t been upgraded to use AWS Glue, driver will query Athena to get the metadata; \"Glue\": Driver will use AWS Glue to get the metadata regardless of whether AWS Glue is supported or used in the region; \"Query\": Driver will use Query to get the metadata regardless of whether AWS Glue is supported or used in that region.", 
70
+"A list of hosts that the driver can access without connecting through the proxy server, when a proxy connection is enabled. When specifying multiple hosts, each host must be separated by a vertical bar (|). You can specify patterns using asterisks (*) as wildcard characters.", 
71
+"If you are using IAM credentials for authentication, then set this property to the secret key provided by your AWS account. If you are authenticating through the AD FS credentials provider, then set this property to the password that you use to access the AD FS server.", 
72
+"This property specifies whether the driver pre-emptively authenticates against the proxy server using basic authentication, when a proxy connection is enabled. 1: The driver pre-emptively authenticates the connection using basic authentication; 0: The driver does not pre-emptively authenticate the connection using basic authentication.", 
73
+"However, by default, the driver assumes the first role from the list returned in the SAML response from the identity provider.", 
74
+"The name of the AWS profile to use, containing any additional connection properties not specified in the connection URL. For example, when configuring the driver to authenticate through AD FS, you can use this property to specify a profile that contains the required AD FS service information. The driver checks the AWS credentials file for the specified profile. The default location for this file is ~/.aws/credentials. You can change this default behavior by setting the AWS_CREDENTIAL_PROFILES_FILE environment variable to the full path and name of a different credentials file. For more information about profiles, see \"Working with AWS Credentials\" in the AWS SDK for Java Developer Guide: https://docs.aws.amazon.com/sdk-for- java/v1/developer-guide/credentials.html.", 
75
+"The Windows domain name of the server that you want to authenticate through, when authenticating a proxy connection using the NTLM protocol.", 
76
+"The IP address or host name of your proxy server.", "The listening port of your proxy server.", 
77
+"The password that you use to access the proxy server.", "The user name that you use to access the proxy server.", 
78
+"The Windows workstation name of the server that you want to authenticate through, when authenticating a proxy connection using the NTLM protocol.", 
79
+"The maximum number of rows to fetch per stream if using the result set streaming API. The maximum number of rows to fetch per page if using pagination.", 
80
+"The KMS key ARN or ID to use when encrypting query results using SSE_KMS or CSE_KMS encryption.", 
81
+"The encryption protocol that the driver uses to encrypt your query results before storing them on Amazon S3. \"SSE_S3\": The driver uses server-side encryption with an Amazon S3-managed key; \"SSE_KMS\": The driver uses server-side encryption with an AWS KMS-managed key; \"CSE_KMS\": The driver uses client-side encryption with an AWS KMS-managed key.", 
82
+"The path of the Amazon S3 location where you want to store query results, prefixed by s3://.", 
83
+"The name of the database schema to use when a schema is not explicitly specified in a query. You can still issue queries on other schemas by explicitly specifying the schema in the query.", 
84
+"The amount of time, in seconds, that the driver waits for data to be transferred over an established, open connection before timing out the connection. A value of 0 (NOT recommended) indicates that the driver never times out the connection.", 
85
+"This property indicates whether the server certificate of the AD FS host should be verified.", 
86
+"The maximum data length for STRING columns.", "This property specifies whether the driver supports getting the ResultSet data as an array.", 
87
+"This property specifies whether the driver records the log output from any AWS API calls.", 
88
+"If you are using IAM credentials for authentication, then set this property to the access key provided by your AWS account.", 
89
+"This property specifies whether the driver uses the AWS result set streaming API for result set fetching."
90
+)), class = c("spec_tbl_df", "tbl_df", "tbl", "data.frame"), row.names = c(NA, 
91
+-41L), spec = structure(list(cols = list(property = structure(list(), class = c("collector_character", 
92
+"collector")), default = structure(list(), class = c("collector_character", 
93
+"collector")), type = structure(list(), class = c("collector_character", 
94
+"collector")), required = structure(list(), class = c("collector_character", 
95
+"collector")), info = structure(list(), class = c("collector_character", 
96
+"collector"))), default = structure(list(), class = c("collector_guess", 
97
+"collector")), skip = 1), class = "col_spec")) -> docs
98
+```
99
+
100
+```{r echo=FALSE}
101
+knitr::kable(docs, "markdown", )
102
+```

Loading…
Cancel
Save