Browse Source

dplyr bits working

boB Rudis 11 months ago
parent
commit
646d4938e8
No known key found for this signature in database
13 changed files with 325 additions and 149 deletions
  1. 2
    2
      DESCRIPTION
  2. 4
    0
      R/a-utils.R
  3. 20
    4
      R/jdbc.r
  4. 2
    1
      R/metis-lite-package.R
  5. 15
    16
      R/metis.r
  6. 10
    4
      R/z-dbGetQuery.R
  7. 8
    1
      R/zzz.R
  8. 87
    42
      README.Rmd
  9. 143
    64
      README.md
  10. 14
    6
      man/athena_connect.Rd
  11. 15
    6
      man/dbConnect-AthenaDriver-method.Rd
  12. 1
    2
      man/dbGetQuery-AthenaConnection-character-method.Rd
  13. 4
    1
      man/metis.lite.Rd

+ 2
- 2
DESCRIPTION View File

@@ -1,6 +1,6 @@
1 1
 Package: metis.lite
2 2
 Type: Package
3
-Title: Helpers for Accessing and Querying Amazon Athena
3
+Title: Access and Query Amazon Athena via DBI/JDBC
4 4
 Version: 0.3.0
5 5
 Date: 2019-02-14
6 6
 Authors@R: c(
@@ -11,7 +11,7 @@ Maintainer: Bob Rudis <bob@rud.is>
11 11
 Encoding: UTF-8
12 12
 Description: Methods are provided to connect to 'Amazon' 'Athena', lookup schemas/tables,
13 13
     perform queries and retrieve query results. A lightweight 'RJDBC' implementation 
14
-    is included along with an interface to the 'AWS' command-line utility.
14
+    is included along with additional helpers for 'dbplyr'/'dplyr' support.
15 15
 SystemRequirements: JDK 1.8+
16 16
 License: MIT + file LICENSE
17 17
 Suggests:

+ 4
- 0
R/a-utils.R View File

@@ -3,6 +3,10 @@ set_names <- function (object = nm, nm) {
3 3
   object
4 4
 }
5 5
 
6
+as_logical <- function(x) {
7
+  as.logical(as.integer(x))
8
+}
9
+
6 10
 as_date <- function(x) {
7 11
   as.Date(x, origin = "1970-01-01")
8 12
 }

+ 20
- 4
R/jdbc.r View File

@@ -36,6 +36,14 @@ Athena <- function(identifier.quote = '`') {
36 36
 
37 37
 #' AthenaJDBC
38 38
 #'
39
+#' Connect to Athena
40
+#'
41
+#' @section Driver Configuration Options:
42
+#'
43
+#' - `BinaryColumnLength`: <int> The maximum data length for `BINARY` columns. Default `32767L`
44
+#' - `ComplexTypeColumnLength`: <int> The maximum data length for `ARRAY`, `MAP`, and `STRUCT` columns. Default `65535L`
45
+#' - `StringColumnLength`: <int> The maximum data length for `STRING` columns. Default `255L`
46
+#'
39 47
 #' @param provider JDBC auth provider (ideally leave default)
40 48
 #' @param region AWS region the Athena tables are in
41 49
 #' @param s3_staging_dir A write-able bucket on S3 that you have permissions for
@@ -46,8 +54,9 @@ Athena <- function(identifier.quote = '`') {
46 54
 #'     of data in logs. Set this to a temporary directory or something log4j can use. For
47 55
 #'     `log_level` use the names ("INFO", "DEBUG", "WARN", "ERROR", "ALL", "OFF", "FATAL", "TRACE") or
48 56
 #'     their corresponding integer values 0-6.
49
-#' @param ... unused
50
-#' @references <https://docs.aws.amazon.com/athena/latest/ug/connect-with-jdbc.html>
57
+#' @param ... passed on to the driver. See the "Driver Configuration Options" section.
58
+#' @references [Connect with JDBC](https://docs.aws.amazon.com/athena/latest/ug/connect-with-jdbc.html);
59
+#'     [Simba Athena JDBC Driver with SQL Connector Installation and Configuration Guide](https://s3.amazonaws.com/athena-downloads/drivers/JDBC/SimbaAthenaJDBC_2.0.6/docs/Simba+Athena+JDBC+Driver+Install+and+Configuration+Guide.pdf)
51 60
 #' @export
52 61
 setMethod(
53 62
 
@@ -60,6 +69,7 @@ setMethod(
60 69
     region = "us-east-1",
61 70
     s3_staging_dir = Sys.getenv("AWS_S3_STAGING_DIR"),
62 71
     schema_name = "default",
72
+    fetch_size = 1000L,
63 73
     max_error_retries = 10,
64 74
     connection_timeout = 10000,
65 75
     socket_timeout = 10000,
@@ -87,7 +97,11 @@ setMethod(
87 97
       ...
88 98
     ) -> jc
89 99
 
90
-    return(as(jc, "AthenaConnection"))
100
+
101
+    jc <- as(jc, "AthenaConnection")
102
+    jc@fetch_size <- as.integer(fetch_size)
103
+
104
+    return(jc)
91 105
 
92 106
   }
93 107
 
@@ -96,7 +110,9 @@ setMethod(
96 110
 #' AthenaJDBC
97 111
 #'
98 112
 #' @export
99
-setClass("AthenaConnection", contains = "JDBCConnection")
113
+setClass("AthenaConnection", representation("JDBCConnection", jc="jobjRef", identifier.quote="character", fetch_size="integer"))
114
+
115
+# setClass("AthenaConnection", contains = "JDBCConnection")
100 116
 
101 117
 #' AthenaJDBC
102 118
 #'

+ 2
- 1
R/metis-lite-package.R View File

@@ -1,4 +1,4 @@
1
-#' Helpers for Accessing and Querying Amazon Athena
1
+#' Access and Query Amazon Athena via DBI/JDBC
2 2
 #'
3 3
 #' Methods are provided to connect to 'Amazon' 'Athena', lookup schemas/tables,
4 4
 #' perform queries and retrieve query results. A lightweight 'RJDBC' implementation
@@ -27,6 +27,7 @@
27 27
 #' @docType package
28 28
 #' @author Bob Rudis (bob@@rud.is)
29 29
 #' @import RJDBC DBI bit64 dbplyr
30
+#' @references [Simba Athena JDBC Driver with SQL Connector Installation and Configuration Guide](https://s3.amazonaws.com/athena-downloads/drivers/JDBC/SimbaAthenaJDBC_2.0.6/docs/Simba+Athena+JDBC+Driver+Install+and+Configuration+Guide.pdf)
30 31
 NULL
31 32
 
32 33
 

+ 15
- 16
R/metis.r View File

@@ -1,4 +1,4 @@
1
-#' Make a JDBC connection to Athena
1
+#' Simplified Athena JDBC connection helper
2 2
 #'
3 3
 #' Handles the up-front JDBC config
4 4
 #'
@@ -14,14 +14,19 @@
14 14
 #' @param log_path local path of the Athena JDBC driver logs. If no log path is provided, then no log files are created.
15 15
 #' @param log_level log level of the Athena JDBC driver logs. Use  names
16 16
 #'     "OFF", "FATAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE".
17
+#' @param ... passed on to the driver
17 18
 #' @export
19
+#' @references [Connect with JDBC](https://docs.aws.amazon.com/athena/latest/ug/connect-with-jdbc.html);
20
+#'     [Simba Athena JDBC Driver with SQL Connector Installation and Configuration Guide](https://s3.amazonaws.com/athena-downloads/drivers/JDBC/SimbaAthenaJDBC_2.0.6/docs/Simba+Athena+JDBC+Driver+Install+and+Configuration+Guide.pdf)
18 21
 #' @examples \dontrun{
19 22
 #' use_credentials("personal")
20 23
 #'
21
-#' ath <- athena_connect(default_schema = "sampledb",
22
-#'                       s3_staging_dir = "s3://accessible-bucket",
23
-#'                       log_path = "/tmp/athena.log",
24
-#'                       log_level = "DEBUG")
24
+#' athena_connect(
25
+#'   default_schema = "sampledb",
26
+#'   s3_staging_dir = "s3://accessible-bucket",
27
+#'   log_path = "/tmp/athena.log",
28
+#'   log_level = "DEBUG"
29
+#' ) -> ath
25 30
 #'
26 31
 #' dbListTables(ath)
27 32
 #'
@@ -35,17 +40,16 @@ athena_connect <- function(
35 40
   max_error_retries = 10,
36 41
   connection_timeout = 10000,
37 42
   socket_timeout = 10000,
38
-  # retry_base_delay = 100,
39
-  # retry_max_backoff_time = 1000,
40 43
   log_path = "",
41
-  log_level = c("OFF", "FATAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE")) {
44
+  log_level = c("OFF", "FATAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE"),
45
+  ...
46
+) {
42 47
 
43 48
   athena_jdbc <- Athena()
44 49
 
45 50
   region <- match.arg(region, c("us-east-1", "us-east-2", "us-west-2"))
46 51
   log_level <- match.arg(log_level, c("OFF", "FATAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE"))
47 52
 
48
-  # if (!simple) {
49 53
   dbConnect(
50 54
     athena_jdbc,
51 55
     schema_name = default_schema,
@@ -54,15 +58,10 @@ athena_connect <- function(
54 58
     max_error_retries = max_error_retries,
55 59
     connection_timeout = connection_timeout,
56 60
     socket_timeout = socket_timeout,
57
-    # retry_base_delay = retry_base_delay,
58
-    # retry_max_backoff_time = retry_max_backoff_time,
59 61
     log_path = log_path,
60
-    log_level = log_level
62
+    log_level = log_level,
63
+    ...
61 64
   ) -> con
62
-  # } else {
63
-  #   con <- dbConnect(athena_jdbc, provider = NULL, schema_name = default_schema, region = region,
64
-  #                    s3_staging_dir = s3_staging_dir, log_path = log_path, log_level = log_level)
65
-  # }
66 65
 
67 66
   con
68 67
 

+ 10
- 4
R/z-dbGetQuery.R View File

@@ -16,10 +16,11 @@ list(
16 16
   "7" = as.double, # REAL
17 17
   "8" = as.double, # DOUBLE
18 18
   "12" = as.character, # VARCHAR
19
-  "16" = as.logical, # BOOLEAN
19
+  "16" = as_logical, # BOOLEAN
20 20
   "91" = as_date, # DATE
21 21
   "92" = as.character, # TIME
22 22
   "93" = as_posixct, # TIMESTAMP
23
+  "2003" = as.character, # ARRAY
23 24
   "1111" = as.character # OTHER
24 25
 ) -> .jdbc_converters
25 26
 
@@ -71,14 +72,19 @@ setMethod(
71 72
       ct <- as.character(.jcall(res@md, "I", "getColumnType", i))
72 73
       athena_type_convert[[i]] <- .jdbc_converters[[ct]]
73 74
       nms <- c(nms, .jcall(res@md, "S", "getColumnLabel", i))
75
+      # message(ct, "|", tail(nms, 1))
74 76
     }
75 77
 
76 78
     athena_type_convert <- set_names(athena_type_convert, nms)
77 79
 
78 80
     out <- callNextMethod(res = res, n = n, block = block, ...)
79 81
 
82
+    # print(str(out))
83
+
80 84
     for (nm in names(athena_type_convert)) {
81
-      out[[nm]] <- athena_type_convert[[nm]](out[[nm]])
85
+      f <- athena_type_convert[[nm]]
86
+      if (length(f) == 0) f <- as.character # catchall in case AMZN is tricksy
87
+      out[[nm]] <- f(out[[nm]])
82 88
     }
83 89
 
84 90
     out
@@ -98,13 +104,13 @@ setMethod(
98 104
   "dbGetQuery",
99 105
   signature(conn="AthenaConnection", statement="character"),
100 106
 
101
-  definition = function(conn, statement, type_convert=FALSE, ...) {
107
+  definition = function(conn, statement, ...) {
102 108
 
103 109
     r <- dbSendQuery(conn, statement, ...)
104 110
 
105 111
     on.exit(.jcall(r@stat, "V", "close"))
106 112
 
107
-    res <- fetch(r, -1, block = 1000L)
113
+    res <- fetch(r, -1, block = conn@fetch_size)
108 114
 
109 115
     class(res) <- c("tbl_df", "tbl", "data.frame")
110 116
 

+ 8
- 1
R/zzz.R View File

@@ -1,4 +1,11 @@
1 1
 .onLoad <- function(libname, pkgname) {
2 2
   rJava::.jpackage(pkgname, jars = "*", lib.loc = libname)
3
-  # rJava::.jaddClassPath(dir(file.path(getwd(), "inst/java"), full.names = TRUE))
3
+  rJava::.jaddClassPath(dir(file.path(getwd(), "inst/java"), full.names = TRUE))
4
+  o <- getOption("java.parameters", "")
5
+  if (!any(grepl("-Xrs", o))) {
6
+    packageStartupMessage(
7
+      "Did not find '-Xrs' in java.parameters option. Until rJava is updated, ",
8
+      "please set this up in your/an Rprofile or at the start of scripts."
9
+    )
10
+  }
4 11
 }

+ 87
- 42
README.Rmd View File

@@ -4,25 +4,14 @@ editor_options:
4 4
   chunk_output_type: console
5 5
 ---
6 6
 
7
-# `metis`
7
+# metis
8 8
 
9
-Helpers for Accessing and Querying Amazon Athena
10
-
11
-Including a lightweight RJDBC shim.
12
-
13
-In Greek mythology, Metis was Athena's "helper".
9
+Access and Query Amazon Athena via DBI/JDBC
14 10
 
15 11
 ## Description
16 12
 
17
-Still fairly beta-quality level but getting there.
18
-
19
-The goal will be to get around enough of the "gotchas" that are preventing raw RJDBC Athena connections from "just working" with `dplyr` v0.6.0+ and also get around the [`fetchSize` problem](https://www.reddit.com/r/aws/comments/6aq22b/fetchsize_limit/) without having to not use `dbGetQuery()`.
20
-
21
-The `AthenaJDBC42_2.0.2.jar` JAR file is included out of convenience but that will likely move to a separate package as this gets closer to prime time if this goes on CRAN.
22
-
23
-NOTE that the updated driver *REQUIRES JDK 1.8+*.
24
-
25
-See the **Usage** section for an example.
13
+In Greek mythology, Metis was Athena's "helper" so methods are provided to help you access and query Amazon Athena via DBI/JDBC and/or `dplyr`.
14
+Methods are provided to connect to 'Amazon' 'Athena', lookup schemas/tables, perform queries and retrieve query results.
26 15
 
27 16
 ## IMPORTANT
28 17
 
@@ -41,7 +30,7 @@ The following functions are implemented:
41 30
 
42 31
 Easy-interface connection helper:
43 32
 
44
-- `athena_connect`	Make a JDBC connection to Athena
33
+- `athena_connect`	Simplified Athena JDBC connection helper
45 34
 
46 35
 Custom JDBC Classes:
47 36
 
@@ -52,13 +41,13 @@ Custom JDBC Classes:
52 41
 
53 42
 Custom JDBC Class Methods:
54 43
 
55
-- `dbConnect-method`:	AthenaJDBC
56
-- `dbExistsTable-method`:	AthenaJDBC
57
-- `dbGetQuery-method`:	AthenaJDBC
58
-- `dbListFields-method`:	AthenaJDBC
59
-- `dbListTables-method`:	AthenaJDBC
60
-- `dbReadTable-method`:	AthenaJDBC
61
-- `dbSendQuery-method`:	AthenaJDBC
44
+- `dbConnect-method`
45
+- `dbExistsTable-method`
46
+- `dbGetQuery-method`
47
+- `dbListFields-method`
48
+- `dbListTables-method`
49
+- `dbReadTable-method`
50
+- `dbSendQuery-method`
62 51
 
63 52
 Pulled in from other `cloudyr` pkgs: 
64 53
 
@@ -68,41 +57,97 @@ Pulled in from other `cloudyr` pkgs:
68 57
 ## Installation
69 58
 
70 59
 ```{r eval=FALSE}
71
-devtools::install_github("hrbrmstr/metis")
60
+devtools::install_git("https://git.sr.ht/~hrbrmstr/metis-lite")
61
+# OR
62
+devtools::install_gitlab("hrbrmstr/metis-lite")
63
+# OR
64
+devtools::install_github("hrbrmstr/metis-lite")
72 65
 ```
73 66
 
74
-```{r message=FALSE, warning=FALSE, error=FALSE, include=FALSE}
67
+```{r message=FALSE, warning=FALSE, include=FALSE}
75 68
 options(width=120)
76 69
 ```
77 70
 
78 71
 ## Usage
79 72
 
80
-```{r message=FALSE, warning=FALSE, error=FALSE}
81
-library(metis)
82
-library(tidyverse)
73
+```{r message=FALSE, warning=FALSE}
74
+library(metis.lite)
83 75
 
84 76
 # current version
85
-packageVersion("metis")
77
+packageVersion("metis.lite")
86 78
 ```
87 79
 
88
-```{r message=FALSE, warning=FALSE, error=FALSE}
89
-use_credentials("default")
80
+```{r message=FALSE, warning=FALSE}
81
+library(rJava)
82
+library(RJDBC)
83
+library(metis.lite)
84
+library(magrittr)
85
+library(dbplyr)
86
+library(dplyr)
90 87
 
91
-athena_connect(
92
-  default_schema = "sampledb", 
93
-  s3_staging_dir = "s3://accessible-bucket",
94
-  log_path = "/tmp/athena.log",
95
-  log_level = "DEBUG"
96
-) -> ath
88
+dbConnect(
89
+  drv = metis.lite::Athena(),
90
+  schema_name = "sampledb",
91
+  provider = "com.simba.athena.amazonaws.auth.PropertiesFileCredentialsProvider",
92
+  AwsCredentialsProviderArguments = path.expand("~/.aws/athenaCredentials.props"),
93
+  s3_staging_dir = "s3://aws-athena-query-results-569593279821-us-east-1",
94
+) -> con
97 95
 
98
-dbListTables(ath, schema="sampledb")
96
+dbListTables(con, schema="sampledb")
99 97
 
100
-dbExistsTable(ath, "elb_logs", schema="sampledb")
98
+dbExistsTable(con, "elb_logs", schema="sampledb")
101 99
 
102
-dbListFields(ath, "elb_logs", "sampledb")
100
+dbListFields(con, "elb_logs", "sampledb")
101
+
102
+dbGetQuery(con, "SELECT * FROM sampledb.elb_logs LIMIT 10") %>% 
103
+  glimpse()
104
+```
105
+
106
+### Check types
107
+
108
+```{r}
109
+dbGetQuery(con, "
110
+SELECT
111
+  CAST('chr' AS CHAR(4)) achar,
112
+  CAST('varchr' AS VARCHAR) avarchr,
113
+  CAST(SUBSTR(timestamp, 1, 10) AS DATE) AS tsday,
114
+  CAST(100.1 AS DOUBLE) AS justadbl,
115
+  CAST(127 AS TINYINT) AS asmallint,
116
+  CAST(100 AS INTEGER) AS justanint,
117
+  CAST(100000000000000000 AS BIGINT) AS abigint,
118
+  CAST(('GET' = 'GET') AS BOOLEAN) AS is_get,
119
+  ARRAY[1, 2, 3] AS arr1,
120
+  ARRAY['1', '2, 3', '4'] AS arr2,
121
+  MAP(ARRAY['foo', 'bar'], ARRAY[1, 2]) AS mp,
122
+  CAST(ROW(1, 2.0) AS ROW(x BIGINT, y DOUBLE)) AS rw,
123
+  CAST('{\"a\":1}' AS JSON) js
124
+FROM elb_logs
125
+LIMIT 1
126
+") %>% 
127
+  glimpse()
128
+```
103 129
 
104
-dbGetQuery(ath, "SELECT * FROM sampledb.elb_logs LIMIT 10") %>% 
105
-  type_convert() %>% 
130
+#### dplyr
131
+
132
+```{r}
133
+tbl(con, sql("
134
+SELECT
135
+  CAST('chr' AS CHAR(4)) achar,
136
+  CAST('varchr' AS VARCHAR) avarchr,
137
+  CAST(SUBSTR(timestamp, 1, 10) AS DATE) AS tsday,
138
+  CAST(100.1 AS DOUBLE) AS justadbl,
139
+  CAST(127 AS TINYINT) AS asmallint,
140
+  CAST(100 AS INTEGER) AS justanint,
141
+  CAST(100000000000000000 AS BIGINT) AS abigint,
142
+  CAST(('GET' = 'GET') AS BOOLEAN) AS is_get,
143
+  ARRAY[1, 2, 3] AS arr,
144
+  ARRAY['1', '2, 3', '4'] AS arr,
145
+  MAP(ARRAY['foo', 'bar'], ARRAY[1, 2]) AS mp,
146
+  CAST(ROW(1, 2.0) AS ROW(x BIGINT, y DOUBLE)) AS rw,
147
+  CAST('{\"a\":1}' AS JSON) js
148
+FROM elb_logs
149
+LIMIT 1
150
+")) %>% 
106 151
   glimpse()
107 152
 ```
108 153
 

+ 143
- 64
README.md View File

@@ -1,35 +1,25 @@
1 1
 
2
-# `metis`
2
+# metis
3 3
 
4
-Helpers for Accessing and Querying Amazon Athena
5
-
6
-Including a lightweight RJDBC shim.
7
-
8
-In Greek mythology, Metis was Athena’s “helper”.
4
+Access and Query Amazon Athena via DBI/JDBC
9 5
 
10 6
 ## Description
11 7
 
12
-Still fairly beta-quality level but getting there.
13
-
14
-The goal will be to get around enough of the “gotchas” that are
15
-preventing raw RJDBC Athena connections from “just working” with `dplyr`
16
-v0.6.0+ and also get around the [`fetchSize`
17
-problem](https://www.reddit.com/r/aws/comments/6aq22b/fetchsize_limit/)
18
-without having to not use `dbGetQuery()`.
19
-
20
-The `AthenaJDBC42_2.0.2.jar` JAR file is included out of convenience but
21
-that will likely move to a separate package as this gets closer to prime
22
-time if this goes on CRAN.
23
-
24
-NOTE that the updated driver *REQUIRES JDK 1.8+*.
25
-
26
-See the **Usage** section for an example.
8
+In Greek mythology, Metis was Athena’s “helper” so methods are provided
9
+to help you access and query Amazon Athena via DBI/JDBC and/or
10
+`dplyr`. Methods are provided to connect to ‘Amazon’ ‘Athena’,
11
+lookup schemas/tables, perform queries and retrieve query results.
27 12
 
28 13
 ## IMPORTANT
29 14
 
30
-Since R 3.5 (I don't remember this happening in R 3.4.x) signals sent from interrupting Athena JDBC calls crash the R interpreter. You need to set the `-Xrs` option to avoid signals being passed on to the JVM owner. That has to be done _before_ `rJava` is loaded so you either need to remember to put it at the top of all scripts _or_ stick this in your local `~/.Rprofile` and/or sitewide `Rprofile`:
15
+Since R 3.5 (I don’t remember this happening in R 3.4.x) signals sent
16
+from interrupting Athena JDBC calls crash the R interpreter. You need to
17
+set the `-Xrs` option to avoid signals being passed on to the JVM owner.
18
+That has to be done *before* `rJava` is loaded so you either need to
19
+remember to put it at the top of all scripts *or* stick this in your
20
+local `~/.Rprofile` and/or sitewide `Rprofile`:
31 21
 
32
-```r
22
+``` r
33 23
 if (!grepl("-Xrs", getOption("java.parameters", ""))) {
34 24
   options(
35 25
     "java.parameters" = c(getOption("java.parameters", default = NULL), "-Xrs")
@@ -43,7 +33,7 @@ The following functions are implemented:
43 33
 
44 34
 Easy-interface connection helper:
45 35
 
46
-  - `athena_connect` Make a JDBC connection to Athena
36
+  - `athena_connect` Simplified Athena JDBC connection helper
47 37
 
48 38
 Custom JDBC Classes:
49 39
 
@@ -54,13 +44,13 @@ Custom JDBC Classes:
54 44
 
55 45
 Custom JDBC Class Methods:
56 46
 
57
-  - `dbConnect-method`: AthenaJDBC
58
-  - `dbExistsTable-method`: AthenaJDBC
59
-  - `dbGetQuery-method`: AthenaJDBC
60
-  - `dbListFields-method`: AthenaJDBC
61
-  - `dbListTables-method`: AthenaJDBC
62
-  - `dbReadTable-method`: AthenaJDBC
63
-  - `dbSendQuery-method`: AthenaJDBC
47
+  - `dbConnect-method`
48
+  - `dbExistsTable-method`
49
+  - `dbGetQuery-method`
50
+  - `dbListFields-method`
51
+  - `dbListTables-method`
52
+  - `dbReadTable-method`
53
+  - `dbSendQuery-method`
64 54
 
65 55
 Pulled in from other `cloudyr` pkgs:
66 56
 
@@ -70,44 +60,53 @@ Pulled in from other `cloudyr` pkgs:
70 60
 ## Installation
71 61
 
72 62
 ``` r
73
-devtools::install_github("hrbrmstr/metis")
63
+devtools::install_git("https://git.sr.ht/~hrbrmstr/metis-lite")
64
+# OR
65
+devtools::install_gitlab("hrbrmstr/metis-lite")
66
+# OR
67
+devtools::install_github("hrbrmstr/metis-lite")
74 68
 ```
75 69
 
76 70
 ## Usage
77 71
 
78 72
 ``` r
79
-library(metis)
80
-library(tidyverse)
73
+library(metis.lite)
81 74
 
82 75
 # current version
83
-packageVersion("metis")
76
+packageVersion("metis.lite")
84 77
 ```
85 78
 
86 79
     ## [1] '0.3.0'
87 80
 
88 81
 ``` r
89
-use_credentials("default")
90
-
91
-athena_connect(
92
-  default_schema = "sampledb", 
93
-  s3_staging_dir = "s3://accessible-bucket",
94
-  log_path = "/tmp/athena.log",
95
-  log_level = "DEBUG"
96
-) -> ath
97
-
98
-dbListTables(ath, schema="sampledb")
82
+library(rJava)
83
+library(RJDBC)
84
+library(metis.lite)
85
+library(magrittr)
86
+library(dbplyr)
87
+library(dplyr)
88
+
89
+dbConnect(
90
+  drv = metis.lite::Athena(),
91
+  schema_name = "sampledb",
92
+  provider = "com.simba.athena.amazonaws.auth.PropertiesFileCredentialsProvider",
93
+  AwsCredentialsProviderArguments = path.expand("~/.aws/athenaCredentials.props"),
94
+  s3_staging_dir = "s3://aws-athena-query-results-569593279821-us-east-1",
95
+) -> con
96
+
97
+dbListTables(con, schema="sampledb")
99 98
 ```
100 99
 
101 100
     ## [1] "elb_logs"
102 101
 
103 102
 ``` r
104
-dbExistsTable(ath, "elb_logs", schema="sampledb")
103
+dbExistsTable(con, "elb_logs", schema="sampledb")
105 104
 ```
106 105
 
107 106
     ## [1] TRUE
108 107
 
109 108
 ``` r
110
-dbListFields(ath, "elb_logs", "sampledb")
109
+dbListFields(con, "elb_logs", "sampledb")
111 110
 ```
112 111
 
113 112
     ##  [1] "timestamp"             "elbname"               "requestip"             "requestport"          
@@ -116,29 +115,109 @@ dbListFields(ath, "elb_logs", "sampledb")
116 115
     ## [13] "sentbytes"             "requestverb"           "url"                   "protocol"
117 116
 
118 117
 ``` r
119
-dbGetQuery(ath, "SELECT * FROM sampledb.elb_logs LIMIT 10") %>% 
120
-  type_convert() %>% 
118
+dbGetQuery(con, "SELECT * FROM sampledb.elb_logs LIMIT 10") %>% 
121 119
   glimpse()
122 120
 ```
123 121
 
124 122
     ## Observations: 10
125 123
     ## Variables: 16
126
-    ## $ timestamp             <dttm> 2014-09-30 01:28:17, 2014-09-30 00:01:30, 2014-09-30 00:01:30, 2014-09-30 00:01:30, ...
127
-    ## $ elbname               <chr> "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo...
128
-    ## $ requestip             <chr> "246.140.190.136", "240.109.129.138", "242.251.232.153", "253.227.207.81", "253.227.2...
129
-    ## $ requestport           <dbl> 63777, 22705, 22705, 22705, 23282, 24178, 22916, 23807, 22916, 21443
130
-    ## $ backendip             <chr> "250.193.168.100", "251.103.130.45", "243.140.114.254", "243.82.95.243", "246.129.102...
131
-    ## $ backendport           <dbl> 8888, 8888, 8888, 8888, 8899, 8888, 8888, 8888, 8888, 8888
132
-    ## $ requestprocessingtime <dbl> 7.2e-05, 6.9e-05, 8.7e-05, 9.7e-05, 8.1e-05, 4.6e-05, 4.3e-05, 5.3e-05, 5.5e-05, 4.4e-05
133
-    ## $ backendprocessingtime <dbl> 0.379241, 0.007541, 0.187126, 0.413337, 0.037030, 0.050222, 0.043706, 0.045953, 0.015...
134
-    ## $ clientresponsetime    <dbl> 8.0e-05, 4.3e-05, 7.5e-05, 8.7e-05, 4.5e-05, 3.3e-05, 3.3e-05, 6.9e-05, 8.5e-05, 4.9e-05
135
-    ## $ elbresponsecode       <int> 200, 302, 302, 200, 200, 200, 200, 200, 200, 200
136
-    ## $ backendresponsecode   <int> 200, 200, 200, 400, 200, 200, 200, 404, 200, 200
137
-    ## $ receivedbytes         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
138
-    ## $ sentbytes             <dbl> 58402, 0, 0, 58402, 32370, 20766, 3408, 152213, 84245, 3884
124
+    ## $ timestamp             <chr> "2014-09-29T18:18:51.826955Z", "2014-09-29T18:18:51.920462Z", "2014-09-29T18:18:52.2725…
125
+    ## $ elbname               <chr> "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo",…
126
+    ## $ requestip             <chr> "255.48.150.122", "249.213.227.93", "245.108.120.229", "241.112.203.216", "241.43.107.2…
127
+    ## $ requestport           <int> 62096, 62096, 62096, 62096, 56454, 33254, 18918, 64352, 1651, 56454
128
+    ## $ backendip             <chr> "244.238.214.120", "248.99.214.228", "243.3.190.175", "246.235.181.255", "241.112.203.2…
129
+    ## $ backendport           <int> 8888, 8888, 8888, 8888, 8888, 8888, 8888, 8888, 8888, 8888
130
+    ## $ requestprocessingtime <dbl> 9.0e-05, 9.7e-05, 8.7e-05, 9.4e-05, 7.6e-05, 8.3e-05, 6.3e-05, 5.4e-05, 8.2e-05, 8.7e-05
131
+    ## $ backendprocessingtime <dbl> 0.007410, 0.256533, 0.442659, 0.016772, 0.035036, 0.029892, 0.034148, 0.014858, 0.01518…
132
+    ## $ clientresponsetime    <dbl> 0.000055, 0.000075, 0.000131, 0.000078, 0.000057, 0.000043, 0.000033, 0.000043, 0.00007…
133
+    ## $ elbresponsecode       <chr> "302", "302", "200", "200", "200", "200", "200", "200", "200", "200"
134
+    ## $ backendresponsecode   <chr> "200", "200", "200", "200", "200", "200", "200", "200", "200", "200"
135
+    ## $ receivedbytes         <S3: integer64> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
136
+    ## $ sentbytes             <S3: integer64> 0, 0, 58402, 152213, 20766, 32370, 3408, 3884, 84245, 3831
139 137
     ## $ requestverb           <chr> "GET", "GET", "GET", "GET", "GET", "GET", "GET", "GET", "GET", "GET"
140
-    ## $ url                   <chr> "http://www.abcxyz.com:80/", "http://www.abcxyz.com:80/", "http://www.abcxyz.com:80/a...
141
-    ## $ protocol              <chr> "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "...
138
+    ## $ url                   <chr> "http://www.abcxyz.com:80/", "http://www.abcxyz.com:80/accounts/login/?next=/", "http:/…
139
+    ## $ protocol              <chr> "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HT…
140
+
141
+### Check types
142
+
143
+``` r
144
+dbGetQuery(con, "
145
+SELECT
146
+  CAST('chr' AS CHAR(4)) achar,
147
+  CAST('varchr' AS VARCHAR) avarchr,
148
+  CAST(SUBSTR(timestamp, 1, 10) AS DATE) AS tsday,
149
+  CAST(100.1 AS DOUBLE) AS justadbl,
150
+  CAST(127 AS TINYINT) AS asmallint,
151
+  CAST(100 AS INTEGER) AS justanint,
152
+  CAST(100000000000000000 AS BIGINT) AS abigint,
153
+  CAST(('GET' = 'GET') AS BOOLEAN) AS is_get,
154
+  ARRAY[1, 2, 3] AS arr1,
155
+  ARRAY['1', '2, 3', '4'] AS arr2,
156
+  MAP(ARRAY['foo', 'bar'], ARRAY[1, 2]) AS mp,
157
+  CAST(ROW(1, 2.0) AS ROW(x BIGINT, y DOUBLE)) AS rw,
158
+  CAST('{\"a\":1}' AS JSON) js
159
+FROM elb_logs
160
+LIMIT 1
161
+") %>% 
162
+  glimpse()
163
+```
164
+
165
+    ## Observations: 1
166
+    ## Variables: 13
167
+    ## $ achar     <chr> "chr "
168
+    ## $ avarchr   <chr> "varchr"
169
+    ## $ tsday     <date> 2014-09-26
170
+    ## $ justadbl  <dbl> 100.1
171
+    ## $ asmallint <int> 127
172
+    ## $ justanint <int> 100
173
+    ## $ abigint   <S3: integer64> 100000000000000000
174
+    ## $ is_get    <lgl> TRUE
175
+    ## $ arr1      <chr> "1, 2, 3"
176
+    ## $ arr2      <chr> "1, 2, 3, 4"
177
+    ## $ mp        <chr> "{bar=2, foo=1}"
178
+    ## $ rw        <chr> "{x=1, y=2.0}"
179
+    ## $ js        <chr> "\"{\\\"a\\\":1}\""
180
+
181
+#### dplyr
182
+
183
+``` r
184
+tbl(con, sql("
185
+SELECT
186
+  CAST('chr' AS CHAR(4)) achar,
187
+  CAST('varchr' AS VARCHAR) avarchr,
188
+  CAST(SUBSTR(timestamp, 1, 10) AS DATE) AS tsday,
189
+  CAST(100.1 AS DOUBLE) AS justadbl,
190
+  CAST(127 AS TINYINT) AS asmallint,
191
+  CAST(100 AS INTEGER) AS justanint,
192
+  CAST(100000000000000000 AS BIGINT) AS abigint,
193
+  CAST(('GET' = 'GET') AS BOOLEAN) AS is_get,
194
+  ARRAY[1, 2, 3] AS arr,
195
+  ARRAY['1', '2, 3', '4'] AS arr,
196
+  MAP(ARRAY['foo', 'bar'], ARRAY[1, 2]) AS mp,
197
+  CAST(ROW(1, 2.0) AS ROW(x BIGINT, y DOUBLE)) AS rw,
198
+  CAST('{\"a\":1}' AS JSON) js
199
+FROM elb_logs
200
+LIMIT 1
201
+")) %>% 
202
+  glimpse()
203
+```
204
+
205
+    ## Observations: ??
206
+    ## Variables: 13
207
+    ## Database: AthenaConnection
208
+    ## $ achar     <chr> "chr "
209
+    ## $ avarchr   <chr> "varchr"
210
+    ## $ tsday     <date> 2014-09-27
211
+    ## $ justadbl  <dbl> 100.1
212
+    ## $ asmallint <int> 127
213
+    ## $ justanint <int> 100
214
+    ## $ abigint   <S3: integer64> 100000000000000000
215
+    ## $ is_get    <lgl> TRUE
216
+    ## $ arr       <chr> "1, 2, 3"
217
+    ## $ arr       <chr> "1, 2, 3, 4"
218
+    ## $ mp        <chr> "{bar=2, foo=1}"
219
+    ## $ rw        <chr> "{x=1, y=2.0}"
220
+    ## $ js        <chr> "\"{\\\"a\\\":1}\""
142 221
 
143 222
 ## Code of Conduct
144 223
 

+ 14
- 6
man/athena_connect.Rd View File

@@ -2,14 +2,14 @@
2 2
 % Please edit documentation in R/metis.r
3 3
 \name{athena_connect}
4 4
 \alias{athena_connect}
5
-\title{Make a JDBC connection to Athena}
5
+\title{Simplified Athena JDBC connection helper}
6 6
 \usage{
7 7
 athena_connect(default_schema = "default", region = c("us-east-1",
8 8
   "us-east-2", "us-west-2"),
9 9
   s3_staging_dir = Sys.getenv("AWS_S3_STAGING_DIR"),
10 10
   max_error_retries = 10, connection_timeout = 10000,
11 11
   socket_timeout = 10000, log_path = "", log_level = c("OFF",
12
-  "FATAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE"))
12
+  "FATAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE"), ...)
13 13
 }
14 14
 \arguments{
15 15
 \item{default_schema}{default schema (you'll still need to fully qualify non-default schema table names)}
@@ -28,6 +28,8 @@ athena_connect(default_schema = "default", region = c("us-east-1",
28 28
 
29 29
 \item{log_level}{log level of the Athena JDBC driver logs. Use  names
30 30
 "OFF", "FATAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE".}
31
+
32
+\item{...}{passed on to the driver}
31 33
 }
32 34
 \description{
33 35
 Handles the up-front JDBC config
@@ -36,10 +38,12 @@ Handles the up-front JDBC config
36 38
 \dontrun{
37 39
 use_credentials("personal")
38 40
 
39
-ath <- athena_connect(default_schema = "sampledb",
40
-                      s3_staging_dir = "s3://accessible-bucket",
41
-                      log_path = "/tmp/athena.log",
42
-                      log_level = "DEBUG")
41
+athena_connect(
42
+  default_schema = "sampledb",
43
+  s3_staging_dir = "s3://accessible-bucket",
44
+  log_path = "/tmp/athena.log",
45
+  log_level = "DEBUG"
46
+) -> ath
43 47
 
44 48
 dbListTables(ath)
45 49
 
@@ -47,3 +51,7 @@ dbGetQuery(ath, "SELECT * FROM sampledb.elb_logs LIMIT 1")
47 51
 
48 52
 }
49 53
 }
54
+\references{
55
+\href{https://docs.aws.amazon.com/athena/latest/ug/connect-with-jdbc.html}{Connect with JDBC};
56
+\href{https://s3.amazonaws.com/athena-downloads/drivers/JDBC/SimbaAthenaJDBC_2.0.6/docs/Simba+Athena+JDBC+Driver+Install+and+Configuration+Guide.pdf}{Simba Athena JDBC Driver with SQL Connector Installation and Configuration Guide}
57
+}

+ 15
- 6
man/dbConnect-AthenaDriver-method.Rd View File

@@ -9,9 +9,9 @@
9 9
   provider = "com.simba.athena.amazonaws.auth.DefaultAWSCredentialsProviderChain",
10 10
   region = "us-east-1",
11 11
   s3_staging_dir = Sys.getenv("AWS_S3_STAGING_DIR"),
12
-  schema_name = "default", max_error_retries = 10,
13
-  connection_timeout = 10000, socket_timeout = 10000, log_path = "",
14
-  log_level = 0, ...)
12
+  schema_name = "default", fetch_size = 1000L,
13
+  max_error_retries = 10, connection_timeout = 10000,
14
+  socket_timeout = 10000, log_path = "", log_level = 0, ...)
15 15
 }
16 16
 \arguments{
17 17
 \item{provider}{JDBC auth provider (ideally leave default)}
@@ -29,11 +29,20 @@ of data in logs. Set this to a temporary directory or something log4j can use. F
29 29
 `log_level` use the names ("INFO", "DEBUG", "WARN", "ERROR", "ALL", "OFF", "FATAL", "TRACE") or
30 30
 their corresponding integer values 0-6.}
31 31
 
32
-\item{...}{unused}
32
+\item{...}{passed on to the driver. See the "Driver Configuration Options" section.}
33 33
 }
34 34
 \description{
35
-AthenaJDBC
35
+Connect to Athena
36 36
 }
37
+\section{Driver Configuration Options}{
38
+
39
+
40
+- `BinaryColumnLength`: <int> The maximum data length for `BINARY` columns. Default `32767L`
41
+- `ComplexTypeColumnLength`: <int> The maximum data length for `ARRAY`, `MAP`, and `STRUCT` columns. Default `65535L`
42
+- `StringColumnLength`: <int> The maximum data length for `STRING` columns. Default `255L`
43
+}
44
+
37 45
 \references{
38
-<https://docs.aws.amazon.com/athena/latest/ug/connect-with-jdbc.html>
46
+[Connect with JDBC](https://docs.aws.amazon.com/athena/latest/ug/connect-with-jdbc.html);
47
+    [Simba Athena JDBC Driver with SQL Connector Installation and Configuration Guide](https://s3.amazonaws.com/athena-downloads/drivers/JDBC/SimbaAthenaJDBC_2.0.6/docs/Simba+Athena+JDBC+Driver+Install+and+Configuration+Guide.pdf)
39 48
 }

+ 1
- 2
man/dbGetQuery-AthenaConnection-character-method.Rd View File

@@ -5,8 +5,7 @@
5 5
 \alias{dbGetQuery,AthenaConnection,character-method}
6 6
 \title{AthenaJDBC}
7 7
 \usage{
8
-\S4method{dbGetQuery}{AthenaConnection,character}(conn, statement,
9
-  type_convert = FALSE, ...)
8
+\S4method{dbGetQuery}{AthenaConnection,character}(conn, statement, ...)
10 9
 }
11 10
 \arguments{
12 11
 \item{conn}{Athena connection}

+ 4
- 1
man/metis.lite.Rd View File

@@ -5,7 +5,7 @@
5 5
 \name{metis.lite}
6 6
 \alias{metis.lite}
7 7
 \alias{metis.lite-package}
8
-\title{Helpers for Accessing and Querying Amazon Athena}
8
+\title{Access and Query Amazon Athena via DBI/JDBC}
9 9
 \description{
10 10
 Methods are provided to connect to 'Amazon' 'Athena', lookup schemas/tables,
11 11
 perform queries and retrieve query results. A lightweight 'RJDBC' implementation
@@ -26,6 +26,9 @@ in your local #' \code{~/.Rprofile} and/or sitewide \code{Rprofile}:\preformatte
26 26
 }
27 27
 }
28 28
 
29
+\references{
30
+\href{https://s3.amazonaws.com/athena-downloads/drivers/JDBC/SimbaAthenaJDBC_2.0.6/docs/Simba+Athena+JDBC+Driver+Install+and+Configuration+Guide.pdf}{Simba Athena JDBC Driver with SQL Connector Installation and Configuration Guide}
31
+}
29 32
 \author{
30 33
 Bob Rudis (bob@rud.is)
31 34
 }

Loading…
Cancel
Save