Browse Source

dplyr bits working

master
boB Rudis 5 years ago
parent
commit
646d4938e8
No known key found for this signature in database GPG Key ID: 1D7529BE14E2BBA9
  1. 4
      DESCRIPTION
  2. 4
      R/a-utils.R
  3. 24
      R/jdbc.r
  4. 3
      R/metis-lite-package.R
  5. 27
      R/metis.r
  6. 14
      R/z-dbGetQuery.R
  7. 9
      R/zzz.R
  8. 129
      README.Rmd
  9. 205
      README.md
  10. 16
      man/athena_connect.Rd
  11. 21
      man/dbConnect-AthenaDriver-method.Rd
  12. 3
      man/dbGetQuery-AthenaConnection-character-method.Rd
  13. 5
      man/metis.lite.Rd

4
DESCRIPTION

@ -1,6 +1,6 @@
Package: metis.lite
Type: Package
Title: Helpers for Accessing and Querying Amazon Athena
Title: Access and Query Amazon Athena via DBI/JDBC
Version: 0.3.0
Date: 2019-02-14
Authors@R: c(
@ -11,7 +11,7 @@ Maintainer: Bob Rudis <bob@rud.is>
Encoding: UTF-8
Description: Methods are provided to connect to 'Amazon' 'Athena', look up schemas/tables,
perform queries and retrieve query results. A lightweight 'RJDBC' implementation
is included along with an interface to the 'AWS' command-line utility.
is included along with additional helpers for 'dbplyr'/'dplyr' support.
SystemRequirements: JDK 1.8+
License: MIT + file LICENSE
Suggests:

4
R/a-utils.R

@ -3,6 +3,10 @@ set_names <- function (object = nm, nm) {
object
}
# Convert a 0/1-style vector to logical by going through integer first.
#
# `x` is coerced with `as.integer()` and the result with `as.logical()`, so
# zero becomes FALSE, any other integer becomes TRUE, and values that cannot
# be read as integers become NA.
as_logical <- function(x) {
  int_values <- as.integer(x)
  as.logical(int_values)
}
# Convert a day count to a `Date`, counting from the Unix epoch.
#
# `x` is handed to `as.Date()` together with an origin of "1970-01-01".
as_date <- function(x) {
  unix_epoch <- "1970-01-01"
  as.Date(x, origin = unix_epoch)
}

24
R/jdbc.r

@ -36,6 +36,14 @@ Athena <- function(identifier.quote = '`') {
#' AthenaJDBC
#'
#' Connect to Athena
#'
#' @section Driver Configuration Options:
#'
#' - `BinaryColumnLength`: <int> The maximum data length for `BINARY` columns. Default `32767L`
#' - `ComplexTypeColumnLength`: <int> The maximum data length for `ARRAY`, `MAP`, and `STRUCT` columns. Default `65535L`
#' - `StringColumnLength`: <int> The maximum data length for `STRING` columns. Default `255L`
#'
#' @param provider JDBC auth provider (ideally leave default)
#' @param region AWS region the Athena tables are in
#' @param s3_staging_dir A write-able bucket on S3 that you have permissions for
@ -46,8 +54,9 @@ Athena <- function(identifier.quote = '`') {
#' of data in logs. Set this to a temporary directory or something log4j can use. For
#' `log_level` use the names ("INFO", "DEBUG", "WARN", "ERROR", "ALL", "OFF", "FATAL", "TRACE") or
#' their corresponding integer values 0-6.
#' @param ... unused
#' @references <https://docs.aws.amazon.com/athena/latest/ug/connect-with-jdbc.html>
#' @param ... passed on to the driver. See Details.
#' @references [Connect with JDBC](https://docs.aws.amazon.com/athena/latest/ug/connect-with-jdbc.html);
#' [Simba Athena JDBC Driver with SQL Connector Installation and Configuration Guide](https://s3.amazonaws.com/athena-downloads/drivers/JDBC/SimbaAthenaJDBC_2.0.6/docs/Simba+Athena+JDBC+Driver+Install+and+Configuration+Guide.pdf)
#' @export
setMethod(
@ -60,6 +69,7 @@ setMethod(
region = "us-east-1",
s3_staging_dir = Sys.getenv("AWS_S3_STAGING_DIR"),
schema_name = "default",
fetch_size = 1000L,
max_error_retries = 10,
connection_timeout = 10000,
socket_timeout = 10000,
@ -87,7 +97,11 @@ setMethod(
...
) -> jc
return(as(jc, "AthenaConnection"))
jc <- as(jc, "AthenaConnection")
jc@fetch_size <- as.integer(fetch_size)
return(jc)
}
@ -96,7 +110,9 @@ setMethod(
#' AthenaJDBC
#'
#' @export
setClass("AthenaConnection", contains = "JDBCConnection")
setClass("AthenaConnection", representation("JDBCConnection", jc="jobjRef", identifier.quote="character", fetch_size="integer"))
# setClass("AthenaConnection", contains = "JDBCConnection")
#' AthenaJDBC
#'

3
R/metis-lite-package.R

@ -1,4 +1,4 @@
#' Helpers for Accessing and Querying Amazon Athena
#' Access and Query Amazon Athena via DBI/JDBC
#'
#' Methods are provided to connect to 'Amazon' 'Athena', look up schemas/tables,
#' perform queries and retrieve query results. A lightweight 'RJDBC' implementation
@ -27,6 +27,7 @@
#' @docType package
#' @author Bob Rudis (bob@@rud.is)
#' @import RJDBC DBI bit64 dbplyr
#' @references [Simba Athena JDBC Driver with SQL Connector Installation and Configuration Guide](https://s3.amazonaws.com/athena-downloads/drivers/JDBC/SimbaAthenaJDBC_2.0.6/docs/Simba+Athena+JDBC+Driver+Install+and+Configuration+Guide.pdf)
NULL

27
R/metis.r

@ -1,4 +1,4 @@
#' Make a JDBC connection to Athena
#' Simplified Athena JDBC connection helper
#'
#' Handles the up-front JDBC config
#'
@ -14,14 +14,19 @@
#' @param log_path local path of the Athena JDBC driver logs. If no log path is provided, then no log files are created.
#' @param log_level log level of the Athena JDBC driver logs. Use names
#' "OFF", "FATAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE".
#' @param ... passed on to the driver
#' @export
#' @references [Connect with JDBC](https://docs.aws.amazon.com/athena/latest/ug/connect-with-jdbc.html);
#' [Simba Athena JDBC Driver with SQL Connector Installation and Configuration Guide](https://s3.amazonaws.com/athena-downloads/drivers/JDBC/SimbaAthenaJDBC_2.0.6/docs/Simba+Athena+JDBC+Driver+Install+and+Configuration+Guide.pdf)
#' @examples \dontrun{
#' use_credentials("personal")
#'
#' ath <- athena_connect(default_schema = "sampledb",
#' athena_connect(
#' default_schema = "sampledb",
#' s3_staging_dir = "s3://accessible-bucket",
#' log_path = "/tmp/athena.log",
#' log_level = "DEBUG")
#' log_level = "DEBUG"
#' ) -> ath
#'
#' dbListTables(ath)
#'
@ -35,17 +40,16 @@ athena_connect <- function(
max_error_retries = 10,
connection_timeout = 10000,
socket_timeout = 10000,
# retry_base_delay = 100,
# retry_max_backoff_time = 1000,
log_path = "",
log_level = c("OFF", "FATAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE")) {
log_level = c("OFF", "FATAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE"),
...
) {
athena_jdbc <- Athena()
region <- match.arg(region, c("us-east-1", "us-east-2", "us-west-2"))
log_level <- match.arg(log_level, c("OFF", "FATAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE"))
# if (!simple) {
dbConnect(
athena_jdbc,
schema_name = default_schema,
@ -54,15 +58,10 @@ athena_connect <- function(
max_error_retries = max_error_retries,
connection_timeout = connection_timeout,
socket_timeout = socket_timeout,
# retry_base_delay = retry_base_delay,
# retry_max_backoff_time = retry_max_backoff_time,
log_path = log_path,
log_level = log_level
log_level = log_level,
...
) -> con
# } else {
# con <- dbConnect(athena_jdbc, provider = NULL, schema_name = default_schema, region = region,
# s3_staging_dir = s3_staging_dir, log_path = log_path, log_level = log_level)
# }
con

14
R/z-dbGetQuery.R

@ -16,10 +16,11 @@ list(
"7" = as.double, # REAL
"8" = as.double, # DOUBLE
"12" = as.character, # VARCHAR
"16" = as.logical, # BOOLEAN
"16" = as_logical, # BOOLEAN
"91" = as_date, # DATE
"92" = as.character, # TIME
"93" = as_posixct, # TIMESTAMP
"2003" = as.character, # ARRAY
"1111" = as.character # OTHER
) -> .jdbc_converters
@ -71,14 +72,19 @@ setMethod(
ct <- as.character(.jcall(res@md, "I", "getColumnType", i))
athena_type_convert[[i]] <- .jdbc_converters[[ct]]
nms <- c(nms, .jcall(res@md, "S", "getColumnLabel", i))
# message(ct, "|", tail(nms, 1))
}
athena_type_convert <- set_names(athena_type_convert, nms)
out <- callNextMethod(res = res, n = n, block = block, ...)
# print(str(out))
for (nm in names(athena_type_convert)) {
out[[nm]] <- athena_type_convert[[nm]](out[[nm]])
f <- athena_type_convert[[nm]]
if (length(f) == 0) f <- as.character # catchall in case AMZN is tricksy
out[[nm]] <- f(out[[nm]])
}
out
@ -98,13 +104,13 @@ setMethod(
"dbGetQuery",
signature(conn="AthenaConnection", statement="character"),
definition = function(conn, statement, type_convert=FALSE, ...) {
definition = function(conn, statement, ...) {
r <- dbSendQuery(conn, statement, ...)
on.exit(.jcall(r@stat, "V", "close"))
res <- fetch(r, -1, block = 1000L)
res <- fetch(r, -1, block = conn@fetch_size)
class(res) <- c("tbl_df", "tbl", "data.frame")

9
R/zzz.R

@ -1,4 +1,11 @@
# Package load hook.
#
# Registers the package's Java resources through rJava, adds any files found
# under `inst/java` of the current working directory to the class path, and
# emits a startup message when the `java.parameters` option does not contain
# `-Xrs`.
#
# @param libname library directory the package is being loaded from; passed to
#   `rJava::.jpackage()` as `lib.loc`
# @param pkgname name of the package being loaded
.onLoad <- function(libname, pkgname) {
# `jars = "*"` asks rJava to pick up every JAR bundled with the package.
rJava::.jpackage(pkgname, jars = "*", lib.loc = libname)
# rJava::.jaddClassPath(dir(file.path(getwd(), "inst/java"), full.names = TRUE))
# NOTE(review): the path below is resolved against `getwd()`, not the installed
# package directory, so it only finds anything when R is started from the
# package source tree -- presumably a development convenience; confirm intent.
rJava::.jaddClassPath(dir(file.path(getwd(), "inst/java"), full.names = TRUE))
# `java.parameters` may be unset (defaulting to "") or hold several flags, so
# every element is checked for `-Xrs`.
o <- getOption("java.parameters", "")
if (!any(grepl("-Xrs", o))) {
# Advisory only: nothing is changed on the user's behalf.
packageStartupMessage(
"Did not find '-Xrs' in java.parameters option. Until rJava is updated, ",
"please set this up in your/an Rprofile or at the start of scripts."
)
}
}

129
README.Rmd

@ -4,25 +4,14 @@ editor_options:
chunk_output_type: console
---
# `metis`
# metis
Helpers for Accessing and Querying Amazon Athena
Including a lightweight RJDBC shim.
In Greek mythology, Metis was Athena's "helper".
Access and Query Amazon Athena via DBI/JDBC
## Description
Still fairly beta-quality level but getting there.
The goal will be to get around enough of the "gotchas" that are preventing raw RJDBC Athena connections from "just working" with `dplyr` v0.6.0+ and also get around the [`fetchSize` problem](https://www.reddit.com/r/aws/comments/6aq22b/fetchsize_limit/) without having to not use `dbGetQuery()`.
The `AthenaJDBC42_2.0.2.jar` JAR file is included out of convenience but that will likely move to a separate package as this gets closer to prime time if this goes on CRAN.
NOTE that the updated driver *REQUIRES JDK 1.8+*.
See the **Usage** section for an example.
In Greek mythology, Metis was Athena's "helper", so methods are provided to help you access and query Amazon Athena via DBI/JDBC and/or `dplyr`.
Methods are provided to connect to 'Amazon' 'Athena', look up schemas/tables, perform queries and retrieve query results.
## IMPORTANT
@ -41,7 +30,7 @@ The following functions are implemented:
Easy-interface connection helper:
- `athena_connect` Make a JDBC connection to Athena
- `athena_connect` Simplified Athena JDBC connection helper
Custom JDBC Classes:
@ -52,13 +41,13 @@ Custom JDBC Classes:
Custom JDBC Class Methods:
- `dbConnect-method`: AthenaJDBC
- `dbExistsTable-method`: AthenaJDBC
- `dbGetQuery-method`: AthenaJDBC
- `dbListFields-method`: AthenaJDBC
- `dbListTables-method`: AthenaJDBC
- `dbReadTable-method`: AthenaJDBC
- `dbSendQuery-method`: AthenaJDBC
- `dbConnect-method`
- `dbExistsTable-method`
- `dbGetQuery-method`
- `dbListFields-method`
- `dbListTables-method`
- `dbReadTable-method`
- `dbSendQuery-method`
Pulled in from other `cloudyr` pkgs:
@ -68,41 +57,97 @@ Pulled in from other `cloudyr` pkgs:
## Installation
```{r eval=FALSE}
devtools::install_github("hrbrmstr/metis")
devtools::install_git("https://git.sr.ht/~hrbrmstr/metis-lite")
# OR
devtools::install_gitlab("hrbrmstr/metis-lite")
# OR
devtools::install_github("hrbrmstr/metis-lite")
```
```{r message=FALSE, warning=FALSE, error=FALSE, include=FALSE}
```{r message=FALSE, warning=FALSE, include=FALSE}
options(width=120)
```
## Usage
```{r message=FALSE, warning=FALSE, error=FALSE}
library(metis)
library(tidyverse)
```{r message=FALSE, warning=FALSE}
library(metis.lite)
# current version
packageVersion("metis")
packageVersion("metis.lite")
```
```{r message=FALSE, warning=FALSE, error=FALSE}
use_credentials("default")
```{r message=FALSE, warning=FALSE}
library(rJava)
library(RJDBC)
library(metis.lite)
library(magrittr)
library(dbplyr)
library(dplyr)
athena_connect(
default_schema = "sampledb",
s3_staging_dir = "s3://accessible-bucket",
log_path = "/tmp/athena.log",
log_level = "DEBUG"
) -> ath
dbConnect(
drv = metis.lite::Athena(),
schema_name = "sampledb",
provider = "com.simba.athena.amazonaws.auth.PropertiesFileCredentialsProvider",
AwsCredentialsProviderArguments = path.expand("~/.aws/athenaCredentials.props"),
s3_staging_dir = "s3://aws-athena-query-results-569593279821-us-east-1",
) -> con
dbListTables(ath, schema="sampledb")
dbListTables(con, schema="sampledb")
dbExistsTable(ath, "elb_logs", schema="sampledb")
dbExistsTable(con, "elb_logs", schema="sampledb")
dbListFields(ath, "elb_logs", "sampledb")
dbListFields(con, "elb_logs", "sampledb")
dbGetQuery(con, "SELECT * FROM sampledb.elb_logs LIMIT 10") %>%
glimpse()
```
### Check types
```{r}
dbGetQuery(con, "
SELECT
CAST('chr' AS CHAR(4)) achar,
CAST('varchr' AS VARCHAR) avarchr,
CAST(SUBSTR(timestamp, 1, 10) AS DATE) AS tsday,
CAST(100.1 AS DOUBLE) AS justadbl,
CAST(127 AS TINYINT) AS asmallint,
CAST(100 AS INTEGER) AS justanint,
CAST(100000000000000000 AS BIGINT) AS abigint,
CAST(('GET' = 'GET') AS BOOLEAN) AS is_get,
ARRAY[1, 2, 3] AS arr1,
ARRAY['1', '2, 3', '4'] AS arr2,
MAP(ARRAY['foo', 'bar'], ARRAY[1, 2]) AS mp,
CAST(ROW(1, 2.0) AS ROW(x BIGINT, y DOUBLE)) AS rw,
CAST('{\"a\":1}' AS JSON) js
FROM elb_logs
LIMIT 1
") %>%
glimpse()
```
dbGetQuery(ath, "SELECT * FROM sampledb.elb_logs LIMIT 10") %>%
type_convert() %>%
#### dplyr
```{r}
tbl(con, sql("
SELECT
CAST('chr' AS CHAR(4)) achar,
CAST('varchr' AS VARCHAR) avarchr,
CAST(SUBSTR(timestamp, 1, 10) AS DATE) AS tsday,
CAST(100.1 AS DOUBLE) AS justadbl,
CAST(127 AS TINYINT) AS asmallint,
CAST(100 AS INTEGER) AS justanint,
CAST(100000000000000000 AS BIGINT) AS abigint,
CAST(('GET' = 'GET') AS BOOLEAN) AS is_get,
ARRAY[1, 2, 3] AS arr,
ARRAY['1', '2, 3', '4'] AS arr,
MAP(ARRAY['foo', 'bar'], ARRAY[1, 2]) AS mp,
CAST(ROW(1, 2.0) AS ROW(x BIGINT, y DOUBLE)) AS rw,
CAST('{\"a\":1}' AS JSON) js
FROM elb_logs
LIMIT 1
")) %>%
glimpse()
```

205
README.md

@ -1,33 +1,23 @@
# `metis`
# metis
Helpers for Accessing and Querying Amazon Athena
Including a lightweight RJDBC shim.
In Greek mythology, Metis was Athena’s “helper”.
Access and Query Amazon Athena via DBI/JDBC
## Description
Still fairly beta-quality level but getting there.
The goal will be to get around enough of the “gotchas” that are
preventing raw RJDBC Athena connections from “just working” with `dplyr`
v0.6.0+ and also get around the [`fetchSize`
problem](https://www.reddit.com/r/aws/comments/6aq22b/fetchsize_limit/)
without having to not use `dbGetQuery()`.
The `AthenaJDBC42_2.0.2.jar` JAR file is included out of convenience but
that will likely move to a separate package as this gets closer to prime
time if this goes on CRAN.
NOTE that the updated driver *REQUIRES JDK 1.8+*.
See the **Usage** section for an example.
In Greek mythology, Metis was Athena’s “helper”, so methods are provided
to help you access and query Amazon Athena via DBI/JDBC and/or
`dplyr`. Methods are provided to connect to ‘Amazon’ ‘Athena’,
look up schemas/tables, perform queries and retrieve query results.
## IMPORTANT
Since R 3.5 (I don't remember this happening in R 3.4.x) signals sent from interrupting Athena JDBC calls crash the R interpreter. You need to set the `-Xrs` option to avoid signals being passed on to the JVM owner. That has to be done _before_ `rJava` is loaded so you either need to remember to put it at the top of all scripts _or_ stick this in your local `~/.Rprofile` and/or sitewide `Rprofile`:
Since R 3.5 (I don’t remember this happening in R 3.4.x) signals sent
from interrupting Athena JDBC calls crash the R interpreter. You need to
set the `-Xrs` option to avoid signals being passed on to the JVM owner.
That has to be done *before* `rJava` is loaded so you either need to
remember to put it at the top of all scripts *or* stick this in your
local `~/.Rprofile` and/or sitewide `Rprofile`:
``` r
if (!grepl("-Xrs", getOption("java.parameters", ""))) {
@ -43,7 +33,7 @@ The following functions are implemented:
Easy-interface connection helper:
- `athena_connect` Make a JDBC connection to Athena
- `athena_connect` Simplified Athena JDBC connection helper
Custom JDBC Classes:
@ -54,13 +44,13 @@ Custom JDBC Classes:
Custom JDBC Class Methods:
- `dbConnect-method`: AthenaJDBC
- `dbExistsTable-method`: AthenaJDBC
- `dbGetQuery-method`: AthenaJDBC
- `dbListFields-method`: AthenaJDBC
- `dbListTables-method`: AthenaJDBC
- `dbReadTable-method`: AthenaJDBC
- `dbSendQuery-method`: AthenaJDBC
- `dbConnect-method`
- `dbExistsTable-method`
- `dbGetQuery-method`
- `dbListFields-method`
- `dbListTables-method`
- `dbReadTable-method`
- `dbSendQuery-method`
Pulled in from other `cloudyr` pkgs:
@ -70,44 +60,53 @@ Pulled in from other `cloudyr` pkgs:
## Installation
``` r
devtools::install_github("hrbrmstr/metis")
devtools::install_git("https://git.sr.ht/~hrbrmstr/metis-lite")
# OR
devtools::install_gitlab("hrbrmstr/metis-lite")
# OR
devtools::install_github("hrbrmstr/metis-lite")
```
## Usage
``` r
library(metis)
library(tidyverse)
library(metis.lite)
# current version
packageVersion("metis")
packageVersion("metis.lite")
```
## [1] '0.3.0'
``` r
use_credentials("default")
athena_connect(
default_schema = "sampledb",
s3_staging_dir = "s3://accessible-bucket",
log_path = "/tmp/athena.log",
log_level = "DEBUG"
) -> ath
dbListTables(ath, schema="sampledb")
library(rJava)
library(RJDBC)
library(metis.lite)
library(magrittr)
library(dbplyr)
library(dplyr)
dbConnect(
drv = metis.lite::Athena(),
schema_name = "sampledb",
provider = "com.simba.athena.amazonaws.auth.PropertiesFileCredentialsProvider",
AwsCredentialsProviderArguments = path.expand("~/.aws/athenaCredentials.props"),
s3_staging_dir = "s3://aws-athena-query-results-569593279821-us-east-1",
) -> con
dbListTables(con, schema="sampledb")
```
## [1] "elb_logs"
``` r
dbExistsTable(ath, "elb_logs", schema="sampledb")
dbExistsTable(con, "elb_logs", schema="sampledb")
```
## [1] TRUE
``` r
dbListFields(ath, "elb_logs", "sampledb")
dbListFields(con, "elb_logs", "sampledb")
```
## [1] "timestamp" "elbname" "requestip" "requestport"
@ -116,29 +115,109 @@ dbListFields(ath, "elb_logs", "sampledb")
## [13] "sentbytes" "requestverb" "url" "protocol"
``` r
dbGetQuery(ath, "SELECT * FROM sampledb.elb_logs LIMIT 10") %>%
type_convert() %>%
dbGetQuery(con, "SELECT * FROM sampledb.elb_logs LIMIT 10") %>%
glimpse()
```
## Observations: 10
## Variables: 16
## $ timestamp <dttm> 2014-09-30 01:28:17, 2014-09-30 00:01:30, 2014-09-30 00:01:30, 2014-09-30 00:01:30, ...
## $ elbname <chr> "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo...
## $ requestip <chr> "246.140.190.136", "240.109.129.138", "242.251.232.153", "253.227.207.81", "253.227.2...
## $ requestport <dbl> 63777, 22705, 22705, 22705, 23282, 24178, 22916, 23807, 22916, 21443
## $ backendip <chr> "250.193.168.100", "251.103.130.45", "243.140.114.254", "243.82.95.243", "246.129.102...
## $ backendport <dbl> 8888, 8888, 8888, 8888, 8899, 8888, 8888, 8888, 8888, 8888
## $ requestprocessingtime <dbl> 7.2e-05, 6.9e-05, 8.7e-05, 9.7e-05, 8.1e-05, 4.6e-05, 4.3e-05, 5.3e-05, 5.5e-05, 4.4e-05
## $ backendprocessingtime <dbl> 0.379241, 0.007541, 0.187126, 0.413337, 0.037030, 0.050222, 0.043706, 0.045953, 0.015...
## $ clientresponsetime <dbl> 8.0e-05, 4.3e-05, 7.5e-05, 8.7e-05, 4.5e-05, 3.3e-05, 3.3e-05, 6.9e-05, 8.5e-05, 4.9e-05
## $ elbresponsecode <int> 200, 302, 302, 200, 200, 200, 200, 200, 200, 200
## $ backendresponsecode <int> 200, 200, 200, 400, 200, 200, 200, 404, 200, 200
## $ receivedbytes <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
## $ sentbytes <dbl> 58402, 0, 0, 58402, 32370, 20766, 3408, 152213, 84245, 3884
## $ timestamp <chr> "2014-09-29T18:18:51.826955Z", "2014-09-29T18:18:51.920462Z", "2014-09-29T18:18:52.2725…
## $ elbname <chr> "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo",…
## $ requestip <chr> "255.48.150.122", "249.213.227.93", "245.108.120.229", "241.112.203.216", "241.43.107.2…
## $ requestport <int> 62096, 62096, 62096, 62096, 56454, 33254, 18918, 64352, 1651, 56454
## $ backendip <chr> "244.238.214.120", "248.99.214.228", "243.3.190.175", "246.235.181.255", "241.112.203.2…
## $ backendport <int> 8888, 8888, 8888, 8888, 8888, 8888, 8888, 8888, 8888, 8888
## $ requestprocessingtime <dbl> 9.0e-05, 9.7e-05, 8.7e-05, 9.4e-05, 7.6e-05, 8.3e-05, 6.3e-05, 5.4e-05, 8.2e-05, 8.7e-05
## $ backendprocessingtime <dbl> 0.007410, 0.256533, 0.442659, 0.016772, 0.035036, 0.029892, 0.034148, 0.014858, 0.01518…
## $ clientresponsetime <dbl> 0.000055, 0.000075, 0.000131, 0.000078, 0.000057, 0.000043, 0.000033, 0.000043, 0.00007…
## $ elbresponsecode <chr> "302", "302", "200", "200", "200", "200", "200", "200", "200", "200"
## $ backendresponsecode <chr> "200", "200", "200", "200", "200", "200", "200", "200", "200", "200"
## $ receivedbytes <S3: integer64> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
## $ sentbytes <S3: integer64> 0, 0, 58402, 152213, 20766, 32370, 3408, 3884, 84245, 3831
## $ requestverb <chr> "GET", "GET", "GET", "GET", "GET", "GET", "GET", "GET", "GET", "GET"
## $ url <chr> "http://www.abcxyz.com:80/", "http://www.abcxyz.com:80/", "http://www.abcxyz.com:80/a...
## $ protocol <chr> "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "...
## $ url <chr> "http://www.abcxyz.com:80/", "http://www.abcxyz.com:80/accounts/login/?next=/", "http:/…
## $ protocol <chr> "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HT…
### Check types
``` r
dbGetQuery(con, "
SELECT
CAST('chr' AS CHAR(4)) achar,
CAST('varchr' AS VARCHAR) avarchr,
CAST(SUBSTR(timestamp, 1, 10) AS DATE) AS tsday,
CAST(100.1 AS DOUBLE) AS justadbl,
CAST(127 AS TINYINT) AS asmallint,
CAST(100 AS INTEGER) AS justanint,
CAST(100000000000000000 AS BIGINT) AS abigint,
CAST(('GET' = 'GET') AS BOOLEAN) AS is_get,
ARRAY[1, 2, 3] AS arr1,
ARRAY['1', '2, 3', '4'] AS arr2,
MAP(ARRAY['foo', 'bar'], ARRAY[1, 2]) AS mp,
CAST(ROW(1, 2.0) AS ROW(x BIGINT, y DOUBLE)) AS rw,
CAST('{\"a\":1}' AS JSON) js
FROM elb_logs
LIMIT 1
") %>%
glimpse()
```
## Observations: 1
## Variables: 13
## $ achar <chr> "chr "
## $ avarchr <chr> "varchr"
## $ tsday <date> 2014-09-26
## $ justadbl <dbl> 100.1
## $ asmallint <int> 127
## $ justanint <int> 100
## $ abigint <S3: integer64> 100000000000000000
## $ is_get <lgl> TRUE
## $ arr1 <chr> "1, 2, 3"
## $ arr2 <chr> "1, 2, 3, 4"
## $ mp <chr> "{bar=2, foo=1}"
## $ rw <chr> "{x=1, y=2.0}"
## $ js <chr> "\"{\\\"a\\\":1}\""
#### dplyr
``` r
tbl(con, sql("
SELECT
CAST('chr' AS CHAR(4)) achar,
CAST('varchr' AS VARCHAR) avarchr,
CAST(SUBSTR(timestamp, 1, 10) AS DATE) AS tsday,
CAST(100.1 AS DOUBLE) AS justadbl,
CAST(127 AS TINYINT) AS asmallint,
CAST(100 AS INTEGER) AS justanint,
CAST(100000000000000000 AS BIGINT) AS abigint,
CAST(('GET' = 'GET') AS BOOLEAN) AS is_get,
ARRAY[1, 2, 3] AS arr,
ARRAY['1', '2, 3', '4'] AS arr,
MAP(ARRAY['foo', 'bar'], ARRAY[1, 2]) AS mp,
CAST(ROW(1, 2.0) AS ROW(x BIGINT, y DOUBLE)) AS rw,
CAST('{\"a\":1}' AS JSON) js
FROM elb_logs
LIMIT 1
")) %>%
glimpse()
```
## Observations: ??
## Variables: 13
## Database: AthenaConnection
## $ achar <chr> "chr "
## $ avarchr <chr> "varchr"
## $ tsday <date> 2014-09-27
## $ justadbl <dbl> 100.1
## $ asmallint <int> 127
## $ justanint <int> 100
## $ abigint <S3: integer64> 100000000000000000
## $ is_get <lgl> TRUE
## $ arr <chr> "1, 2, 3"
## $ arr <chr> "1, 2, 3, 4"
## $ mp <chr> "{bar=2, foo=1}"
## $ rw <chr> "{x=1, y=2.0}"
## $ js <chr> "\"{\\\"a\\\":1}\""
## Code of Conduct

16
man/athena_connect.Rd

@ -2,14 +2,14 @@
% Please edit documentation in R/metis.r
\name{athena_connect}
\alias{athena_connect}
\title{Make a JDBC connection to Athena}
\title{Simplified Athena JDBC connection helper}
\usage{
athena_connect(default_schema = "default", region = c("us-east-1",
"us-east-2", "us-west-2"),
s3_staging_dir = Sys.getenv("AWS_S3_STAGING_DIR"),
max_error_retries = 10, connection_timeout = 10000,
socket_timeout = 10000, log_path = "", log_level = c("OFF",
"FATAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE"))
"FATAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE"), ...)
}
\arguments{
\item{default_schema}{default schema (you'll still need to fully qualify non-default schema table names)}
@ -28,6 +28,8 @@ athena_connect(default_schema = "default", region = c("us-east-1",
\item{log_level}{log level of the Athena JDBC driver logs. Use names
"OFF", "FATAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE".}
\item{...}{passed on to the driver}
}
\description{
Handles the up-front JDBC config
@ -36,10 +38,12 @@ Handles the up-front JDBC config
\dontrun{
use_credentials("personal")
ath <- athena_connect(default_schema = "sampledb",
athena_connect(
default_schema = "sampledb",
s3_staging_dir = "s3://accessible-bucket",
log_path = "/tmp/athena.log",
log_level = "DEBUG")
log_level = "DEBUG"
) -> ath
dbListTables(ath)
@ -47,3 +51,7 @@ dbGetQuery(ath, "SELECT * FROM sampledb.elb_logs LIMIT 1")
}
}
\references{
\href{https://docs.aws.amazon.com/athena/latest/ug/connect-with-jdbc.html}{Connect with JDBC};
\href{https://s3.amazonaws.com/athena-downloads/drivers/JDBC/SimbaAthenaJDBC_2.0.6/docs/Simba+Athena+JDBC+Driver+Install+and+Configuration+Guide.pdf}{Simba Athena JDBC Driver with SQL Connector Installation and Configuration Guide}
}

21
man/dbConnect-AthenaDriver-method.Rd

@ -9,9 +9,9 @@
provider = "com.simba.athena.amazonaws.auth.DefaultAWSCredentialsProviderChain",
region = "us-east-1",
s3_staging_dir = Sys.getenv("AWS_S3_STAGING_DIR"),
schema_name = "default", max_error_retries = 10,
connection_timeout = 10000, socket_timeout = 10000, log_path = "",
log_level = 0, ...)
schema_name = "default", fetch_size = 1000L,
max_error_retries = 10, connection_timeout = 10000,
socket_timeout = 10000, log_path = "", log_level = 0, ...)
}
\arguments{
\item{provider}{JDBC auth provider (ideally leave default)}
@ -29,11 +29,20 @@ of data in logs. Set this to a temporary directory or something log4j can use. F
`log_level` use the names ("INFO", "DEBUG", "WARN", "ERROR", "ALL", "OFF", "FATAL", "TRACE") or
their corresponding integer values 0-6.}
\item{...}{unused}
\item{...}{passed on to the driver. See Details.}
}
\description{
AthenaJDBC
Connect to Athena
}
\section{Driver Configuration Options}{
- `BinaryColumnLength`: <int> The maximum data length for `BINARY` columns. Default `32767L`
- `ComplexTypeColumnLength`: <int> The maximum data length for `ARRAY`, `MAP`, and `STRUCT` columns. Default `65535L`
- `StringColumnLength`: <int> The maximum data length for `STRING` columns. Default `255L`
}
\references{
<https://docs.aws.amazon.com/athena/latest/ug/connect-with-jdbc.html>
[Connect with JDBC](https://docs.aws.amazon.com/athena/latest/ug/connect-with-jdbc.html);
[Simba Athena JDBC Driver with SQL Connector Installation and Configuration Guide](https://s3.amazonaws.com/athena-downloads/drivers/JDBC/SimbaAthenaJDBC_2.0.6/docs/Simba+Athena+JDBC+Driver+Install+and+Configuration+Guide.pdf)
}

3
man/dbGetQuery-AthenaConnection-character-method.Rd

@ -5,8 +5,7 @@
\alias{dbGetQuery,AthenaConnection,character-method}
\title{AthenaJDBC}
\usage{
\S4method{dbGetQuery}{AthenaConnection,character}(conn, statement,
type_convert = FALSE, ...)
\S4method{dbGetQuery}{AthenaConnection,character}(conn, statement, ...)
}
\arguments{
\item{conn}{Athena connection}

5
man/metis.lite.Rd

@ -5,7 +5,7 @@
\name{metis.lite}
\alias{metis.lite}
\alias{metis.lite-package}
\title{Helpers for Accessing and Querying Amazon Athena}
\title{Access and Query Amazon Athena via DBI/JDBC}
\description{
Methods are provided to connect to 'Amazon' 'Athena', look up schemas/tables,
perform queries and retrieve query results. A lightweight 'RJDBC' implementation
@ -26,6 +26,9 @@ in your local #' \code{~/.Rprofile} and/or sitewide \code{Rprofile}:\preformatte
}
}
\references{
\href{https://s3.amazonaws.com/athena-downloads/drivers/JDBC/SimbaAthenaJDBC_2.0.6/docs/Simba+Athena+JDBC+Driver+Install+and+Configuration+Guide.pdf}{Simba Athena JDBC Driver with SQL Connector Installation and Configuration Guide}
}
\author{
Bob Rudis (bob@rud.is)
}

Loading…
Cancel
Save