Browse Source

initial commit

master
boB Rudis 4 years ago
parent
commit
4df423d9c6
No known key found for this signature in database GPG Key ID: 1D7529BE14E2BBA9
  1. 3
      .Rbuildignore
  2. 19
      DESCRIPTION
  3. 23
      NAMESPACE
  4. 223
      R/jdbc.r
  5. 48
      R/metis-lite-package.R
  6. 15
      R/metis-tidy-package.R
  7. 68
      R/metis.r
  8. 14
      R/sql_translate_env.R
  9. 121
      R/z-dbGetQuery.R
  10. 9
      R/zzz.R
  11. 148
      README.Rmd
  12. 207
      README.md
  13. 18
      man/Athena.Rd
  14. 9
      man/AthenaConnection-class.Rd
  15. 9
      man/AthenaDriver-class.Rd
  16. 9
      man/AthenaResult-class.Rd
  17. 57
      man/athena_connect.Rd
  18. 48
      man/dbConnect-AthenaDriver-method.Rd
  19. 22
      man/dbExistsTable-AthenaConnection-character-method.Rd
  20. 19
      man/dbGetQuery-AthenaConnection-character-method.Rd
  21. 22
      man/dbListFields-AthenaConnection-character-method.Rd
  22. 22
      man/dbListTables-AthenaConnection-method.Rd
  23. 21
      man/dbReadTable-AthenaConnection-character-method.Rd
  24. 19
      man/dbSendQuery-AthenaConnection-character-method.Rd
  25. 25
      man/dbplyr-interface.Rd
  26. 24
      man/fetch-AthenaResult-numeric-method.Rd
  27. 35
      man/metis.lite.Rd
  28. 19
      man/metis.tidy.Rd
  29. 12
      man/use_credentials.Rd
  30. 0
      metis-tidy.Rproj
  31. 2
      tests/test-all.R
  32. 0
      tests/testthat/test-metis.tidy.R

3
.Rbuildignore

@ -8,5 +8,6 @@
^NOTES\.*html$
^\.codecov\.yml$
^README_files$
^doc$
^docs$
^CONDUCT\.md$
^\.bash_profile$

19
DESCRIPTION

@ -1,6 +1,6 @@
Package: metis.lite
Package: metis.tidy
Type: Package
Title: Access and Query Amazon Athena via DBI/JDBC
Title: Access and Query Amazon Athena via the Tidyverse
Version: 0.3.0
Date: 2019-02-14
Authors@R: c(
@ -9,9 +9,8 @@ Authors@R: c(
)
Maintainer: Bob Rudis <bob@rud.is>
Encoding: UTF-8
Description: Methods are provides to connect to 'Amazon' 'Athena', lookup schemas/tables,
perform queries and retrieve query results. A lightweight 'RJDBC' implementation
is included along with additional helpers for 'dplyr'/'dplyr' suppprt.
Description: Methods are provided to use the 'metis' JDBC/DBI interface via
the Tidyverse (e.g. 'dbplyr'/'dplyr' idioms).
SystemRequirements: JDK 1.8+
License: MIT + file LICENSE
Suggests:
@ -19,11 +18,11 @@ Suggests:
covr
Depends:
R (>= 3.2.0),
RJDBC
metis
Imports:
rJava,
DBI,
bit64,
dbplyr,
aws.signature
dplyr,
dbplyr
RoxygenNote: 6.1.1
Remotes:
hrbrmstr/metis

23
NAMESPACE

@ -1,27 +1,8 @@
# Generated by roxygen2: do not edit by hand
export(Athena)
export(athena_connect)
export(db_data_type.AthenaConnection)
export(read_credentials)
export(sql_translate_env.AthenaConnection)
export(use_credentials)
exportClasses(AthenaConnection)
exportClasses(AthenaDriver)
exportClasses(AthenaResult)
exportMethods(dbConnect)
exportMethods(dbExistsTable)
exportMethods(dbGetInfo)
exportMethods(dbGetQuery)
exportMethods(dbListFields)
exportMethods(dbListTables)
exportMethods(dbReadTable)
exportMethods(dbSendQuery)
exportMethods(fetch)
import(DBI)
import(RJDBC)
import(bit64)
import(dbplyr)
importFrom(aws.signature,read_credentials)
importFrom(aws.signature,use_credentials)
importFrom(rJava,.jcall)
import(metis)
importFrom(dplyr,tbl)

223
R/jdbc.r

@ -1,223 +0,0 @@
# Lookup table from log-level name to the integer code (0-6) that is sent as
# the `LogLevel` connection property by the `dbConnect` method below.
.ll_trans <- c(
  OFF = 0L,
  FATAL = 1L,
  ERROR = 2L,
  WARNING = 3L,
  INFO = 4L,
  DEBUG = 5L,
  TRACE = 6L
)
#' Athena JDBC driver class
#'
#' S4 class that extends JDBCDriver and declares the identifier.quote
#' (character) and jdrv (Java object reference) slots.
#'
#' @export
setClass(
  "AthenaDriver",
  contains = "JDBCDriver",
  slots = c(identifier.quote = "character", jdrv = "jobjRef")
)
#' Create an Athena JDBC driver object
#'
#' Instantiates the Simba Athena JDBC driver from the jar bundled in the
#' package's java directory and returns it as an AthenaDriver.
#'
#' @param identifier.quote character used to quote identifiers; passed on to
#'        the JDBC driver constructor. Defaults to a backtick.
#' @export
Athena <- function(identifier.quote = '`') {

  # Path to the bundled driver jar ("" when the file cannot be found)
  jar_path <- system.file(
    "java", "AthenaJDBC42_2.0.6.jar", package = "metis.lite"
  )

  jdbc_drv <- JDBC(
    driverClass = "com.simba.athena.jdbc.Driver",
    classPath = jar_path,
    identifier.quote = identifier.quote
  )

  as(jdbc_drv, "AthenaDriver")

}
#' AthenaJDBC
#'
#' Connect to Athena
#'
#' @section Driver Configuration Options:
#'
#' - `BinaryColumnLength`: <int> The maximum data length for `BINARY` columns. Default `32767L`
#' - `ComplexTypeColumnLength`: <int> The maximum data length for `ARRAY`, `MAP`, and `STRUCT` columns. Default `65535L`
#' - `StringColumnLength`: <int> The maximum data length for `STRING` columns. Default `255L`
#'
#' @param drv an Athena driver object (see [Athena()])
#' @param provider JDBC auth provider (ideally leave default)
#' @param region AWS region the Athena tables are in
#' @param s3_staging_dir A write-able bucket on S3 that you have permissions for
#' @param schema_name schema name; used in the connection string and also sent
#'        to the driver as the `Schema` property
#' @param fetch_size stored (as an integer) on the returned connection and used
#'        as the fetch block size by `dbGetQuery()`
#' @param max_error_retries,connection_timeout,socket_timeout
#'        technical connection info that you should only muck with if you know what you're doing.
#' @param log_path,log_level The Athena JDBC driver can (shockingly) provide a decent bit
#'        of data in logs. Set `log_path` to a temporary directory or something log4j can use. For
#'        `log_level` use one of the names "OFF", "FATAL", "ERROR", "WARNING", "INFO",
#'        "DEBUG", "TRACE" (case-insensitive) or the corresponding integer value 0-6.
#'        Any other value is an error.
#' @param ... passed on to the driver. See the "Driver Configuration Options" section.
#' @return an `AthenaConnection`
#' @references [Connect with JDBC](https://docs.aws.amazon.com/athena/latest/ug/connect-with-jdbc.html);
#'             [Simba Athena JDBC Driver with SQL Connector Installation and Configuration Guide](https://s3.amazonaws.com/athena-downloads/drivers/JDBC/SimbaAthenaJDBC_2.0.6/docs/Simba+Athena+JDBC+Driver+Install+and+Configuration+Guide.pdf)
#' @export
setMethod(

  "dbConnect",
  "AthenaDriver",

  def = function(
    drv,
    provider = "com.simba.athena.amazonaws.auth.DefaultAWSCredentialsProviderChain",
    region = "us-east-1",
    s3_staging_dir = Sys.getenv("AWS_S3_STAGING_DIR"),
    schema_name = "default",
    fetch_size = 1000L,
    max_error_retries = 10,
    connection_timeout = 10000,
    socket_timeout = 10000,
    log_path = "",
    log_level = 0,
    ...) {

    conn_string <- sprintf(
      'jdbc:awsathena://athena.%s.amazonaws.com:443/%s', region, schema_name
    )

    if (length(log_level) != 1L) {
      stop("`log_level` must be a single value", call. = FALSE)
    }

    # Values already in 0-6 pass through untouched. Anything else must be a
    # known level *name*. Indexing the lookup table with an arbitrary number
    # would pick a level by position, and an unknown name would silently
    # become NA, so both are rejected here.
    if (!(log_level %in% 0:6)) {
      lvl <- if (is.character(log_level)) .ll_trans[toupper(log_level)] else NA
      if (is.na(lvl)) {
        stop(
          "`log_level` must be one of ",
          paste(names(.ll_trans), collapse = ", "),
          " or an integer from 0 to 6",
          call. = FALSE
        )
      }
      log_level <- lvl
    }

    jc <- callNextMethod(
      drv,
      conn_string,
      S3OutputLocation = s3_staging_dir,
      Schema = schema_name,
      MaxErrorRetry = max_error_retries,
      ConnectTimeout = connection_timeout,
      SocketTimeout = socket_timeout,
      LogPath = log_path,
      LogLevel = log_level,
      AwsCredentialsProviderClass = provider,
      ...
    )

    jc <- as(jc, "AthenaConnection")
    jc@fetch_size <- as.integer(fetch_size)

    return(jc)

  }

)
#' Athena JDBC connection class
#'
#' S4 class that extends JDBCConnection and declares the jc (Java object
#' reference), identifier.quote (character) and fetch_size (integer) slots.
#'
#' @export
setClass(
  "AthenaConnection",
  contains = "JDBCConnection",
  slots = c(
    jc = "jobjRef",
    identifier.quote = "character",
    fetch_size = "integer"
  )
)
# Earlier slot-less variant, kept for reference:
# setClass("AthenaConnection", contains = "JDBCConnection")
#' Athena JDBC result class
#'
#' S4 class that extends JDBCResult without adding any slots.
#'
#' @export
setClass(
  "AthenaResult",
  contains = "JDBCResult"
)
#' Send a query to Athena
#'
#' Delegates to the next (inherited) dbSendQuery method and re-classes what
#' it returns as an AthenaResult.
#'
#' @param conn Athena connection
#' @param statement SQL statement
#' @param ... unused
#' @export
setMethod(

  "dbSendQuery",
  signature(conn="AthenaConnection", statement="character"),

  definition = function(conn, statement, ...) {
    inherited_result <- callNextMethod()
    as(inherited_result, "AthenaResult")
  }

)
#' List Athena tables
#'
#' Runs a SHOW TABLES statement and returns the tab_name column of the
#' result.
#'
#' @param conn Athena connection
#' @param pattern table name pattern; only added to the statement when
#'        supplied explicitly
#' @param schema Athena schema name. Optional: when omitted, the statement has
#'        no IN clause, so which schema is listed is left to Athena.
#' @param ... unused
#' @export
setMethod(

  "dbListTables",
  signature(conn="AthenaConnection"),

  definition = function(conn, pattern='*', schema, ...) {

    # Build the statement piece by piece so a missing `schema` no longer
    # raises "argument is missing" from sprintf().
    query <- "SHOW TABLES"

    if (!missing(schema)) {
      query <- sprintf("%s IN %s", query, schema)
    }

    if (!missing(pattern)) {
      query <- sprintf("%s %s", query, dbQuoteString(conn, pattern))
    }

    x <- dbGetQuery(conn, query)

    x$tab_name

  }

)
#' Does an Athena table exist?
#'
#' Lists the tables in the schema using the table name as the pattern and
#' reports whether anything came back.
#'
#' @param conn Athena connection
#' @param name table name
#' @param schema Athena schema name
#' @param ... unused
#' @export
setMethod(

  "dbExistsTable",
  signature(conn="AthenaConnection", name="character"),

  definition = function(conn, name, schema, ...) {
    matching_tables <- dbListTables(conn, schema=schema, pattern=name)
    length(matching_tables) > 0
  }

)
#' List the fields of an Athena table
#'
#' Fetches a single row from the schema-qualified table and returns the
#' column names of the result.
#'
#' @param conn Athena connection
#' @param name table name
#' @param schema Athena schema name
#' @param ... unused
#' @export
setMethod(

  "dbListFields",
  signature(conn="AthenaConnection", name="character"),

  definition = function(conn, name, schema, ...) {
    # One row is enough: only the column names are needed
    one_row <- dbGetQuery(
      conn,
      sprintf("SELECT * FROM %s.%s LIMIT 1", schema, name)
    )
    colnames(one_row)
  }

)
#' Read an Athena table
#'
#' Selects every row of the schema-qualified table and returns the result of
#' the query.
#'
#' @param conn Athena connection
#' @param name table name
#' @param schema Athena schema name
#' @param ... unused
#' @export
setMethod(

  "dbReadTable",
  signature(conn="AthenaConnection", name="character"),

  definition = function(conn, name, schema, ...) {
    # The table name is an identifier, so it must not go through
    # dbQuoteString() (that produces a string literal and invalid SQL); it is
    # interpolated the same way dbListFields() does. There is no LIMIT
    # because reading a table means returning all of its rows.
    query <- sprintf("SELECT * FROM %s.%s", schema, name)
    dbGetQuery(conn, query)
  }

)

48
R/metis-lite-package.R

@ -1,48 +0,0 @@
#' Access and Query Amazon Athena via DBI/JDBC
#'
#' Methods are provided to connect to 'Amazon' 'Athena', lookup schemas/tables,
#' perform queries and retrieve query results. A lightweight 'RJDBC' implementation
#' is included along with an interface to the 'AWS' command-line utility.
#'
#' @name metis.lite
#'
#' @section IMPORTANT:
#'
#' Since R 3.5 (I don't remember this happening in R 3.4.x) signals sent from interrupting
#' Athena JDBC calls crash the R interpreter. You need to set the `-Xrs` option to avoid
#' signals being passed on to the JVM owner. That has to be done _before_ `rJava` is
#' loaded so you either need to remember to put it at the top of all scripts _or_ stick this
#' in your local `~/.Rprofile` and/or sitewide `Rprofile`:
#'
#'
#' if (!grepl("-Xrs", getOption("java.parameters", ""))) {
#' options(
#' "java.parameters" = c(getOption("java.parameters", default = NULL), "-Xrs")
#' )
#' }
#'
#' @md
#' @encoding UTF-8
#' @keywords internal
#' @docType package
#' @author Bob Rudis (bob@@rud.is)
#' @import RJDBC DBI bit64 dbplyr
#' @references [Simba Athena JDBC Driver with SQL Connector Installation and Configuration Guide](https://s3.amazonaws.com/athena-downloads/drivers/JDBC/SimbaAthenaJDBC_2.0.6/docs/Simba+Athena+JDBC+Driver+Install+and+Configuration+Guide.pdf)
NULL
#' Use Credentials from .aws/credentials File
#'
#' @md
#' @importFrom aws.signature use_credentials read_credentials
#' @references [aws.signature::use_credentials()] / [aws.signature::read_credentials()]
#' @name use_credentials
#' @rdname use_credentials
#' @inheritParams aws.signature::use_credentials
#' @export
NULL
#' @name read_credentials
#' @rdname use_credentials
#' @export
NULL

15
R/metis-tidy-package.R

@ -0,0 +1,15 @@
#' Access and Query Amazon Athena via the Tidyverse
#'
#' Methods are provided to use the 'metis' JDBC/DBI interface via
#' the Tidyverse (e.g. 'dbplyr'/'dplyr' idioms).
#'
#' @md
#' @name metis.tidy
#' @encoding UTF-8
#' @keywords internal
#' @docType package
#' @author Bob Rudis (bob@@rud.is)
#' @import metis DBI dbplyr
#' @importFrom dplyr tbl
#' @references [Simba Athena JDBC Driver with SQL Connector Installation and Configuration Guide](https://s3.amazonaws.com/athena-downloads/drivers/JDBC/SimbaAthenaJDBC_2.0.6/docs/Simba+Athena+JDBC+Driver+Install+and+Configuration+Guide.pdf)
NULL

68
R/metis.r

@ -1,68 +0,0 @@
#' Simplified Athena JDBC connection helper
#'
#' Handles the up-front JDBC config
#'
#' @md
#' @param default_schema default schema (you'll still need to fully qualify non-default schema table names)
#' @param region AWS region (Ref: <http://docs.aws.amazon.com/general/latest/gr/rande.html#athena>)
#' @param s3_staging_dir the Amazon S3 location to which your query output is written. The JDBC driver then asks Athena to read the results and provide rows of data back to the user.
#' @param max_error_retries the maximum number of retries that the JDBC client attempts to make a request to Athena.
#' @param connection_timeout the maximum amount of time, in milliseconds, to make a successful connection to Athena before an attempt is terminated.
#' @param socket_timeout the maximum amount of time, in milliseconds, to wait for a socket in order to send data to Athena.
# @param retry_base_delay minimum delay amount, in milliseconds, between retrying attempts to connect Athena.
# @param retry_max_backoff_time maximum delay amount, in milliseconds, between retrying attempts to connect Athena.
#' @param log_path local path of the Athena JDBC driver logs. If no log path is provided, then no log files are created.
#' @param log_level log level of the Athena JDBC driver logs. Use names
#' "OFF", "FATAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE".
#' @param ... passed on to the driver
#' @export
#' @references [Connect with JDBC](https://docs.aws.amazon.com/athena/latest/ug/connect-with-jdbc.html);
#' [Simba Athena JDBC Driver with SQL Connector Installation and Configuration Guide](https://s3.amazonaws.com/athena-downloads/drivers/JDBC/SimbaAthenaJDBC_2.0.6/docs/Simba+Athena+JDBC+Driver+Install+and+Configuration+Guide.pdf)
#' @examples \dontrun{
#' use_credentials("personal")
#'
#' athena_connect(
#' default_schema = "sampledb",
#' s3_staging_dir = "s3://accessible-bucket",
#' log_path = "/tmp/athena.log",
#' log_level = "DEBUG"
#' ) -> ath
#'
#' dbListTables(ath)
#'
#' dbGetQuery(ath, "SELECT * FROM sampledb.elb_logs LIMIT 1")
#'
#' }
athena_connect <- function(
  default_schema = "default",
  region = c("us-east-1", "us-east-2", "us-west-2"),
  s3_staging_dir = Sys.getenv("AWS_S3_STAGING_DIR"),
  max_error_retries = 10,
  connection_timeout = 10000,
  socket_timeout = 10000,
  log_path = "",
  log_level = c("OFF", "FATAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE"),
  ...
) {

  # The driver is created before any argument validation
  driver <- Athena()

  # Both enumerated arguments are matched against the choices listed in the
  # signature; leaving them at their defaults selects the first choice.
  region <- match.arg(region)
  log_level <- match.arg(log_level)

  connection <- dbConnect(
    driver,
    schema_name = default_schema,
    region = region,
    s3_staging_dir = s3_staging_dir,
    max_error_retries = max_error_retries,
    connection_timeout = connection_timeout,
    socket_timeout = socket_timeout,
    log_path = log_path,
    log_level = log_level,
    ...
  )

  connection

}

14
R/sql_translate_env.R

@ -1,5 +1,9 @@
#' @rdname Athena
#' @keywords internal
#' Convert R data type to Athena
#'
#' @rdname dbplyr-interface
#' @param con Athena connection
#' @param fields fields to type reference
#' @param ... ignored
#' @export
db_data_type.AthenaConnection <- function(con, fields, ...) {
print("\n\n\ndb_data_type\n\n\n")
@ -21,8 +25,10 @@ db_data_type.AthenaConnection <- function(con, fields, ...) {
vapply(fields, data_type, character(1))
}
#' @rdname Athena
#' @keywords internal
#' Translate R tidyverse functional idioms to Athena
#'
#' @rdname dbplyr-interface
#' @param con Athena connection
#' @export
sql_translate_env.AthenaConnection <- function(con) {

121
R/z-dbGetQuery.R

@ -1,121 +0,0 @@
# Converter functions keyed by JDBC column type code (stored as character
# names so they can be looked up with the string form of the integer code).
# The trailing comment on each entry is the JDBC type name.
.jdbc_converters <- list(
  "-7"   = as.logical,           # BIT
  "-6"   = as.integer,           # TINYINT
  "-5"   = bit64::as.integer64,  # BIGINT
  "-4"   = as.character,         # LONGVARBINARY
  "-3"   = as.character,         # VARBINARY
  "-2"   = as.character,         # BINARY
  "-1"   = as.character,         # LONGVARCHAR
  "0"    = as.character,         # NULL
  "1"    = as.character,         # CHAR
  "2"    = as.double,            # NUMERIC
  "3"    = as.double,            # DECIMAL
  "4"    = as.integer,           # INTEGER
  "5"    = as.integer,           # SMALLINT
  "6"    = as.double,            # FLOAT
  "7"    = as.double,            # REAL
  "8"    = as.double,            # DOUBLE
  "12"   = as.character,         # VARCHAR
  "16"   = as_logical,           # BOOLEAN
  "91"   = as_date,              # DATE
  "92"   = as.character,         # TIME
  "93"   = as_posixct,           # TIMESTAMP
  "2003" = as.character,         # ARRAY
  "1111" = as.character          # OTHER
)
# Driver metadata: a fixed name, the first jar file found in the installed
# package's java directory, and the installed package version.
#' @export
#' @keywords internal
setMethod(
  "dbGetInfo",
  "AthenaDriver",
  def = function(dbObj, ...) {
    jar_dir <- system.file("java", package="metis.lite")
    list(
      name = "AthenaJDBC",
      driver_version = list.files(jar_dir, "jar$")[1],
      package_version = utils::packageVersion("metis.lite")
    )
  }
)
# Connection metadata: the same three fields reported for the driver (fixed
# name, first jar in the package's java directory, package version).
#' @export
#' @keywords internal
setMethod(
  "dbGetInfo",
  "AthenaConnection",
  def = function(dbObj, ...) {
    jar_dir <- system.file("java", package="metis.lite")
    list(
      name = "AthenaJDBC",
      driver_version = list.files(jar_dir, "jar$")[1],
      package_version = utils::packageVersion("metis.lite")
    )
  }
)
#' Fetch records from a previously executed query
#'
#' Fetch the next `n` elements (rows) from the result set and return them
#' as a data.frame. After the inherited method has retrieved the rows, each
#' column is converted with the function registered for its JDBC type code;
#' columns whose type code has no registered converter are converted with
#' `as.character`.
#'
#' @param res An object inheriting from [DBIResult-class], created by
#'        [dbSendQuery()].
#' @param n maximum number of records to retrieve per fetch. Use `n = -1`
#'        or `n = Inf`
#'        to retrieve all pending records.  Some implementations may recognize other
#'        special values.
#' @param block passed through unchanged to the inherited `fetch` method
#' @param ... Other arguments passed on to methods.
#' @export
setMethod(

  "fetch",
  signature(res="AthenaResult", n="numeric"),

  def = function(res, n, block = 1000L, ...) {

    cols <- .jcall(res@md, "I", "getColumnCount")

    # Preallocate: one converter and one label per result column
    nms <- character(cols)
    athena_type_convert <- vector("list", cols)

    # seq_len() so a zero-column result does not iterate over c(1, 0)
    for (i in seq_len(cols)) {

      ct <- as.character(.jcall(res@md, "I", "getColumnType", i))

      # Resolve the fallback here: assigning NULL with `[[<-` would drop the
      # element and leave the list shorter than `nms`.
      f <- .jdbc_converters[[ct]]
      if (is.null(f)) f <- as.character # catchall in case AMZN is tricksy

      athena_type_convert[[i]] <- f
      nms[i] <- .jcall(res@md, "S", "getColumnLabel", i)

    }

    names(athena_type_convert) <- nms

    out <- callNextMethod(res = res, n = n, block = block, ...)

    for (nm in names(athena_type_convert)) {
      out[[nm]] <- athena_type_convert[[nm]](out[[nm]])
    }

    out

  }

)
#' Run a query on Athena and return all rows
#'
#' Sends the statement, fetches every pending row using the connection's
#' fetch_size as the block size, and returns the rows with the tibble class
#' vector set. The underlying statement is closed when the function exits.
#'
#' @param conn Athena connection
#' @param statement SQL statement
#' @param ... unused
#' @importFrom rJava .jcall
#' @export
setMethod(

  "dbGetQuery",
  signature(conn="AthenaConnection", statement="character"),

  definition = function(conn, statement, ...) {

    result_set <- dbSendQuery(conn, statement, ...)

    # Close the JDBC statement on the way out, whether or not fetch succeeds
    on.exit(.jcall(result_set@stat, "V", "close"))

    rows <- fetch(result_set, -1, block = conn@fetch_size)

    class(rows) <- c("tbl_df", "tbl", "data.frame")

    rows

  }

)

9
R/zzz.R

@ -1,11 +1,2 @@
# Package load hook: initialises rJava for this package, adds any files found
# under inst/java of the current working directory to the Java class path,
# and emits a startup message when "-Xrs" is absent from the
# `java.parameters` option.
.onLoad <- function(libname, pkgname) {

  rJava::.jpackage(pkgname, jars = "*", lib.loc = libname)

  local_jars <- dir(file.path(getwd(), "inst/java"), full.names = TRUE)
  rJava::.jaddClassPath(local_jars)

  java_params <- getOption("java.parameters", "")
  xrs_is_set <- any(grepl("-Xrs", java_params))

  if (!xrs_is_set) {
    packageStartupMessage(
      "Did not find '-Xrs' in java.parameters option. Until rJava is updated, ",
      "please set this up in your/an Rprofile or at the start of scripts."
    )
  }

}

148
README.Rmd

@ -3,131 +3,85 @@ output: rmarkdown::github_document
editor_options:
chunk_output_type: console
---
```{r include=FALSE}
knitr::opts_chunk$set(
echo = TRUE,
message = FALSE,
warning = FALSE,
fig.retina = 2
)
Sys.setenv(
AWS_S3_STAGING_DIR = "s3://aws-athena-query-results-569593279821-us-east-1"
)
# metis
Access and Query Amazon Athena via DBI/JDBC
## Description
In Greek mythology, Metis was Athena's "helper" so methods are provided to help you accessing and querying Amazon Athena via DBI/JDBC and/or `dplyr`.
#' Methods are provides to connect to 'Amazon' 'Athena', lookup schemas/tables,
## IMPORTANT
Since R 3.5 (I don't remember this happening in R 3.4.x) signals sent from interrupting Athena JDBC calls crash the R interpreter. You need to set the `-Xrs` option to avoid signals being passed on to the JVM owner. That has to be done _before_ `rJava` is loaded so you either need to remember to put it at the top of all scripts _or_ stick this in your local `~/.Rprofile` and/or sitewide `Rprofile`:
```r
if (!grepl("-Xrs", getOption("java.parameters", ""))) {
options(
"java.parameters" = c(getOption("java.parameters", default = NULL), "-Xrs")
)
}
options(width=120)
```
## What's Inside The Tin?
The following functions are implemented:
Easy-interface connection helper:
# metis.tidy
- `athena_connect` Simplified Athena JDBC connection helper
Access and Query Amazon Athena via the Tidyverse
Custom JDBC Classes:
- `Athena`: AthenaJDBC (make a new Athena con obj)
- `AthenaConnection-class`: AthenaJDBC
- `AthenaDriver-class`: AthenaJDBC
- `AthenaResult-class`: AthenaJDBC
Custom JDBC Class Methods:
## Description
- `dbConnect-method`
- `dbExistsTable-method`
- `dbGetQuery-method`
- `dbListFields-method`
- `dbListTables-method`
- `dbReadTable-method`
- `dbSendQuery-method`
Methods are provided to use the 'metis' JDBC/DBI interface via the Tidyverse
(e.g. 'dbplyr'/'dplyr' idioms).
Pulled in from other `cloudyr` pkgs:
## What's Inside The Tin?
- `read_credentials`: Use Credentials from .aws/credentials File
- `use_credentials`: Use Credentials from .aws/credentials File
Lightweight helpers to make it easier to `filter` and `mutate` plus type support for Athena `BIGINT` (64-bit integers).
## Installation
```{r eval=FALSE}
devtools::install_git("https://git.sr.ht/~hrbrmstr/metis-lite")
devtools::install_git("https://git.sr.ht/~hrbrmstr/metis-tidy")
# OR
devtools::install_gitlab("hrbrmstr/metis-lite")
devtools::install_gitlab("hrbrmstr/metis-tidy")
# OR
devtools::install_github("hrbrmstr/metis-lite")
```
```{r message=FALSE, warning=FALSE, include=FALSE}
options(width=120)
devtools::install_github("hrbrmstr/metis-tidy")
```
## Usage
```{r message=FALSE, warning=FALSE}
library(metis.lite)
```{r}
library(metis.tidy)
# current version
packageVersion("metis.lite")
packageVersion("metis.tidy")
```
```{r message=FALSE, warning=FALSE}
library(rJava)
library(RJDBC)
library(metis.lite)
library(magrittr)
library(dbplyr)
library(dplyr)
dbConnect(
drv = metis.lite::Athena(),
schema_name = "sampledb",
provider = "com.simba.athena.amazonaws.auth.PropertiesFileCredentialsProvider",
AwsCredentialsProviderArguments = path.expand("~/.aws/athenaCredentials.props"),
s3_staging_dir = "s3://aws-athena-query-results-569593279821-us-east-1",
) -> con
dbListTables(con, schema="sampledb")
### Basic Setup (using an alternate provider)
dbExistsTable(con, "elb_logs", schema="sampledb")
```{r}
library(metis.tidy)
library(tidyverse)
metis::dbConnect(
metis::Athena(),
Schema = "sampledb",
AwsCredentialsProviderClass = "com.simba.athena.amazonaws.auth.PropertiesFileCredentialsProvider",
AwsCredentialsProviderArguments = path.expand("~/.aws/athenaCredentials.props")
) -> con
dbListFields(con, "elb_logs", "sampledb")
elb_logs <- tbl(con, "elb_logs")
dbGetQuery(con, "SELECT * FROM sampledb.elb_logs LIMIT 10") %>%
glimpse()
glimpse(elb_logs)
```
### Check types
#### Using custom Athena functions
```{r}
dbGetQuery(con, "
SELECT
CAST('chr' AS CHAR(4)) achar,
CAST('varchr' AS VARCHAR) avarchr,
CAST(SUBSTR(timestamp, 1, 10) AS DATE) AS tsday,
CAST(100.1 AS DOUBLE) AS justadbl,
CAST(127 AS TINYINT) AS asmallint,
CAST(100 AS INTEGER) AS justanint,
CAST(100000000000000000 AS BIGINT) AS abigint,
CAST(('GET' = 'GET') AS BOOLEAN) AS is_get,
ARRAY[1, 2, 3] AS arr1,
ARRAY['1', '2, 3', '4'] AS arr2,
MAP(ARRAY['foo', 'bar'], ARRAY[1, 2]) AS mp,
CAST(ROW(1, 2.0) AS ROW(x BIGINT, y DOUBLE)) AS rw,
CAST('{\"a\":1}' AS JSON) js
FROM elb_logs
LIMIT 1
") %>%
filter(elb_logs, elbresponsecode == "200") %>%
mutate(
tsday = as.Date(substring(timestamp, 1L, 10L)),
host = url_extract_host(url),
proto_version = regexp_extract(protocol, "([[:digit:]\\.]+)"),
) %>%
select(tsday, host, receivedbytes, requestprocessingtime, proto_version) %>%
glimpse()
```
#### dplyr
#### All the types work. Some are useful.
```{r}
tbl(con, sql("
@ -151,6 +105,12 @@ LIMIT 1
glimpse()
```
```{r cloc}
cloc::cloc_pkg_md()
```
## Code of Conduct
Please note that this project is released with a [Contributor Code of Conduct](CONDUCT.md). By participating in this project you agree to abide by its terms.
Please note that this project is released with a
[Contributor Code of Conduct](CONDUCT.md). By participating in this project you
agree to abide by its terms.

207
README.md

@ -1,184 +1,100 @@
# metis
# metis.tidy
Access and Query Amazon Athena via DBI/JDBC
Access and Query Amazon Athena via the Tidyverse
## Description
In Greek mythology, Metis was Athena’s “helper” so methods are provided
to help you accessing and querying Amazon Athena via DBI/JDBC and/or
`dplyr`. \#’ Methods are provides to connect to ‘Amazon’ ‘Athena’,
lookup schemas/tables,
## IMPORTANT
Since R 3.5 (I don’t remember this happening in R 3.4.x) signals sent
from interrupting Athena JDBC calls crash the R interpreter. You need to
set the `-Xrs` option to avoid signals being passed on to the JVM owner.
That has to be done *before* `rJava` is loaded so you either need to
remember to put it at the top of all scripts *or* stick this in your
local `~/.Rprofile` and/or sitewide `Rprofile`:
``` r
if (!grepl("-Xrs", getOption("java.parameters", ""))) {
options(
"java.parameters" = c(getOption("java.parameters", default = NULL), "-Xrs")
)
}
```
Methods are provided to use the ‘metis’ JDBC/DBI interface via the
Tidyverse (e.g. ‘dbplyr’/‘dplyr’ idioms).
## What’s Inside The Tin?
The following functions are implemented:
Easy-interface connection helper:
- `athena_connect` Simplified Athena JDBC connection helper
Custom JDBC Classes:
- `Athena`: AthenaJDBC (make a new Athena con obj)
- `AthenaConnection-class`: AthenaJDBC
- `AthenaDriver-class`: AthenaJDBC
- `AthenaResult-class`: AthenaJDBC
Custom JDBC Class Methods:
- `dbConnect-method`
- `dbExistsTable-method`
- `dbGetQuery-method`
- `dbListFields-method`
- `dbListTables-method`
- `dbReadTable-method`
- `dbSendQuery-method`
Pulled in from other `cloudyr` pkgs:
- `read_credentials`: Use Credentials from .aws/credentials File
- `use_credentials`: Use Credentials from .aws/credentials File
Lightweight helpers to make it easier to `filter` and `mutate` plus type
support for Athena `BIGINT` (64-bit integers).
## Installation
``` r
devtools::install_git("https://git.sr.ht/~hrbrmstr/metis-lite")
devtools::install_git("https://git.sr.ht/~hrbrmstr/metis-tidy")
# OR
devtools::install_gitlab("hrbrmstr/metis-lite")
devtools::install_gitlab("hrbrmstr/metis-tidy")
# OR
devtools::install_github("hrbrmstr/metis-lite")
devtools::install_github("hrbrmstr/metis-tidy")
```
## Usage
``` r
library(metis.lite)
library(metis.tidy)
# current version
packageVersion("metis.lite")
packageVersion("metis.tidy")
```
## [1] '0.3.0'
``` r
library(rJava)
library(RJDBC)
library(metis.lite)
library(magrittr)
library(dbplyr)
library(dplyr)
dbConnect(
drv = metis.lite::Athena(),
schema_name = "sampledb",
provider = "com.simba.athena.amazonaws.auth.PropertiesFileCredentialsProvider",
AwsCredentialsProviderArguments = path.expand("~/.aws/athenaCredentials.props"),
s3_staging_dir = "s3://aws-athena-query-results-569593279821-us-east-1",
) -> con
dbListTables(con, schema="sampledb")
```
## [1] "elb_logs"
### Basic Setup (using an alternate provider)
``` r
dbExistsTable(con, "elb_logs", schema="sampledb")
```
library(metis.tidy)
library(tidyverse)
metis::dbConnect(
metis::Athena(),
Schema = "sampledb",
AwsCredentialsProviderClass = "com.simba.athena.amazonaws.auth.PropertiesFileCredentialsProvider",
AwsCredentialsProviderArguments = path.expand("~/.aws/athenaCredentials.props")
) -> con
## [1] TRUE
elb_logs <- tbl(con, "elb_logs")
``` r
dbListFields(con, "elb_logs", "sampledb")
glimpse(elb_logs)
```
## [1] "timestamp" "elbname" "requestip" "requestport"
## [5] "backendip" "backendport" "requestprocessingtime" "backendprocessingtime"
## [9] "clientresponsetime" "elbresponsecode" "backendresponsecode" "receivedbytes"
## [13] "sentbytes" "requestverb" "url" "protocol"
``` r
dbGetQuery(con, "SELECT * FROM sampledb.elb_logs LIMIT 10") %>%
glimpse()
```
## Observations: 10
## Observations: ??
## Variables: 16
## $ timestamp <chr> "2014-09-29T18:18:51.826955Z", "2014-09-29T18:18:51.920462Z", "2014-09-29T18:18:52.2725…
## Database: AthenaConnection
## $ timestamp <chr> "2014-09-26T22:00:22.979295Z", "2014-09-26T22:29:23.126181Z", "2014-09-26T22:29:28.5918…
## $ elbname <chr> "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo",…
## $ requestip <chr> "255.48.150.122", "249.213.227.93", "245.108.120.229", "241.112.203.216", "241.43.107.2
## $ requestport <int> 62096, 62096, 62096, 62096, 56454, 33254, 18918, 64352, 1651, 56454
## $ backendip <chr> "244.238.214.120", "248.99.214.228", "243.3.190.175", "246.235.181.255", "241.112.203.2
## $ backendport <int> 8888, 8888, 8888, 8888, 8888, 8888, 8888, 8888, 8888, 8888
## $ requestprocessingtime <dbl> 9.0e-05, 9.7e-05, 8.7e-05, 9.4e-05, 7.6e-05, 8.3e-05, 6.3e-05, 5.4e-05, 8.2e-05, 8.7e-05
## $ backendprocessingtime <dbl> 0.007410, 0.256533, 0.442659, 0.016772, 0.035036, 0.029892, 0.034148, 0.014858, 0.01518
## $ clientresponsetime <dbl> 0.000055, 0.000075, 0.000131, 0.000078, 0.000057, 0.000043, 0.000033, 0.000043, 0.00007
## $ elbresponsecode <chr> "302", "302", "200", "200", "200", "200", "200", "200", "200", "200"
## $ backendresponsecode <chr> "200", "200", "200", "200", "200", "200", "200", "200", "200", "200"
## $ receivedbytes <S3: integer64> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
## $ sentbytes <S3: integer64> 0, 0, 58402, 152213, 20766, 32370, 3408, 3884, 84245, 3831
## $ requestverb <chr> "GET", "GET", "GET", "GET", "GET", "GET", "GET", "GET", "GET", "GET"
## $ url <chr> "http://www.abcxyz.com:80/", "http://www.abcxyz.com:80/accounts/login/?next=/", "http:/…
## $ requestip <chr> "247.43.35.131", "241.223.213.183", "253.116.237.195", "242.66.178.92", "255.185.7.21",…
## $ requestport <int> 37400, 45861, 37986, 57949, 62239, 9273, 17666, 62239, 62239, 62239, 15875, 37677, 2813…
## $ backendip <chr> "253.223.87.30", "252.173.201.86", "250.50.14.107", "247.172.229.147", "253.141.227.189…
## $ backendport <int> 80, 443, 8888, 8888, 8000, 8888, 8888, 8888, 8899, 8888, 8899, 8888, 8888, 8899, 8888, …
## $ requestprocessingtime <dbl> 0.000092, 0.000074, 0.000076, 0.000102, 0.000067, 0.000051, 0.000057, 0.000079, 0.00009…
## $ backendprocessingtime <dbl> 0.046512, 0.319001, 0.411608, 0.410884, 0.021358, 0.017171, 0.161456, 0.040714, 0.03277…
## $ clientresponsetime <dbl> 0.000068, 0.000074, 0.000070, 0.000068, 0.000040, 0.000032, 0.000042, 0.000044, 0.00004
## $ elbresponsecode <chr> "200", "500", "500", "500", "200", "200", "500", "200", "200", "200", "200", "200", "20…
## $ backendresponsecode <chr> "200", "200", "404", "200", "200", "200", "404", "403", "200", "200", "200", "404", "50…
## $ receivedbytes <S3: integer64> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ sentbytes <S3: integer64> 2, 30256, 30256, 30256, 52442, 8194, 27952, 1888, 2, 70883, 40191, 1717, 614,…
## $ requestverb <chr> "GET", "GET", "GET", "GET", "GET", "GET", "GET", "GET", "GET", "GET", "GET", "GET", "GE…
## $ url <chr> "http://www.abcxyz.com:80/jobbrowser/?format=json&state=running&user=fmi7id4", "http://…
## $ protocol <chr> "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HT…
### Check types
#### Using custom Athena functions
``` r
dbGetQuery(con, "
SELECT
CAST('chr' AS CHAR(4)) achar,
CAST('varchr' AS VARCHAR) avarchr,
CAST(SUBSTR(timestamp, 1, 10) AS DATE) AS tsday,
CAST(100.1 AS DOUBLE) AS justadbl,
CAST(127 AS TINYINT) AS asmallint,
CAST(100 AS INTEGER) AS justanint,
CAST(100000000000000000 AS BIGINT) AS abigint,
CAST(('GET' = 'GET') AS BOOLEAN) AS is_get,
ARRAY[1, 2, 3] AS arr1,
ARRAY['1', '2, 3', '4'] AS arr2,
MAP(ARRAY['foo', 'bar'], ARRAY[1, 2]) AS mp,
CAST(ROW(1, 2.0) AS ROW(x BIGINT, y DOUBLE)) AS rw,
CAST('{\"a\":1}' AS JSON) js
FROM elb_logs
LIMIT 1
") %>%
filter(elb_logs, elbresponsecode == "200") %>%
mutate(
tsday = as.Date(substring(timestamp, 1L, 10L)),
host = url_extract_host(url),
proto_version = regexp_extract(protocol, "([[:digit:]\\.]+)"),
) %>%
select(tsday, host, receivedbytes, requestprocessingtime, proto_version) %>%
glimpse()
```
## Observations: 1
## Variables: 13
## $ achar <chr> "chr "
## $ avarchr <chr> "varchr"
## $ tsday <date> 2014-09-26
## $ justadbl <dbl> 100.1
## $ asmallint <int> 127
## $ justanint <int> 100
## $ abigint <S3: integer64> 100000000000000000
## $ is_get <lgl> TRUE
## $ arr1 <chr> "1, 2, 3"
## $ arr2 <chr> "1, 2, 3, 4"
## $ mp <chr> "{bar=2, foo=1}"
## $ rw <chr> "{x=1, y=2.0}"
## $ js <chr> "\"{\\\"a\\\":1}\""
## Observations: ??
## Variables: 5
## Database: AthenaConnection
## $ tsday <date> 2014-09-26, 2014-09-26, 2014-09-26, 2014-09-26, 2014-09-26, 2014-09-26, 2014-09-26, 20…
## $ host <chr> "www.abcxyz.com", "www.abcxyz.com", "www.abcxyz.com", "www.abcxyz.com", "www.abcxyz.com…
## $ receivedbytes <S3: integer64> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ requestprocessingtime <dbl> 0.000089, 0.000087, 0.000084, 0.000079, 0.000120, 0.000081, 0.000090, 0.000091, 0.00009…
## $ proto_version <chr> "1.1", "1.1", "1.1", "1.1", "1.1", "1.1", "1.1", "1.1", "1.1", "1.1", "1.1", "1.1", "1.…
#### dplyr
#### All the types work. Some are useful.
``` r
tbl(con, sql("
@ -207,7 +123,7 @@ LIMIT 1
## Database: AthenaConnection
## $ achar <chr> "chr "
## $ avarchr <chr> "varchr"
## $ tsday <date> 2014-09-27
## $ tsday <date> 2014-09-26
## $ justadbl <dbl> 100.1
## $ asmallint <int> 127
## $ justanint <int> 100
@ -219,6 +135,15 @@ LIMIT 1
## $ rw <chr> "{x=1, y=2.0}"
## $ js <chr> "\"{\\\"a\\\":1}\""
``` r
cloc::cloc_pkg_md()
```
| Lang | \# Files | (%) | LoC | (%) | Blank lines | (%) | \# Lines | (%) |
| :--- | -------: | ---: | --: | ---: | ----------: | ---: | -------: | ---: |
| R | 6 | 0.86 | 131 | 0.71 | 13 | 0.34 | 27 | 0.42 |
| Rmd | 1 | 0.14 | 53 | 0.29 | 25 | 0.66 | 38 | 0.58 |
## Code of Conduct
Please note that this project is released with a [Contributor Code of

18
man/Athena.Rd

@ -1,18 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/jdbc.r, R/sql_translate_env.R
\name{Athena}
\alias{Athena}
\alias{db_data_type.AthenaConnection}
\alias{sql_translate_env.AthenaConnection}
\title{AthenaJDBC}
\usage{
Athena(identifier.quote = "`")
db_data_type.AthenaConnection(con, fields, ...)
sql_translate_env.AthenaConnection(con)
}
\description{
AthenaJDBC
}
\keyword{internal}

9
man/AthenaConnection-class.Rd

@ -1,9 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/jdbc.r
\docType{class}
\name{AthenaConnection-class}
\alias{AthenaConnection-class}
\title{AthenaJDBC}
\description{
AthenaJDBC
}

9
man/AthenaDriver-class.Rd

@ -1,9 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/jdbc.r
\docType{class}
\name{AthenaDriver-class}
\alias{AthenaDriver-class}
\title{AthenaJDBC}
\description{
AthenaJDBC
}

9
man/AthenaResult-class.Rd

@ -1,9 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/jdbc.r
\docType{class}
\name{AthenaResult-class}
\alias{AthenaResult-class}
\title{AthenaJDBC}
\description{
AthenaJDBC
}

57
man/athena_connect.Rd

@ -1,57 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/metis.r
\name{athena_connect}
\alias{athena_connect}
\title{Simplified Athena JDBC connection helper}
\usage{
athena_connect(default_schema = "default", region = c("us-east-1",
"us-east-2", "us-west-2"),
s3_staging_dir = Sys.getenv("AWS_S3_STAGING_DIR"),
max_error_retries = 10, connection_timeout = 10000,
socket_timeout = 10000, log_path = "", log_level = c("OFF",
"FATAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE"), ...)
}
\arguments{
\item{default_schema}{default schema (you'll still need to fully qualify non-default schema table names)}
\item{region}{AWS region (Ref: \url{http://docs.aws.amazon.com/general/latest/gr/rande.html#athena})}
\item{s3_staging_dir}{the Amazon S3 location to which your query output is written. The JDBC driver then asks Athena to read the results and provide rows of data back to the user.}
\item{max_error_retries}{the maximum number of retries that the JDBC client attempts to make a request to Athena.}
\item{connection_timeout}{the maximum amount of time, in milliseconds, to make a successful connection to Athena before an attempt is terminated.}
\item{socket_timeout}{the maximum amount of time, in milliseconds, to wait for a socket in order to send data to Athena.}
\item{log_path}{local path of the Athena JDBC driver logs. If no log path is provided, then no log files are created.}
\item{log_level}{log level of the Athena JDBC driver logs. Use names
"OFF", "FATAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE".}
\item{...}{passed on to the driver}
}
\description{
Handles the up-front JDBC config
}
\examples{
\dontrun{
use_credentials("personal")
athena_connect(
default_schema = "sampledb",
s3_staging_dir = "s3://accessible-bucket",
log_path = "/tmp/athena.log",
log_level = "DEBUG"
) -> ath
dbListTables(ath)
dbGetQuery(ath, "SELECT * FROM sampledb.elb_logs LIMIT 1")
}
}
\references{
\href{https://docs.aws.amazon.com/athena/latest/ug/connect-with-jdbc.html}{Connect with JDBC};
\href{https://s3.amazonaws.com/athena-downloads/drivers/JDBC/SimbaAthenaJDBC_2.0.6/docs/Simba+Athena+JDBC+Driver+Install+and+Configuration+Guide.pdf}{Simba Athena JDBC Driver with SQL Connector Installation and Configuration Guide}
}

48
man/dbConnect-AthenaDriver-method.Rd

@ -1,48 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/jdbc.r
\docType{methods}
\name{dbConnect,AthenaDriver-method}
\alias{dbConnect,AthenaDriver-method}
\title{AthenaJDBC}
\usage{
\S4method{dbConnect}{AthenaDriver}(drv,
provider = "com.simba.athena.amazonaws.auth.DefaultAWSCredentialsProviderChain",
region = "us-east-1",
s3_staging_dir = Sys.getenv("AWS_S3_STAGING_DIR"),
schema_name = "default", fetch_size = 1000L,
max_error_retries = 10, connection_timeout = 10000,
socket_timeout = 10000, log_path = "", log_level = 0, ...)
}
\arguments{
\item{provider}{JDBC auth provider (ideally leave default)}
\item{region}{AWS region the Athena tables are in}
\item{s3_staging_dir}{A write-able bucket on S3 that you have permissions for}
\item{schema_name}{LOL if only this actually worked with Amazon's hacked Presto driver}
\item{max_error_retries, connection_timeout, socket_timeout}{technical connection info that you should only muck with if you know what you're doing.}
\item{log_path, log_level}{The Athena JDBC driver can (shockingly) provide a decent bit
of data in logs. Set this to a temporary directory or something log4j can use. For
`log_level` use the names ("INFO", "DEBUG", "WARN", "ERROR", "ALL", "OFF", "FATAL", "TRACE") or
their corresponding integer values 0-6.}
\item{...}{passed on to the driver. See Details.}
}
\description{
Connect to Athena
}
\section{Driver Configuration Options}{
- `BinaryColumnLength`: <int> The maximum data length for `BINARY` columns. Default `32767L`
- `ComplexTypeColumnLength`: <int> The maximum data length for `ARRAY`, `MAP`, and `STRUCT` columns. Default `65535L`
- `StringColumnLength`: <int> The maximum data length for `STRING` columns. Default `255L`
}
\references{
[Connect with JDBC](https://docs.aws.amazon.com/athena/latest/ug/connect-with-jdbc.html);
[Simba Athena JDBC Driver with SQL Connector Installation and Configuration Guide](https://s3.amazonaws.com/athena-downloads/drivers/JDBC/SimbaAthenaJDBC_2.0.6/docs/Simba+Athena+JDBC+Driver+Install+and+Configuration+Guide.pdf)
}

22
man/dbExistsTable-AthenaConnection-character-method.Rd

@ -1,22 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/jdbc.r
\docType{methods}
\name{dbExistsTable,AthenaConnection,character-method}
\alias{dbExistsTable,AthenaConnection,character-method}
\title{AthenaJDBC}
\usage{
\S4method{dbExistsTable}{AthenaConnection,character}(conn, name, schema,
...)
}
\arguments{
\item{conn}{Athena connection}
\item{name}{table name}
\item{schema}{Athena schema name}
\item{...}{unused}
}
\description{
AthenaJDBC
}

19
man/dbGetQuery-AthenaConnection-character-method.Rd

@ -1,19 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/z-dbGetQuery.R
\docType{methods}
\name{dbGetQuery,AthenaConnection,character-method}
\alias{dbGetQuery,AthenaConnection,character-method}
\title{AthenaJDBC}
\usage{
\S4method{dbGetQuery}{AthenaConnection,character}(conn, statement, ...)
}
\arguments{
\item{conn}{Athena connection}
\item{statement}{SQL statement}
\item{...}{unused}
}
\description{
AthenaJDBC
}

22
man/dbListFields-AthenaConnection-character-method.Rd

@ -1,22 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/jdbc.r
\docType{methods}
\name{dbListFields,AthenaConnection,character-method}
\alias{dbListFields,AthenaConnection,character-method}
\title{AthenaJDBC}
\usage{
\S4method{dbListFields}{AthenaConnection,character}(conn, name, schema,
...)
}
\arguments{
\item{conn}{Athena connection}
\item{name}{table name}
\item{schema}{Athena schema name}
\item{...}{unused}
}
\description{
AthenaJDBC
}

22
man/dbListTables-AthenaConnection-method.Rd

@ -1,22 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/jdbc.r
\docType{methods}
\name{dbListTables,AthenaConnection-method}
\alias{dbListTables,AthenaConnection-method}
\title{AthenaJDBC}
\usage{
\S4method{dbListTables}{AthenaConnection}(conn, pattern = "*", schema,
...)
}
\arguments{
\item{conn}{Athena connection}
\item{pattern}{table name pattern}
\item{schema}{Athena schema name}
\item{...}{unused}
}
\description{
AthenaJDBC
}

21
man/dbReadTable-AthenaConnection-character-method.Rd

@ -1,21 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/jdbc.r
\docType{methods}
\name{dbReadTable,AthenaConnection,character-method}
\alias{dbReadTable,AthenaConnection,character-method}
\title{AthenaJDBC}
\usage{
\S4method{dbReadTable}{AthenaConnection,character}(conn, name, schema, ...)
}
\arguments{
\item{conn}{Athena connection}
\item{name}{table name}
\item{schema}{Athena schema name}
\item{...}{unused}
}
\description{
AthenaJDBC
}

19
man/dbSendQuery-AthenaConnection-character-method.Rd

@ -1,19 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/jdbc.r
\docType{methods}
\name{dbSendQuery,AthenaConnection,character-method}
\alias{dbSendQuery,AthenaConnection,character-method}
\title{AthenaJDBC}
\usage{
\S4method{dbSendQuery}{AthenaConnection,character}(conn, statement, ...)
}
\arguments{
\item{conn}{Athena connection}
\item{statement}{SQL statement}
\item{...}{unused}
}
\description{
AthenaJDBC
}

25
man/dbplyr-interface.Rd

@ -0,0 +1,25 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/sql_translate_env.R
\name{db_data_type.AthenaConnection}
\alias{db_data_type.AthenaConnection}
\alias{sql_translate_env.AthenaConnection}
\title{Convert R data type to Athena}
\usage{
db_data_type.AthenaConnection(con, fields, ...)
sql_translate_env.AthenaConnection(con)
}
\arguments{
\item{con}{Athena connection}
\item{fields}{fields to type reference}
\item{...}{ignored}
}
\description{
Convert R data type to Athena
Translate R tidyverse functional idioms to Athena
}

24
man/fetch-AthenaResult-numeric-method.Rd

@ -1,24 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/z-dbGetQuery.R
\docType{methods}
\name{fetch,AthenaResult,numeric-method}
\alias{fetch,AthenaResult,numeric-method}
\title{Fetch records from a previously executed query}
\usage{
\S4method{fetch}{AthenaResult,numeric}(res, n, block = 1000L, ...)
}
\arguments{
\item{res}{An object inheriting from [DBIResult-class], created by
[dbSendQuery()].}
\item{n}{maximum number of records to retrieve per fetch. Use `n = -1`
or `n = Inf`
to retrieve all pending records. Some implementations may recognize other
special values.}
\item{...}{Other arguments passed on to methods.}
}
\description{
Fetch the next `n` elements (rows) from the result set and return them
as a data.frame.
}

35
man/metis.lite.Rd

@ -1,35 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/metis-lite-package.R
\docType{package}
\encoding{UTF-8}
\name{metis.lite}
\alias{metis.lite}
\alias{metis.lite-package}
\title{Access and Query Amazon Athena via DBI/JDBC}
\description{
Methods are provided to connect to 'Amazon' 'Athena', look up schemas/tables,
perform queries and retrieve query results. A lightweight 'RJDBC' implementation
is included along with an interface to the 'AWS' command-line utility.
}
\section{IMPORTANT}{
Since R 3.5 (I don't remember this happening in R 3.4.x) signals sent from interrupting
Athena JDBC calls crash the R interpreter. You need to set the \code{-Xrs} option to avoid
signals being passed on to the JVM owner. That has to be done \emph{before} \code{rJava} is
loaded so you either need to remember to put it at the top of all scripts \emph{or} stick this
in your local \code{~/.Rprofile} and/or sitewide \code{Rprofile}:\preformatted{if (!grepl("-Xrs", getOption("java.parameters", ""))) {
options(
"java.parameters" = c(getOption("java.parameters", default = NULL), "-Xrs")
)
}
}
}
\references{
\href{https://s3.amazonaws.com/athena-downloads/drivers/JDBC/SimbaAthenaJDBC_2.0.6/docs/Simba+Athena+JDBC+Driver+Install+and+Configuration+Guide.pdf}{Simba Athena JDBC Driver with SQL Connector Installation and Configuration Guide}
}
\author{
Bob Rudis (bob@rud.is)
}
\keyword{internal}

19
man/metis.tidy.Rd

@ -0,0 +1,19 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/metis-tidy-package.R
\docType{package}
\encoding{UTF-8}
\name{metis.tidy}
\alias{metis.tidy}
\alias{metis.tidy-package}
\title{Access and Query Amazon Athena via the Tidyverse}
\description{
Methods are provided to use the 'metis' JDBC/DBI interface via
the Tidyverse (e.g. 'dbplyr'/'dplyr' idioms).
}
\references{
\href{https://s3.amazonaws.com/athena-downloads/drivers/JDBC/SimbaAthenaJDBC_2.0.6/docs/Simba+Athena+JDBC+Driver+Install+and+Configuration+Guide.pdf}{Simba Athena JDBC Driver with SQL Connector Installation and Configuration Guide}
}
\author{
Bob Rudis (bob@rud.is)
}
\keyword{internal}

12
man/use_credentials.Rd

@ -1,12 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/metis-lite-package.R
\name{use_credentials}
\alias{use_credentials}
\alias{read_credentials}
\title{Use Credentials from .aws/credentials File}
\description{
Use Credentials from .aws/credentials File
}
\references{
\code{\link[aws.signature:use_credentials]{aws.signature::use_credentials()}} / \code{\link[aws.signature:read_credentials]{aws.signature::read_credentials()}}
}

0
metis-lite.Rproj → metis-tidy.Rproj

2
tests/test-all.R

@ -1,2 +1,2 @@
library(testthat)
test_check("metis-lite")
test_check("metis.tidy")

0
tests/testthat/test-metis-lite.R → tests/testthat/test-metis.tidy.R

Loading…
Cancel
Save