|
4 years ago | |
---|---|---|
R | 4 years ago | |
inst/java | 4 years ago | |
man | 4 years ago | |
tests | 4 years ago | |
.Rbuildignore | 4 years ago | |
.bash_profile | 5 years ago | |
.gitignore | 6 years ago | |
CONDUCT.md | 5 years ago | |
DESCRIPTION | 4 years ago | |
LICENSE | 4 years ago | |
LICENSE.md | 4 years ago | |
NAMESPACE | 4 years ago | |
NEWS.md | 4 years ago | |
README.Rmd | 4 years ago | |
README.md | 4 years ago | |
metis-lite.Rproj | 4 years ago |
README.md
metis
Access and Query Amazon Athena via DBI/JDBC
Description
In Greek mythology, Metis was Athena’s “helper” so methods are provided
to help you accessing and querying Amazon Athena via DBI/JDBC and/or
dplyr
. #’ Methods are provides to connect to ‘Amazon’ ‘Athena’,
lookup schemas/tables,
IMPORTANT
Since R 3.5 (I don’t remember this happening in R 3.4.x) signals sent
from interrupting Athena JDBC calls crash the R interpreter. You need to
set the -Xrs
option to avoid signals being passed on to the JVM owner.
That has to be done before rJava
is loaded so you either need to
remember to put it at the top of all scripts or stick this in your
local ~/.Rprofile
and/or sitewide Rprofile
:
if (!grepl("-Xrs", getOption("java.parameters", ""))) {
options(
"java.parameters" = c(getOption("java.parameters", default = NULL), "-Xrs")
)
}
What’s Inside The Tin?
The following functions are implemented:
Easy-interface connection helper:
athena_connect
Simplified Athena JDBC connection helper
Custom JDBC Classes:
Athena
: AthenaJDBC (make a new Athena con obj)AthenaConnection-class
: AthenaJDBCAthenaDriver-class
: AthenaJDBCAthenaResult-class
: AthenaJDBC
Custom JDBC Class Methods:
dbConnect-method
dbExistsTable-method
dbGetQuery-method
dbListFields-method
dbListTables-method
dbReadTable-method
dbSendQuery-method
Pulled in from other cloudyr
pkgs:
read_credentials
: Use Credentials from .aws/credentials Fileuse_credentials
: Use Credentials from .aws/credentials File
Installation
devtools::install_git("https://git.sr.ht/~hrbrmstr/metis-lite")
# OR
devtools::install_gitlab("hrbrmstr/metis-lite")
# OR
devtools::install_github("hrbrmstr/metis-lite")
Usage
library(metis.lite)
# current verison
packageVersion("metis.lite")
## [1] '0.3.0'
library(rJava)
library(RJDBC)
library(metis.lite)
library(magrittr)
library(dbplyr)
library(dplyr)
dbConnect(
drv = metis.lite::Athena(),
schema_name = "sampledb",
provider = "com.simba.athena.amazonaws.auth.PropertiesFileCredentialsProvider",
AwsCredentialsProviderArguments = path.expand("~/.aws/athenaCredentials.props"),
s3_staging_dir = "s3://aws-athena-query-results-569593279821-us-east-1",
) -> con
dbListTables(con, schema="sampledb")
## [1] "elb_logs"
dbExistsTable(con, "elb_logs", schema="sampledb")
## [1] TRUE
dbListFields(con, "elb_logs", "sampledb")
## [1] "timestamp" "elbname" "requestip" "requestport"
## [5] "backendip" "backendport" "requestprocessingtime" "backendprocessingtime"
## [9] "clientresponsetime" "elbresponsecode" "backendresponsecode" "receivedbytes"
## [13] "sentbytes" "requestverb" "url" "protocol"
dbGetQuery(con, "SELECT * FROM sampledb.elb_logs LIMIT 10") %>%
glimpse()
## Observations: 10
## Variables: 16
## $ timestamp <chr> "2014-09-29T18:18:51.826955Z", "2014-09-29T18:18:51.920462Z", "2014-09-29T18:18:52.2725…
## $ elbname <chr> "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo",…
## $ requestip <chr> "255.48.150.122", "249.213.227.93", "245.108.120.229", "241.112.203.216", "241.43.107.2…
## $ requestport <int> 62096, 62096, 62096, 62096, 56454, 33254, 18918, 64352, 1651, 56454
## $ backendip <chr> "244.238.214.120", "248.99.214.228", "243.3.190.175", "246.235.181.255", "241.112.203.2…
## $ backendport <int> 8888, 8888, 8888, 8888, 8888, 8888, 8888, 8888, 8888, 8888
## $ requestprocessingtime <dbl> 9.0e-05, 9.7e-05, 8.7e-05, 9.4e-05, 7.6e-05, 8.3e-05, 6.3e-05, 5.4e-05, 8.2e-05, 8.7e-05
## $ backendprocessingtime <dbl> 0.007410, 0.256533, 0.442659, 0.016772, 0.035036, 0.029892, 0.034148, 0.014858, 0.01518…
## $ clientresponsetime <dbl> 0.000055, 0.000075, 0.000131, 0.000078, 0.000057, 0.000043, 0.000033, 0.000043, 0.00007…
## $ elbresponsecode <chr> "302", "302", "200", "200", "200", "200", "200", "200", "200", "200"
## $ backendresponsecode <chr> "200", "200", "200", "200", "200", "200", "200", "200", "200", "200"
## $ receivedbytes <S3: integer64> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
## $ sentbytes <S3: integer64> 0, 0, 58402, 152213, 20766, 32370, 3408, 3884, 84245, 3831
## $ requestverb <chr> "GET", "GET", "GET", "GET", "GET", "GET", "GET", "GET", "GET", "GET"
## $ url <chr> "http://www.abcxyz.com:80/", "http://www.abcxyz.com:80/accounts/login/?next=/", "http:/…
## $ protocol <chr> "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HT…
Check types
dbGetQuery(con, "
SELECT
CAST('chr' AS CHAR(4)) achar,
CAST('varchr' AS VARCHAR) avarchr,
CAST(SUBSTR(timestamp, 1, 10) AS DATE) AS tsday,
CAST(100.1 AS DOUBLE) AS justadbl,
CAST(127 AS TINYINT) AS asmallint,
CAST(100 AS INTEGER) AS justanint,
CAST(100000000000000000 AS BIGINT) AS abigint,
CAST(('GET' = 'GET') AS BOOLEAN) AS is_get,
ARRAY[1, 2, 3] AS arr1,
ARRAY['1', '2, 3', '4'] AS arr2,
MAP(ARRAY['foo', 'bar'], ARRAY[1, 2]) AS mp,
CAST(ROW(1, 2.0) AS ROW(x BIGINT, y DOUBLE)) AS rw,
CAST('{\"a\":1}' AS JSON) js
FROM elb_logs
LIMIT 1
") %>%
glimpse()
## Observations: 1
## Variables: 13
## $ achar <chr> "chr "
## $ avarchr <chr> "varchr"
## $ tsday <date> 2014-09-26
## $ justadbl <dbl> 100.1
## $ asmallint <int> 127
## $ justanint <int> 100
## $ abigint <S3: integer64> 100000000000000000
## $ is_get <lgl> TRUE
## $ arr1 <chr> "1, 2, 3"
## $ arr2 <chr> "1, 2, 3, 4"
## $ mp <chr> "{bar=2, foo=1}"
## $ rw <chr> "{x=1, y=2.0}"
## $ js <chr> "\"{\\\"a\\\":1}\""
dplyr
tbl(con, sql("
SELECT
CAST('chr' AS CHAR(4)) achar,
CAST('varchr' AS VARCHAR) avarchr,
CAST(SUBSTR(timestamp, 1, 10) AS DATE) AS tsday,
CAST(100.1 AS DOUBLE) AS justadbl,
CAST(127 AS TINYINT) AS asmallint,
CAST(100 AS INTEGER) AS justanint,
CAST(100000000000000000 AS BIGINT) AS abigint,
CAST(('GET' = 'GET') AS BOOLEAN) AS is_get,
ARRAY[1, 2, 3] AS arr,
ARRAY['1', '2, 3', '4'] AS arr,
MAP(ARRAY['foo', 'bar'], ARRAY[1, 2]) AS mp,
CAST(ROW(1, 2.0) AS ROW(x BIGINT, y DOUBLE)) AS rw,
CAST('{\"a\":1}' AS JSON) js
FROM elb_logs
LIMIT 1
")) %>%
glimpse()
## Observations: ??
## Variables: 13
## Database: AthenaConnection
## $ achar <chr> "chr "
## $ avarchr <chr> "varchr"
## $ tsday <date> 2014-09-27
## $ justadbl <dbl> 100.1
## $ asmallint <int> 127
## $ justanint <int> 100
## $ abigint <S3: integer64> 100000000000000000
## $ is_get <lgl> TRUE
## $ arr <chr> "1, 2, 3"
## $ arr <chr> "1, 2, 3, 4"
## $ mp <chr> "{bar=2, foo=1}"
## $ rw <chr> "{x=1, y=2.0}"
## $ js <chr> "\"{\\\"a\\\":1}\""
Code of Conduct
Please note that this project is released with a Contributor Code of Conduct. By participating in this project you agree to abide by its terms.