Procházet zdrojové kódy

passed checks

master
boB Rudis před 5 roky
rodič
revize
15a5352dc1
V databázi nebyl nalezen žádný známý klíč pro tento podpis GPG Key ID: 1D7529BE14E2BBA9
  1. 3
      .Rbuildignore
  2. 11
      DESCRIPTION
  3. 6
      NAMESPACE
  4. 8
      R/jdbc.r
  5. 48
      R/metis-lite-package.R
  6. 33
      R/metis-package.R
  7. 124
      R/sql_translate_env.R
  8. 16
      R/z-dbGetQuery.R
  9. 9
      R/zzz.R
  10. 61
      README.Rmd
  11. 110
      README.md
  12. binární
      inst/java/AthenaJDBC42_2.0.6.jar
  13. 1
      inst/java/log4j.properties
  14. 12
      man/Athena.Rd
  15. 7
      man/AthenaConnection-class.Rd
  16. 4
      man/dbConnect-AthenaDriver-method.Rd
  17. 18
      man/dbGetInfo-AthenaConnection-method.Rd
  18. 18
      man/dbGetInfo-AthenaDriver-method.Rd
  19. 2
      man/fetch-AthenaResult-numeric-method.Rd
  20. 20
      man/metis.Rd
  21. 35
      man/metis.lite.Rd
  22. 2
      man/use_credentials.Rd
  23. 0
      metis.Rproj
  24. 2
      tests/test-all.R
  25. 6
      tests/testthat/test-metis-lite.R
  26. 28
      tests/testthat/test-metis.R

3
.Rbuildignore

@ -8,5 +8,6 @@
^NOTES\.*html$
^\.codecov\.yml$
^README_files$
^doc$
^docs$
^CONDUCT\.md$
^\.bash_profile$

11
DESCRIPTION

@ -1,4 +1,4 @@
Package: metis.lite
Package: metis
Type: Package
Title: Access and Query Amazon Athena via DBI/JDBC
Version: 0.3.0
@ -9,9 +9,9 @@ Authors@R: c(
)
Maintainer: Bob Rudis <bob@rud.is>
Encoding: UTF-8
Description: Methods are provides to connect to 'Amazon' 'Athena', lookup schemas/tables,
perform queries and retrieve query results. A lightweight 'RJDBC' implementation
is included along with additional helpers for 'dplyr'/'dplyr' suppprt.
Description: Methods are provided to connect to 'Amazon' 'Athena', lookup
schemas/tables, perform queries and retrieve query results using the
Athena JDBC driver found in 'metis.jars'.
SystemRequirements: JDK 1.8+
License: MIT + file LICENSE
Suggests:
@ -19,11 +19,12 @@ Suggests:
covr
Depends:
R (>= 3.2.0),
metis.jars,
RJDBC
Imports:
rJava,
DBI,
bit64,
dbplyr,
methods,
aws.signature
RoxygenNote: 6.1.1

6
NAMESPACE

@ -2,9 +2,7 @@
export(Athena)
export(athena_connect)
export(db_data_type.AthenaConnection)
export(read_credentials)
export(sql_translate_env.AthenaConnection)
export(use_credentials)
exportClasses(AthenaConnection)
exportClasses(AthenaDriver)
@ -21,7 +19,9 @@ exportMethods(fetch)
import(DBI)
import(RJDBC)
import(bit64)
import(dbplyr)
import(metis.jars)
importFrom(aws.signature,read_credentials)
importFrom(aws.signature,use_credentials)
importFrom(methods,as)
importFrom(methods,callNextMethod)
importFrom(rJava,.jcall)

8
R/jdbc.r

@ -21,12 +21,13 @@ setClass(
#' AthenaJDBC
#'
#' @param identifier.quote how to quote identifiers
#' @export
Athena <- function(identifier.quote = '`') {
JDBC(
driverClass = "com.simba.athena.jdbc.Driver",
system.file("java", "AthenaJDBC42_2.0.6.jar", package = "metis.lite"),
metis.jars::metis_jar_path(),
identifier.quote = identifier.quote
) -> drv
@ -44,6 +45,7 @@ Athena <- function(identifier.quote = '`') {
#' - `ComplexTypeColumnLength`: <int> The maximum data length for `ARRAY`, `MAP`, and `STRUCT` columns. Default `65535L`
#' - `StringColumnLength`: <int> The maximum data length for `STRING` columns. Default `255L`
#'
#' @param drv driver
#' @param provider JDBC auth provider (ideally leave default)
#' @param region AWS region the Athena tables are in
#' @param s3_staging_dir A write-able bucket on S3 that you have permissions for
@ -54,6 +56,7 @@ Athena <- function(identifier.quote = '`') {
#' of data in logs. Set this to a temporary directory or something log4j can use. For
#' `log_level` use the names ("INFO", "DEBUG", "WARN", "ERROR", "ALL", "OFF", "FATAL", "TRACE") or
#' their corresponding integer values 0-6.
#' @param fetch_size Athena results fetch size
#' @param ... passed on to the driver. See Details.
#' @references [Connect with JDBC](https://docs.aws.amazon.com/athena/latest/ug/connect-with-jdbc.html);
#' [Simba Athena JDBC Driver with SQL Connector Installation and Configuration Guide](https://s3.amazonaws.com/athena-downloads/drivers/JDBC/SimbaAthenaJDBC_2.0.6/docs/Simba+Athena+JDBC+Driver+Install+and+Configuration+Guide.pdf)
@ -109,6 +112,9 @@ setMethod(
#' AthenaJDBC
#'
#' S4 class for an Athena connection. It extends `JDBCConnection` and
#' declares the slots documented below.
#'
#' @param jc Java object reference (`jobjRef`) held by the connection
#' @param identifier.quote how to quote identifiers
#' @param fetch_size Athena results fetch size (integer)
#' @export
setClass(
  "AthenaConnection",
  representation(
    "JDBCConnection",
    jc = "jobjRef",
    identifier.quote = "character",
    fetch_size = "integer"
  )
)

48
R/metis-lite-package.R

@ -1,48 +0,0 @@
#' Access and Query Amazon Athena via DBI/JDBC
#'
#' Methods are provides to connect to 'Amazon' 'Athena', lookup schemas/tables,
#' perform queries and retrieve query results. A lightweight 'RJDBC' implementation
#' is included along with an interface to the 'AWS' command-line utility.
#'
#' @name metis.lite
#'
#' @section IMPORTANT:
#'
#' Since R 3.5 (I don't remember this happening in R 3.4.x) signals sent from interrupting
#' Athena JDBC calls crash the R #' interpreter. You need to set the `-Xrs` option to avoid
#' signals being passed on to the JVM owner. That has to be done _before_ `rJava` is
#' loaded so you either need to remember to put it at the top of all scripts _or_ stick this
#' in your local #' `~/.Rprofile` and/or sitewide `Rprofile`:
#'
#'
#' if (!grepl("-Xrs", getOption("java.parameters", ""))) {
#' options(
#' "java.parameters" = c(getOption("java.parameters", default = NULL), "-Xrs")
#' )
#' }
#'
#' @md
#' @encoding UTF-8
#' @keywords internal
#' @docType package
#' @author Bob Rudis (bob@@rud.is)
#' @import RJDBC DBI bit64 dbplyr
#' @references [Simba Athena JDBC Driver with SQL Connector Installation and Configuration Guide](https://s3.amazonaws.com/athena-downloads/drivers/JDBC/SimbaAthenaJDBC_2.0.6/docs/Simba+Athena+JDBC+Driver+Install+and+Configuration+Guide.pdf)
NULL
#' Use Credentials from .aws/credentials File
#'
#' @md
#' @importFrom aws.signature use_credentials read_credentials
#' @references [aws.signature::use_credentials()] / [aws.signature::read_credentials()]
#' @name use_credentials
#' @rdname use_credentials
#' @inheritParams aws.signature::use_credentials
#' @export
NULL
#' @name read_credentials
#' @rdname use_credentials
#' @export
NULL

33
R/metis-package.R

@ -0,0 +1,33 @@
#' Access and Query Amazon Athena via DBI/JDBC
#'
#' Methods are provided to connect to 'Amazon' 'Athena', lookup
#' schemas/tables, perform queries and retrieve query results using the
#' Athena JDBC driver found in 'metis.jars'.
#'
#' @name metis
#'
#' @md
#' @encoding UTF-8
#' @keywords internal
#' @docType package
#' @author Bob Rudis (bob@@rud.is)
#' @import RJDBC DBI bit64 metis.jars
#' @importFrom methods as callNextMethod
#' @references [Simba Athena JDBC Driver with SQL Connector Installation and Configuration Guide](https://s3.amazonaws.com/athena-downloads/drivers/JDBC/SimbaAthenaJDBC_2.0.6/docs/Simba+Athena+JDBC+Driver+Install+and+Configuration+Guide.pdf)
NULL
#' Use Credentials from .aws/credentials File
#'
#' @md
#' @importFrom aws.signature use_credentials read_credentials
#' @references [aws.signature::use_credentials()] / [aws.signature::read_credentials()]
#' @name use_credentials
#' @rdname use_credentials
#' @inheritParams aws.signature::use_credentials
#' @export
NULL
#' @name read_credentials
#' @rdname use_credentials
#' @export
NULL

124
R/sql_translate_env.R

@ -1,124 +0,0 @@
#' @rdname Athena
#' @keywords internal
#' @export
db_data_type.AthenaConnection <- function(con, fields, ...) {

  # Map a single column to a SQL type name. Only the first element of the
  # column's class vector is consulted, so e.g. c("POSIXct", "POSIXt")
  # resolves through "POSIXct". Unknown classes raise an error.
  data_type <- function(x) {
    switch(
      class(x)[1],
      integer64 = "BIGINT",
      logical = "BOOLEAN",
      integer = "INTEGER",
      numeric = "DOUBLE",
      factor = "CHARACTER",
      character = "CHARACTER",
      Date = "DATE",
      POSIXct = "TIMESTAMP",
      stop("Can't map type ", paste(class(x), collapse = "/"),
           " to a supported database type.")
    )
  }

  # One type name per field; names of `fields` are carried over to the result.
  # (The debug print() that used to run on every call has been removed.)
  vapply(fields, data_type, character(1))

}
#' @rdname Athena
#' @keywords internal
#' @export
sql_translate_env.AthenaConnection <- function(con) {

  # Builds the dbplyr translation environment for Athena connections:
  # three translators (scalar, aggregate, window), each layered on top of
  # the corresponding dbplyr base translator.
  dbplyr::sql_variant(

    scalar = dbplyr::sql_translator(
      .parent = dbplyr::base_scalar,
      `!=` = dbplyr::sql_infix("<>"),

      # Type casts. `as.integer64` previously emitted "CAST(<x>AS BIGINT)"
      # (no space before AS); it now matches the other casts.
      as.integer64 = function(x) dbplyr::build_sql("CAST(", x, " AS BIGINT)"),
      as.numeric = function(x) dbplyr::build_sql("CAST(", x, " AS DOUBLE)"),
      as.character = function(x) dbplyr::build_sql("CAST(", x, " AS CHARACTER)"),
      as.date = function(x) dbplyr::build_sql("CAST(", x, " AS DATE)"),
      as.Date = function(x) dbplyr::build_sql("CAST(", x, " AS DATE)"),
      as.POSIXct = function(x) dbplyr::build_sql("CAST(", x, " AS TIMESTAMP)"),
      as.posixct = function(x) dbplyr::build_sql("CAST(", x, " AS TIMESTAMP)"),
      as.logical = function(x) dbplyr::build_sql("CAST(", x, " AS BOOLEAN)"),

      # R-style helpers. Note the argument reordering: grepl(pattern, x) and
      # gsub(pattern, replacement, x) put the subject first in the SQL call.
      date_part = function(x, y) dbplyr::build_sql("DATE_PART(", x, ",", y, ")"),
      grepl = function(x, y) dbplyr::build_sql("CONTAINS(", y, ", ", x, ")"),
      gsub = function(x, y, z) dbplyr::build_sql("REGEXP_REPLACE(", z, ", ", x, ",", y, ")"),
      trimws = function(x) dbplyr::build_sql("TRIM(both ' ' FROM ", x, ")"),

      # Straight prefix-function mappings: (SQL name, number of arguments).
      cbrt = dbplyr::sql_prefix("CBRT", 1),
      degrees = dbplyr::sql_prefix("DEGREES", 1),
      e = dbplyr::sql_prefix("E", 0),
      row_number = dbplyr::sql_prefix("row_number", 0),
      lshift = dbplyr::sql_prefix("LSHIFT", 2),
      mod = dbplyr::sql_prefix("MOD", 2),
      age = dbplyr::sql_prefix("AGE", 1),
      negative = dbplyr::sql_prefix("NEGATIVE", 1),
      pi = dbplyr::sql_prefix("PI", 0),
      pow = dbplyr::sql_prefix("POW", 2),
      radians = dbplyr::sql_prefix("RADIANS", 1),
      rand = dbplyr::sql_prefix("RAND", 0),
      rshift = dbplyr::sql_prefix("RSHIFT", 2),
      trunc = dbplyr::sql_prefix("TRUNC", 2),
      contains = dbplyr::sql_prefix("CONTAINS", 2),
      convert_to = dbplyr::sql_prefix("CONVERT_TO", 2),
      convert_from = dbplyr::sql_prefix("CONVERT_FROM", 2),
      string_binary = dbplyr::sql_prefix("STRING_BINARY", 1),
      binary_string = dbplyr::sql_prefix("BINARY_STRING", 1),
      to_char = dbplyr::sql_prefix("TO_CHAR", 2),
      to_date = dbplyr::sql_prefix("TO_DATE", 2),
      to_number = dbplyr::sql_prefix("TO_NUMBER", 2),
      char_to_timestamp = dbplyr::sql_prefix("TO_TIMESTAMP", 2),
      double_to_timestamp = dbplyr::sql_prefix("TO_TIMESTAMP", 1),
      char_length = dbplyr::sql_prefix("CHAR_LENGTH", 1),
      flatten = dbplyr::sql_prefix("FLATTEN", 1),
      kvgen = dbplyr::sql_prefix("KVGEN", 1),
      repeated_count = dbplyr::sql_prefix("REPEATED_COUNT", 1),
      repeated_contains = dbplyr::sql_prefix("REPEATED_CONTAINS", 2),
      ilike = dbplyr::sql_prefix("ILIKE", 2),
      init_cap = dbplyr::sql_prefix("INIT_CAP", 1),
      length = dbplyr::sql_prefix("LENGTH", 1),
      lower = dbplyr::sql_prefix("LOWER", 1),
      tolower = dbplyr::sql_prefix("LOWER", 1),
      ltrim = dbplyr::sql_prefix("LTRIM", 2),
      nullif = dbplyr::sql_prefix("NULLIF", 2),
      position = function(x, y) dbplyr::build_sql("POSITION(", x, " IN ", y, ")"),
      regexp_replace = dbplyr::sql_prefix("REGEXP_REPLACE", 3),
      rtrim = dbplyr::sql_prefix("RTRIM", 2),
      rpad = dbplyr::sql_prefix("RPAD", 2),
      rpad_with = dbplyr::sql_prefix("RPAD", 3),
      lpad = dbplyr::sql_prefix("LPAD", 2),
      lpad_with = dbplyr::sql_prefix("LPAD", 3),
      strpos = dbplyr::sql_prefix("STRPOS", 2),
      substr = dbplyr::sql_prefix("SUBSTR", 3),
      trim = function(x, y, z) dbplyr::build_sql("TRIM(", x, " ", y, " FROM ", z, ")"),
      upper = dbplyr::sql_prefix("UPPER", 1),
      toupper = dbplyr::sql_prefix("UPPER", 1)
    ),

    aggregate = dbplyr::sql_translator(
      .parent = dbplyr::base_agg,
      n = function() dbplyr::sql("COUNT(*)"),
      cor = dbplyr::sql_prefix("CORR"),
      cov = dbplyr::sql_prefix("COVAR_SAMP"),
      sd = dbplyr::sql_prefix("STDDEV_SAMP"),
      var = dbplyr::sql_prefix("VAR_SAMP"),
      n_distinct = function(x) {
        dbplyr::build_sql(dbplyr::sql("COUNT(DISTINCT "), x, dbplyr::sql(")"))
      }
    ),

    window = dbplyr::sql_translator(
      .parent = dbplyr::base_win,
      n = function() {
        dbplyr::win_over(
          dbplyr::sql("count(*)"),
          partition = dbplyr::win_current_group()
        )
      },
      cor = dbplyr::win_recycled("corr"),
      cov = dbplyr::win_recycled("covar_samp"),
      sd = dbplyr::win_recycled("stddev_samp"),
      var = dbplyr::win_recycled("var_samp"),
      all = dbplyr::win_recycled("bool_and"),
      any = dbplyr::win_recycled("bool_or")
    )

  )

}

16
R/z-dbGetQuery.R

@ -24,27 +24,34 @@ list(
"1111" = as.character # OTHER
) -> .jdbc_converters
#' Retrieve connection/driver/database metadata
#'
#' @param dbObj driver/connection
#' @param ... unused
#' @export
#' @keywords internal
setMethod("dbGetInfo", "AthenaDriver", def=function(dbObj, ...)
list(
name = "AthenaJDBC",
driver_version = list.files(system.file("java", package="metis.lite"), "jar$")[1],
package_version = utils::packageVersion("metis.lite")
driver_version = metis.jars::simba_driver_version(),
package_version = utils::packageVersion("metis.jars")
)
)
#' Retrieve connection/driver/database metadata
#'
#' @param dbObj driver/connection
#' @param ... unused
#' @export
#' @keywords internal
setMethod("dbGetInfo", "AthenaConnection", def=function(dbObj, ...)
list(
name = "AthenaJDBC",
driver_version = list.files(system.file("java", package="metis.lite"), "jar$")[1],
package_version = utils::packageVersion("metis.lite")
package_version = utils::packageVersion("metis")
)
)
#' Fetch records from a previously executed query
#'
#' Fetch the next `n` elements (rows) from the result set and return them
@ -56,6 +63,7 @@ setMethod("dbGetInfo", "AthenaConnection", def=function(dbObj, ...)
#' or `n = Inf`
#' to retrieve all pending records. Some implementations may recognize other
#' special values.
#' @param block block size
#' @param ... Other arguments passed on to methods.
#' @export
setMethod(

9
R/zzz.R

@ -1,11 +1,2 @@
# Package load hook: registers the package's jars with rJava, also adds any
# files under "inst/java" of the current working directory to the class path,
# and emits a startup message when "-Xrs" is absent from the
# `java.parameters` option.
.onLoad <- function(libname, pkgname) {

  rJava::.jpackage(pkgname, jars = "*", lib.loc = libname)

  wd_java_files <- dir(file.path(getwd(), "inst/java"), full.names = TRUE)
  rJava::.jaddClassPath(wd_java_files)

  java_params <- getOption("java.parameters", "")
  has_xrs <- any(grepl("-Xrs", java_params))

  if (!has_xrs) {
    packageStartupMessage(
      "Did not find '-Xrs' in java.parameters option. Until rJava is updated, ",
      "please set this up in your/an Rprofile or at the start of scripts."
    )
  }

}

61
README.Rmd

@ -10,20 +10,11 @@ Access and Query Amazon Athena via DBI/JDBC
## Description
In Greek mythology, Metis was Athena's "helper" so methods are provided to help you accessing and querying Amazon Athena via DBI/JDBC and/or `dplyr`.
#' Methods are provides to connect to 'Amazon' 'Athena', lookup schemas/tables,
In Greek mythology, Metis was Athena's "helper" so...
## IMPORTANT
Methods are provided to connect to 'Amazon' 'Athena', lookup schemas/tables,
perform queries and retrieve query results via the included JDBC DBI driver.
Since R 3.5 (I don't remember this happening in R 3.4.x) signals sent from interrupting Athena JDBC calls crash the R interpreter. You need to set the `-Xrs` option to avoid signals being passed on to the JVM owner. That has to be done _before_ `rJava` is loaded so you either need to remember to put it at the top of all scripts _or_ stick this in your local `~/.Rprofile` and/or sitewide `Rprofile`:
```r
if (!grepl("-Xrs", getOption("java.parameters", ""))) {
options(
"java.parameters" = c(getOption("java.parameters", default = NULL), "-Xrs")
)
}
```
## What's Inside The Tin?
The following functions are implemented:
@ -57,11 +48,11 @@ Pulled in from other `cloudyr` pkgs:
## Installation
```{r eval=FALSE}
devtools::install_git("https://git.sr.ht/~hrbrmstr/metis-lite")
devtools::install_git("https://git.sr.ht/~hrbrmstr/metis")
# OR
devtools::install_gitlab("hrbrmstr/metis-lite")
devtools::install_gitlab("hrbrmstr/metis")
# OR
devtools::install_github("hrbrmstr/metis-lite")
devtools::install_github("hrbrmstr/metis")
```
```{r message=FALSE, warning=FALSE, include=FALSE}
@ -71,22 +62,20 @@ options(width=120)
## Usage
```{r message=FALSE, warning=FALSE}
library(metis.lite)
library(metis)
# current version
packageVersion("metis.lite")
packageVersion("metis")
```
```{r message=FALSE, warning=FALSE}
library(rJava)
library(RJDBC)
library(metis.lite)
library(magrittr)
library(dbplyr)
library(dplyr)
library(metis)
library(magrittr)
dbConnect(
drv = metis.lite::Athena(),
drv = metis::Athena(),
schema_name = "sampledb",
provider = "com.simba.athena.amazonaws.auth.PropertiesFileCredentialsProvider",
AwsCredentialsProviderArguments = path.expand("~/.aws/athenaCredentials.props"),
@ -100,7 +89,7 @@ dbExistsTable(con, "elb_logs", schema="sampledb")
dbListFields(con, "elb_logs", "sampledb")
dbGetQuery(con, "SELECT * FROM sampledb.elb_logs LIMIT 10") %>%
glimpse()
dplyr::glimpse()
```
### Check types
@ -124,31 +113,11 @@ SELECT
FROM elb_logs
LIMIT 1
") %>%
glimpse()
dplyr::glimpse()
```
#### dplyr
```{r}
tbl(con, sql("
SELECT
CAST('chr' AS CHAR(4)) achar,
CAST('varchr' AS VARCHAR) avarchr,
CAST(SUBSTR(timestamp, 1, 10) AS DATE) AS tsday,
CAST(100.1 AS DOUBLE) AS justadbl,
CAST(127 AS TINYINT) AS asmallint,
CAST(100 AS INTEGER) AS justanint,
CAST(100000000000000000 AS BIGINT) AS abigint,
CAST(('GET' = 'GET') AS BOOLEAN) AS is_get,
ARRAY[1, 2, 3] AS arr,
ARRAY['1', '2, 3', '4'] AS arr,
MAP(ARRAY['foo', 'bar'], ARRAY[1, 2]) AS mp,
CAST(ROW(1, 2.0) AS ROW(x BIGINT, y DOUBLE)) AS rw,
CAST('{\"a\":1}' AS JSON) js
FROM elb_logs
LIMIT 1
")) %>%
glimpse()
```{r cloc}
cloc::cloc_pkg_md()
```
## Code of Conduct

110
README.md

@ -5,27 +5,11 @@ Access and Query Amazon Athena via DBI/JDBC
## Description
In Greek mythology, Metis was Athena’s “helper” so methods are provided
to help you accessing and querying Amazon Athena via DBI/JDBC and/or
`dplyr`. \#’ Methods are provides to connect to ‘Amazon’ ‘Athena’,
lookup schemas/tables,
In Greek mythology, Metis was Athena’s “helper” so…
## IMPORTANT
Since R 3.5 (I don’t remember this happening in R 3.4.x) signals sent
from interrupting Athena JDBC calls crash the R interpreter. You need to
set the `-Xrs` option to avoid signals being passed on to the JVM owner.
That has to be done *before* `rJava` is loaded so you either need to
remember to put it at the top of all scripts *or* stick this in your
local `~/.Rprofile` and/or sitewide `Rprofile`:
``` r
if (!grepl("-Xrs", getOption("java.parameters", ""))) {
options(
"java.parameters" = c(getOption("java.parameters", default = NULL), "-Xrs")
)
}
```
Methods are provided to connect to ‘Amazon’ ‘Athena’, lookup
schemas/tables, perform queries and retrieve query results via the
included JDBC DBI driver.
## What’s Inside The Tin?
@ -60,20 +44,20 @@ Pulled in from other `cloudyr` pkgs:
## Installation
``` r
devtools::install_git("https://git.sr.ht/~hrbrmstr/metis-lite")
devtools::install_git("https://git.sr.ht/~hrbrmstr/metis")
# OR
devtools::install_gitlab("hrbrmstr/metis-lite")
devtools::install_gitlab("hrbrmstr/metis")
# OR
devtools::install_github("hrbrmstr/metis-lite")
devtools::install_github("hrbrmstr/metis")
```
## Usage
``` r
library(metis.lite)
library(metis)
# current version
packageVersion("metis.lite")
packageVersion("metis")
```
## [1] '0.3.0'
@ -81,10 +65,8 @@ packageVersion("metis.lite")
``` r
library(rJava)
library(RJDBC)
library(metis.lite)
library(magrittr)
library(dbplyr)
library(dplyr)
library(metis)
library(magrittr)
dbConnect(
drv = metis.lite::Athena(),
@ -116,26 +98,26 @@ dbListFields(con, "elb_logs", "sampledb")
``` r
dbGetQuery(con, "SELECT * FROM sampledb.elb_logs LIMIT 10") %>%
glimpse()
dplyr::glimpse()
```
## Observations: 10
## Variables: 16
## $ timestamp <chr> "2014-09-29T18:18:51.826955Z", "2014-09-29T18:18:51.920462Z", "2014-09-29T18:18:52.2725
## $ timestamp <chr> "2014-09-29T03:24:38.169500Z", "2014-09-29T03:25:09.029469Z", "2014-09-29T03:25:39.8676
## $ elbname <chr> "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo",…
## $ requestip <chr> "255.48.150.122", "249.213.227.93", "245.108.120.229", "241.112.203.216", "241.43.107.2
## $ requestport <int> 62096, 62096, 62096, 62096, 56454, 33254, 18918, 64352, 1651, 56454
## $ backendip <chr> "244.238.214.120", "248.99.214.228", "243.3.190.175", "246.235.181.255", "241.112.203.2
## $ backendport <int> 8888, 8888, 8888, 8888, 8888, 8888, 8888, 8888, 8888, 8888
## $ requestprocessingtime <dbl> 9.0e-05, 9.7e-05, 8.7e-05, 9.4e-05, 7.6e-05, 8.3e-05, 6.3e-05, 5.4e-05, 8.2e-05, 8.7e-05
## $ backendprocessingtime <dbl> 0.007410, 0.256533, 0.442659, 0.016772, 0.035036, 0.029892, 0.034148, 0.014858, 0.01518
## $ clientresponsetime <dbl> 0.000055, 0.000075, 0.000131, 0.000078, 0.000057, 0.000043, 0.000033, 0.000043, 0.00007…
## $ elbresponsecode <chr> "302", "302", "200", "200", "200", "200", "200", "200", "200", "200"
## $ backendresponsecode <chr> "200", "200", "200", "200", "200", "200", "200", "200", "200", "200"
## $ requestip <chr> "253.89.30.138", "248.64.121.231", "245.21.209.210", "244.77.57.59", "244.185.170.87",
## $ requestport <int> 20159, 20159, 20159, 20159, 20159, 20159, 20159, 20159, 20159, 20159
## $ backendip <chr> "253.89.30.138", "244.77.57.59", "240.105.192.251", "253.89.30.138", "248.64.121.231",
## $ backendport <int> 8888, 8888, 8888, 8899, 8888, 8888, 8888, 8888, 8888, 8888
## $ requestprocessingtime <dbl> 7.5e-05, 9.1e-05, 9.0e-05, 9.5e-05, 8.9e-05, 9.3e-05, 8.7e-05, 9.2e-05, 9.0e-05, 9.1e-05
## $ backendprocessingtime <dbl> 0.047465, 0.044693, 0.045687, 0.051089, 0.045445, 0.045845, 0.046027, 0.045039, 0.05010
## $ clientresponsetime <dbl> 6.5e-05, 7.2e-05, 6.4e-05, 7.0e-05, 5.4e-05, 6.7e-05, 5.7e-05, 4.6e-05, 8.7e-05, 4.9e-05
## $ elbresponsecode <chr> "200", "200", "200", "200", "200", "200", "200", "200", "200", "200"
## $ backendresponsecode <chr> "200", "200", "400", "200", "404", "200", "403", "404", "200", "200"
## $ receivedbytes <S3: integer64> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
## $ sentbytes <S3: integer64> 0, 0, 58402, 152213, 20766, 32370, 3408, 3884, 84245, 3831
## $ sentbytes <S3: integer64> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
## $ requestverb <chr> "GET", "GET", "GET", "GET", "GET", "GET", "GET", "GET", "GET", "GET"
## $ url <chr> "http://www.abcxyz.com:80/", "http://www.abcxyz.com:80/accounts/login/?next=/", "http:/…
## $ url <chr> "http://www.abcxyz.com:80/jobbrowser/?format=json&state=running&user=248nnm5", "http://…
## $ protocol <chr> "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HT…
### Check types
@ -159,7 +141,7 @@ SELECT
FROM elb_logs
LIMIT 1
") %>%
glimpse()
dplyr::glimpse()
```
## Observations: 1
@ -178,46 +160,14 @@ LIMIT 1
## $ rw <chr> "{x=1, y=2.0}"
## $ js <chr> "\"{\\\"a\\\":1}\""
#### dplyr
``` r
tbl(con, sql("
SELECT
CAST('chr' AS CHAR(4)) achar,
CAST('varchr' AS VARCHAR) avarchr,
CAST(SUBSTR(timestamp, 1, 10) AS DATE) AS tsday,
CAST(100.1 AS DOUBLE) AS justadbl,
CAST(127 AS TINYINT) AS asmallint,
CAST(100 AS INTEGER) AS justanint,
CAST(100000000000000000 AS BIGINT) AS abigint,
CAST(('GET' = 'GET') AS BOOLEAN) AS is_get,
ARRAY[1, 2, 3] AS arr,
ARRAY['1', '2, 3', '4'] AS arr,
MAP(ARRAY['foo', 'bar'], ARRAY[1, 2]) AS mp,
CAST(ROW(1, 2.0) AS ROW(x BIGINT, y DOUBLE)) AS rw,
CAST('{\"a\":1}' AS JSON) js
FROM elb_logs
LIMIT 1
")) %>%
glimpse()
cloc::cloc_pkg_md()
```
## Observations: ??
## Variables: 13
## Database: AthenaConnection
## $ achar <chr> "chr "
## $ avarchr <chr> "varchr"
## $ tsday <date> 2014-09-27
## $ justadbl <dbl> 100.1
## $ asmallint <int> 127
## $ justanint <int> 100
## $ abigint <S3: integer64> 100000000000000000
## $ is_get <lgl> TRUE
## $ arr <chr> "1, 2, 3"
## $ arr <chr> "1, 2, 3, 4"
## $ mp <chr> "{bar=2, foo=1}"
## $ rw <chr> "{x=1, y=2.0}"
## $ js <chr> "\"{\\\"a\\\":1}\""
| Lang | \# Files | (%) | LoC | (%) | Blank lines | (%) | \# Lines | (%) |
| :--- | -------: | ---: | --: | ---: | ----------: | ---: | -------: | ---: |
| R | 8 | 0.89 | 232 | 0.85 | 77 | 0.71 | 160 | 0.76 |
| Rmd | 1 | 0.11 | 42 | 0.15 | 32 | 0.29 | 51 | 0.24 |
## Code of Conduct

binární
inst/java/AthenaJDBC42_2.0.6.jar

Binární soubor nebyl zobrazen.

1
inst/java/log4j.properties

@ -1 +0,0 @@
log4j.rootLogger=WARN

12
man/Athena.Rd

@ -1,18 +1,14 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/jdbc.r, R/sql_translate_env.R
% Please edit documentation in R/jdbc.r
\name{Athena}
\alias{Athena}
\alias{db_data_type.AthenaConnection}
\alias{sql_translate_env.AthenaConnection}
\title{AthenaJDBC}
\usage{
Athena(identifier.quote = "`")
db_data_type.AthenaConnection(con, fields, ...)
sql_translate_env.AthenaConnection(con)
}
\arguments{
\item{identifier.quote}{how to quote identifiers}
}
\description{
AthenaJDBC
}
\keyword{internal}

7
man/AthenaConnection-class.Rd

@ -4,6 +4,13 @@
\name{AthenaConnection-class}
\alias{AthenaConnection-class}
\title{AthenaJDBC}
\arguments{
\item{jc}{job ref}
\item{identifier.quote}{how to quote identifiers}
\item{fetch_size}{Athena results fetch size}
}
\description{
AthenaJDBC
}

4
man/dbConnect-AthenaDriver-method.Rd

@ -14,6 +14,8 @@
socket_timeout = 10000, log_path = "", log_level = 0, ...)
}
\arguments{
\item{drv}{driver}
\item{provider}{JDBC auth provider (ideally leave default)}
\item{region}{AWS region the Athena tables are in}
@ -22,6 +24,8 @@
\item{schema_name}{LOL if only this actually worked with Amazon's hacked Presto driver}
\item{fetch_size}{Athena results fetch size}
\item{max_error_retries, connection_timeout, socket_timeout}{technical connection info that you should only muck with if you know what you're doing.}
\item{log_path, log_level}{The Athena JDBC driver can (shockingly) provide a decent bit

18
man/dbGetInfo-AthenaConnection-method.Rd

@ -0,0 +1,18 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/z-dbGetQuery.R
\docType{methods}
\name{dbGetInfo,AthenaConnection-method}
\alias{dbGetInfo,AthenaConnection-method}
\title{Retrieve connection/driver/database metadata}
\usage{
\S4method{dbGetInfo}{AthenaConnection}(dbObj, ...)
}
\arguments{
\item{dbObj}{driver/connection}
\item{...}{unused}
}
\description{
Retrieve connection/driver/database metadata
}
\keyword{internal}

18
man/dbGetInfo-AthenaDriver-method.Rd

@ -0,0 +1,18 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/z-dbGetQuery.R
\docType{methods}
\name{dbGetInfo,AthenaDriver-method}
\alias{dbGetInfo,AthenaDriver-method}
\title{Retrieve connection/driver/database metadata}
\usage{
\S4method{dbGetInfo}{AthenaDriver}(dbObj, ...)
}
\arguments{
\item{dbObj}{driver/connection}
\item{...}{unused}
}
\description{
Retrieve connection/driver/database metadata
}
\keyword{internal}

2
man/fetch-AthenaResult-numeric-method.Rd

@ -16,6 +16,8 @@ or `n = Inf`
to retrieve all pending records. Some implementations may recognize other
special values.}
\item{block}{block size}
\item{...}{Other arguments passed on to methods.}
}
\description{

20
man/metis.Rd

@ -0,0 +1,20 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/metis-package.R
\docType{package}
\encoding{UTF-8}
\name{metis}
\alias{metis}
\alias{metis-package}
\title{Access and Query Amazon Athena via DBI/JDBC}
\description{
Methods are provided to connect to 'Amazon' 'Athena', lookup
schemas/tables, perform queries and retrieve query results using the
Athena JDBC driver found in 'metis.jars'.
}
\references{
\href{https://s3.amazonaws.com/athena-downloads/drivers/JDBC/SimbaAthenaJDBC_2.0.6/docs/Simba+Athena+JDBC+Driver+Install+and+Configuration+Guide.pdf}{Simba Athena JDBC Driver with SQL Connector Installation and Configuration Guide}
}
\author{
Bob Rudis (bob@rud.is)
}
\keyword{internal}

35
man/metis.lite.Rd

@ -1,35 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/metis-lite-package.R
\docType{package}
\encoding{UTF-8}
\name{metis.lite}
\alias{metis.lite}
\alias{metis.lite-package}
\title{Access and Query Amazon Athena via DBI/JDBC}
\description{
Methods are provides to connect to 'Amazon' 'Athena', lookup schemas/tables,
perform queries and retrieve query results. A lightweight 'RJDBC' implementation
is included along with an interface to the 'AWS' command-line utility.
}
\section{IMPORTANT}{
Since R 3.5 (I don't remember this happening in R 3.4.x) signals sent from interrupting
Athena JDBC calls crash the R #' interpreter. You need to set the \code{-Xrs} option to avoid
signals being passed on to the JVM owner. That has to be done \emph{before} \code{rJava} is
loaded so you either need to remember to put it at the top of all scripts \emph{or} stick this
in your local #' \code{~/.Rprofile} and/or sitewide \code{Rprofile}:\preformatted{if (!grepl("-Xrs", getOption("java.parameters", ""))) {
options(
"java.parameters" = c(getOption("java.parameters", default = NULL), "-Xrs")
)
}
}
}
\references{
\href{https://s3.amazonaws.com/athena-downloads/drivers/JDBC/SimbaAthenaJDBC_2.0.6/docs/Simba+Athena+JDBC+Driver+Install+and+Configuration+Guide.pdf}{Simba Athena JDBC Driver with SQL Connector Installation and Configuration Guide}
}
\author{
Bob Rudis (bob@rud.is)
}
\keyword{internal}

2
man/use_credentials.Rd

@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/metis-lite-package.R
% Please edit documentation in R/metis-package.R
\name{use_credentials}
\alias{use_credentials}
\alias{read_credentials}

0
metis-lite.Rproj → metis.Rproj

2
tests/test-all.R

@ -1,2 +1,2 @@
library(testthat)
test_check("metis-lite")
test_check("metis")

6
tests/testthat/test-metis-lite.R

@ -1,6 +0,0 @@
context("basic functionality")
test_that("we can do something", {
#expect_that(some_function(), is_a("data.frame"))
})

28
tests/testthat/test-metis.R

@ -0,0 +1,28 @@
context("Driver & queries work")

# Integration test against a live Athena endpoint. All expectations live in a
# single test_that() block so the skip and any failure are reported against a
# named test instead of running at file level.
test_that("the Athena driver connects and runs basic queries", {

  # Talks to AWS using local credentials, so it is skipped on CRAN.
  skip_on_cran()

  drv <- metis::Athena()
  expect_is(drv, "AthenaDriver")

  # The stray empty trailing argument of the original call has been dropped.
  dbConnect(
    drv = drv,
    schema_name = "sampledb",
    provider = "com.simba.athena.amazonaws.auth.PropertiesFileCredentialsProvider",
    AwsCredentialsProviderArguments = path.expand("~/.aws/athenaCredentials.props"),
    s3_staging_dir = "s3://aws-athena-query-results-569593279821-us-east-1"
  ) -> con

  expect_is(con, "AthenaConnection")

  expect_equal(dbListTables(con, schema = "sampledb"), "elb_logs")
  expect_true(dbExistsTable(con, "elb_logs", schema = "sampledb"))
  expect_true("url" %in% dbListFields(con, "elb_logs", "sampledb"))

  expect_is(
    dbGetQuery(con, "SELECT * FROM sampledb.elb_logs LIMIT 10"),
    "data.frame"
  )

  # Release the JDBC connection once the checks have run.
  dbDisconnect(con)

})
Načítá se…
Zrušit
Uložit