Kaynağa Gözat

dplyr bits working

master
boB Rudis 1 yıl önce
ebeveyn
işleme
646d4938e8
Veri tabanında bu imza için bilinen anahtar bulunamadı GPG Anahtar Kimliği: 1D7529BE14E2BBA9
13 değiştirilmiş dosya ile 325 ekleme ve 149 silme
  1. +2
    -2
      DESCRIPTION
  2. +4
    -0
      R/a-utils.R
  3. +20
    -4
      R/jdbc.r
  4. +2
    -1
      R/metis-lite-package.R
  5. +15
    -16
      R/metis.r
  6. +10
    -4
      R/z-dbGetQuery.R
  7. +8
    -1
      R/zzz.R
  8. +87
    -42
      README.Rmd
  9. +143
    -64
      README.md
  10. +14
    -6
      man/athena_connect.Rd
  11. +15
    -6
      man/dbConnect-AthenaDriver-method.Rd
  12. +1
    -2
      man/dbGetQuery-AthenaConnection-character-method.Rd
  13. +4
    -1
      man/metis.lite.Rd

+ 2
- 2
DESCRIPTION Dosyayı Görüntüle

@@ -1,6 +1,6 @@
Package: metis.lite
Type: Package
Title: Helpers for Accessing and Querying Amazon Athena
Title: Access and Query Amazon Athena via DBI/JDBC
Version: 0.3.0
Date: 2019-02-14
Authors@R: c(
@@ -11,7 +11,7 @@ Maintainer: Bob Rudis <bob@rud.is>
Encoding: UTF-8
Description: Methods are provided to connect to 'Amazon' 'Athena', lookup schemas/tables,
perform queries and retrieve query results. A lightweight 'RJDBC' implementation
is included along with an interface to the 'AWS' command-line utility.
is included along with additional helpers for 'dbplyr'/'dplyr' support.
SystemRequirements: JDK 1.8+
License: MIT + file LICENSE
Suggests:


+ 4
- 0
R/a-utils.R Dosyayı Görüntüle

@@ -3,6 +3,10 @@ set_names <- function (object = nm, nm) {
object
}

# Coerce a value to logical by first converting it to integer, so that
# inputs such as "1"/"0" or 1/0 become TRUE/FALSE.
as_logical <- function(x) {
  int_val <- as.integer(x)
  as.logical(int_val)
}

# Convert a value to Date, supplying the Unix epoch as the origin for
# as.Date().
as_date <- function(x) {
  unix_epoch <- "1970-01-01"
  as.Date(x, origin = unix_epoch)
}


+ 20
- 4
R/jdbc.r Dosyayı Görüntüle

@@ -36,6 +36,14 @@ Athena <- function(identifier.quote = '`') {

#' AthenaJDBC
#'
#' Connect to Athena
#'
#' @section Driver Configuration Options:
#'
#' - `BinaryColumnLength`: <int> The maximum data length for `BINARY` columns. Default `32767L`
#' - `ComplexTypeColumnLength`: <int> The maximum data length for `ARRAY`, `MAP`, and `STRUCT` columns. Default `65535L`
#' - `StringColumnLength`: <int> The maximum data length for `STRING` columns. Default `255L`
#'
#' @param provider JDBC auth provider (ideally leave default)
#' @param region AWS region the Athena tables are in
#' @param s3_staging_dir A write-able bucket on S3 that you have permissions for
@@ -46,8 +54,9 @@ Athena <- function(identifier.quote = '`') {
#' of data in logs. Set this to a temporary directory or something log4j can use. For
#' `log_level` use the names ("INFO", "DEBUG", "WARN", "ERROR", "ALL", "OFF", "FATAL", "TRACE") or
#' their corresponding integer values 0-6.
#' @param ... unused
#' @references <https://docs.aws.amazon.com/athena/latest/ug/connect-with-jdbc.html>
#' @param ... passed on to the driver. See Details.
#' @references [Connect with JDBC](https://docs.aws.amazon.com/athena/latest/ug/connect-with-jdbc.html);
#' [Simba Athena JDBC Driver with SQL Connector Installation and Configuration Guide](https://s3.amazonaws.com/athena-downloads/drivers/JDBC/SimbaAthenaJDBC_2.0.6/docs/Simba+Athena+JDBC+Driver+Install+and+Configuration+Guide.pdf)
#' @export
setMethod(

@@ -60,6 +69,7 @@ setMethod(
region = "us-east-1",
s3_staging_dir = Sys.getenv("AWS_S3_STAGING_DIR"),
schema_name = "default",
fetch_size = 1000L,
max_error_retries = 10,
connection_timeout = 10000,
socket_timeout = 10000,
@@ -87,7 +97,11 @@ setMethod(
...
) -> jc

return(as(jc, "AthenaConnection"))

jc <- as(jc, "AthenaConnection")
jc@fetch_size <- as.integer(fetch_size)

return(jc)

}

@@ -96,7 +110,9 @@ setMethod(
#' AthenaJDBC
#'
#' @export
setClass("AthenaConnection", contains = "JDBCConnection")
setClass("AthenaConnection", representation("JDBCConnection", jc="jobjRef", identifier.quote="character", fetch_size="integer"))

# setClass("AthenaConnection", contains = "JDBCConnection")

#' AthenaJDBC
#'


+ 2
- 1
R/metis-lite-package.R Dosyayı Görüntüle

@@ -1,4 +1,4 @@
#' Helpers for Accessing and Querying Amazon Athena
#' Access and Query Amazon Athena via DBI/JDBC
#'
#' Methods are provided to connect to 'Amazon' 'Athena', lookup schemas/tables,
#' perform queries and retrieve query results. A lightweight 'RJDBC' implementation
@@ -27,6 +27,7 @@
#' @docType package
#' @author Bob Rudis (bob@@rud.is)
#' @import RJDBC DBI bit64 dbplyr
#' @references [Simba Athena JDBC Driver with SQL Connector Installation and Configuration Guide](https://s3.amazonaws.com/athena-downloads/drivers/JDBC/SimbaAthenaJDBC_2.0.6/docs/Simba+Athena+JDBC+Driver+Install+and+Configuration+Guide.pdf)
NULL




+ 15
- 16
R/metis.r Dosyayı Görüntüle

@@ -1,4 +1,4 @@
#' Make a JDBC connection to Athena
#' Simplified Athena JDBC connection helper
#'
#' Handles the up-front JDBC config
#'
@@ -14,14 +14,19 @@
#' @param log_path local path of the Athena JDBC driver logs. If no log path is provided, then no log files are created.
#' @param log_level log level of the Athena JDBC driver logs. Use names
#' "OFF", "FATAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE".
#' @param ... passed on to the driver
#' @export
#' @references [Connect with JDBC](https://docs.aws.amazon.com/athena/latest/ug/connect-with-jdbc.html);
#' [Simba Athena JDBC Driver with SQL Connector Installation and Configuration Guide](https://s3.amazonaws.com/athena-downloads/drivers/JDBC/SimbaAthenaJDBC_2.0.6/docs/Simba+Athena+JDBC+Driver+Install+and+Configuration+Guide.pdf)
#' @examples \dontrun{
#' use_credentials("personal")
#'
#' ath <- athena_connect(default_schema = "sampledb",
#' s3_staging_dir = "s3://accessible-bucket",
#' log_path = "/tmp/athena.log",
#' log_level = "DEBUG")
#' athena_connect(
#' default_schema = "sampledb",
#' s3_staging_dir = "s3://accessible-bucket",
#' log_path = "/tmp/athena.log",
#' log_level = "DEBUG"
#' ) -> ath
#'
#' dbListTables(ath)
#'
@@ -35,17 +40,16 @@ athena_connect <- function(
max_error_retries = 10,
connection_timeout = 10000,
socket_timeout = 10000,
# retry_base_delay = 100,
# retry_max_backoff_time = 1000,
log_path = "",
log_level = c("OFF", "FATAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE")) {
log_level = c("OFF", "FATAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE"),
...
) {

athena_jdbc <- Athena()

region <- match.arg(region, c("us-east-1", "us-east-2", "us-west-2"))
log_level <- match.arg(log_level, c("OFF", "FATAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE"))

# if (!simple) {
dbConnect(
athena_jdbc,
schema_name = default_schema,
@@ -54,15 +58,10 @@ athena_connect <- function(
max_error_retries = max_error_retries,
connection_timeout = connection_timeout,
socket_timeout = socket_timeout,
# retry_base_delay = retry_base_delay,
# retry_max_backoff_time = retry_max_backoff_time,
log_path = log_path,
log_level = log_level
log_level = log_level,
...
) -> con
# } else {
# con <- dbConnect(athena_jdbc, provider = NULL, schema_name = default_schema, region = region,
# s3_staging_dir = s3_staging_dir, log_path = log_path, log_level = log_level)
# }

con



+ 10
- 4
R/z-dbGetQuery.R Dosyayı Görüntüle

@@ -16,10 +16,11 @@ list(
"7" = as.double, # REAL
"8" = as.double, # DOUBLE
"12" = as.character, # VARCHAR
"16" = as.logical, # BOOLEAN
"16" = as_logical, # BOOLEAN
"91" = as_date, # DATE
"92" = as.character, # TIME
"93" = as_posixct, # TIMESTAMP
"2003" = as.character, # ARRAY
"1111" = as.character # OTHER
) -> .jdbc_converters

@@ -71,14 +72,19 @@ setMethod(
ct <- as.character(.jcall(res@md, "I", "getColumnType", i))
athena_type_convert[[i]] <- .jdbc_converters[[ct]]
nms <- c(nms, .jcall(res@md, "S", "getColumnLabel", i))
# message(ct, "|", tail(nms, 1))
}

athena_type_convert <- set_names(athena_type_convert, nms)

out <- callNextMethod(res = res, n = n, block = block, ...)

# print(str(out))

for (nm in names(athena_type_convert)) {
out[[nm]] <- athena_type_convert[[nm]](out[[nm]])
f <- athena_type_convert[[nm]]
if (length(f) == 0) f <- as.character # catchall in case AMZN is tricksy
out[[nm]] <- f(out[[nm]])
}

out
@@ -98,13 +104,13 @@ setMethod(
"dbGetQuery",
signature(conn="AthenaConnection", statement="character"),

definition = function(conn, statement, type_convert=FALSE, ...) {
definition = function(conn, statement, ...) {

r <- dbSendQuery(conn, statement, ...)

on.exit(.jcall(r@stat, "V", "close"))

res <- fetch(r, -1, block = 1000L)
res <- fetch(r, -1, block = conn@fetch_size)

class(res) <- c("tbl_df", "tbl", "data.frame")



+ 8
- 1
R/zzz.R Dosyayı Görüntüle

@@ -1,4 +1,11 @@
# Package load hook.
#
# Hands the package over to rJava with all jars requested (`jars = "*"`), adds
# any files found under "inst/java" of the current working directory to the
# Java class path, and emits a startup message when '-Xrs' is not present in
# the `java.parameters` option.
#
# @param libname library path the package is being loaded from; forwarded to
#   rJava::.jpackage() as `lib.loc`
# @param pkgname name of the package being loaded; forwarded to
#   rJava::.jpackage()
.onLoad <- function(libname, pkgname) {
rJava::.jpackage(pkgname, jars = "*", lib.loc = libname)
# rJava::.jaddClassPath(dir(file.path(getwd(), "inst/java"), full.names = TRUE))
# NOTE(review): the class path entry below is resolved against getwd(), which
# looks like a development-time convenience -- confirm it is meant to ship.
rJava::.jaddClassPath(dir(file.path(getwd(), "inst/java"), full.names = TRUE))
# Only inspect the option; this hook never modifies `java.parameters` itself.
o <- getOption("java.parameters", "")
if (!any(grepl("-Xrs", o))) {
packageStartupMessage(
"Did not find '-Xrs' in java.parameters option. Until rJava is updated, ",
"please set this up in your/an Rprofile or at the start of scripts."
)
}
}

+ 87
- 42
README.Rmd Dosyayı Görüntüle

@@ -4,25 +4,14 @@ editor_options:
chunk_output_type: console
---

# `metis`
# metis

Helpers for Accessing and Querying Amazon Athena

Including a lightweight RJDBC shim.

In Greek mythology, Metis was Athena's "helper".
Access and Query Amazon Athena via DBI/JDBC

## Description

Still fairly beta-quality level but getting there.

The goal will be to get around enough of the "gotchas" that are preventing raw RJDBC Athena connections from "just working" with `dplyr` v0.6.0+ and also get around the [`fetchSize` problem](https://www.reddit.com/r/aws/comments/6aq22b/fetchsize_limit/) without having to not use `dbGetQuery()`.

The `AthenaJDBC42_2.0.2.jar` JAR file is included out of convenience but that will likely move to a separate package as this gets closer to prime time if this goes on CRAN.

NOTE that the updated driver *REQUIRES JDK 1.8+*.

See the **Usage** section for an example.
In Greek mythology, Metis was Athena's "helper" so methods are provided to help you access and query Amazon Athena via DBI/JDBC and/or `dplyr`.
Methods are provided to connect to 'Amazon' 'Athena', lookup schemas/tables, perform queries and retrieve query results.

## IMPORTANT

@@ -41,7 +30,7 @@ The following functions are implemented:

Easy-interface connection helper:

- `athena_connect` Make a JDBC connection to Athena
- `athena_connect` Simplified Athena JDBC connection helper

Custom JDBC Classes:

@@ -52,13 +41,13 @@ Custom JDBC Classes:

Custom JDBC Class Methods:

- `dbConnect-method`: AthenaJDBC
- `dbExistsTable-method`: AthenaJDBC
- `dbGetQuery-method`: AthenaJDBC
- `dbListFields-method`: AthenaJDBC
- `dbListTables-method`: AthenaJDBC
- `dbReadTable-method`: AthenaJDBC
- `dbSendQuery-method`: AthenaJDBC
- `dbConnect-method`
- `dbExistsTable-method`
- `dbGetQuery-method`
- `dbListFields-method`
- `dbListTables-method`
- `dbReadTable-method`
- `dbSendQuery-method`

Pulled in from other `cloudyr` pkgs:

@@ -68,41 +57,97 @@ Pulled in from other `cloudyr` pkgs:
## Installation

```{r eval=FALSE}
devtools::install_github("hrbrmstr/metis")
devtools::install_git("https://git.sr.ht/~hrbrmstr/metis-lite")
# OR
devtools::install_gitlab("hrbrmstr/metis-lite")
# OR
devtools::install_github("hrbrmstr/metis-lite")
```

```{r message=FALSE, warning=FALSE, error=FALSE, include=FALSE}
```{r message=FALSE, warning=FALSE, include=FALSE}
options(width=120)
```

## Usage

```{r message=FALSE, warning=FALSE, error=FALSE}
library(metis)
library(tidyverse)
```{r message=FALSE, warning=FALSE}
library(metis.lite)

# current version
packageVersion("metis")
packageVersion("metis.lite")
```

```{r message=FALSE, warning=FALSE, error=FALSE}
use_credentials("default")
```{r message=FALSE, warning=FALSE}
library(rJava)
library(RJDBC)
library(metis.lite)
library(magrittr)
library(dbplyr)
library(dplyr)

athena_connect(
default_schema = "sampledb",
s3_staging_dir = "s3://accessible-bucket",
log_path = "/tmp/athena.log",
log_level = "DEBUG"
) -> ath
dbConnect(
drv = metis.lite::Athena(),
schema_name = "sampledb",
provider = "com.simba.athena.amazonaws.auth.PropertiesFileCredentialsProvider",
AwsCredentialsProviderArguments = path.expand("~/.aws/athenaCredentials.props"),
s3_staging_dir = "s3://aws-athena-query-results-569593279821-us-east-1",
) -> con

dbListTables(ath, schema="sampledb")
dbListTables(con, schema="sampledb")

dbExistsTable(ath, "elb_logs", schema="sampledb")
dbExistsTable(con, "elb_logs", schema="sampledb")

dbListFields(ath, "elb_logs", "sampledb")
dbListFields(con, "elb_logs", "sampledb")

dbGetQuery(con, "SELECT * FROM sampledb.elb_logs LIMIT 10") %>%
glimpse()
```

### Check types

```{r}
dbGetQuery(con, "
SELECT
CAST('chr' AS CHAR(4)) achar,
CAST('varchr' AS VARCHAR) avarchr,
CAST(SUBSTR(timestamp, 1, 10) AS DATE) AS tsday,
CAST(100.1 AS DOUBLE) AS justadbl,
CAST(127 AS TINYINT) AS asmallint,
CAST(100 AS INTEGER) AS justanint,
CAST(100000000000000000 AS BIGINT) AS abigint,
CAST(('GET' = 'GET') AS BOOLEAN) AS is_get,
ARRAY[1, 2, 3] AS arr1,
ARRAY['1', '2, 3', '4'] AS arr2,
MAP(ARRAY['foo', 'bar'], ARRAY[1, 2]) AS mp,
CAST(ROW(1, 2.0) AS ROW(x BIGINT, y DOUBLE)) AS rw,
CAST('{\"a\":1}' AS JSON) js
FROM elb_logs
LIMIT 1
") %>%
glimpse()
```

dbGetQuery(ath, "SELECT * FROM sampledb.elb_logs LIMIT 10") %>%
type_convert() %>%
#### dplyr

```{r}
tbl(con, sql("
SELECT
CAST('chr' AS CHAR(4)) achar,
CAST('varchr' AS VARCHAR) avarchr,
CAST(SUBSTR(timestamp, 1, 10) AS DATE) AS tsday,
CAST(100.1 AS DOUBLE) AS justadbl,
CAST(127 AS TINYINT) AS asmallint,
CAST(100 AS INTEGER) AS justanint,
CAST(100000000000000000 AS BIGINT) AS abigint,
CAST(('GET' = 'GET') AS BOOLEAN) AS is_get,
ARRAY[1, 2, 3] AS arr,
ARRAY['1', '2, 3', '4'] AS arr,
MAP(ARRAY['foo', 'bar'], ARRAY[1, 2]) AS mp,
CAST(ROW(1, 2.0) AS ROW(x BIGINT, y DOUBLE)) AS rw,
CAST('{\"a\":1}' AS JSON) js
FROM elb_logs
LIMIT 1
")) %>%
glimpse()
```



+ 143
- 64
README.md Dosyayı Görüntüle

@@ -1,35 +1,25 @@

# `metis`
# metis

Helpers for Accessing and Querying Amazon Athena

Including a lightweight RJDBC shim.

In Greek mythology, Metis was Athena’s “helper”.
Access and Query Amazon Athena via DBI/JDBC

## Description

Still fairly beta-quality level but getting there.

The goal will be to get around enough of the “gotchas” that are
preventing raw RJDBC Athena connections from “just working” with `dplyr`
v0.6.0+ and also get around the [`fetchSize`
problem](https://www.reddit.com/r/aws/comments/6aq22b/fetchsize_limit/)
without having to not use `dbGetQuery()`.

The `AthenaJDBC42_2.0.2.jar` JAR file is included out of convenience but
that will likely move to a separate package as this gets closer to prime
time if this goes on CRAN.

NOTE that the updated driver *REQUIRES JDK 1.8+*.

See the **Usage** section for an example.
In Greek mythology, Metis was Athena’s “helper” so methods are provided
to help you access and query Amazon Athena via DBI/JDBC and/or
`dplyr`. Methods are provided to connect to ‘Amazon’ ‘Athena’,
lookup schemas/tables, perform queries and retrieve query results.

## IMPORTANT

Since R 3.5 (I don't remember this happening in R 3.4.x) signals sent from interrupting Athena JDBC calls crash the R interpreter. You need to set the `-Xrs` option to avoid signals being passed on to the JVM owner. That has to be done _before_ `rJava` is loaded so you either need to remember to put it at the top of all scripts _or_ stick this in your local `~/.Rprofile` and/or sitewide `Rprofile`:
Since R 3.5 (I don’t remember this happening in R 3.4.x) signals sent
from interrupting Athena JDBC calls crash the R interpreter. You need to
set the `-Xrs` option to avoid signals being passed on to the JVM owner.
That has to be done *before* `rJava` is loaded so you either need to
remember to put it at the top of all scripts *or* stick this in your
local `~/.Rprofile` and/or sitewide `Rprofile`:

```r
``` r
if (!grepl("-Xrs", getOption("java.parameters", ""))) {
options(
"java.parameters" = c(getOption("java.parameters", default = NULL), "-Xrs")
@@ -43,7 +33,7 @@ The following functions are implemented:

Easy-interface connection helper:

- `athena_connect` Make a JDBC connection to Athena
- `athena_connect` Simplified Athena JDBC connection helper

Custom JDBC Classes:

@@ -54,13 +44,13 @@ Custom JDBC Classes:

Custom JDBC Class Methods:

- `dbConnect-method`: AthenaJDBC
- `dbExistsTable-method`: AthenaJDBC
- `dbGetQuery-method`: AthenaJDBC
- `dbListFields-method`: AthenaJDBC
- `dbListTables-method`: AthenaJDBC
- `dbReadTable-method`: AthenaJDBC
- `dbSendQuery-method`: AthenaJDBC
- `dbConnect-method`
- `dbExistsTable-method`
- `dbGetQuery-method`
- `dbListFields-method`
- `dbListTables-method`
- `dbReadTable-method`
- `dbSendQuery-method`

Pulled in from other `cloudyr` pkgs:

@@ -70,44 +60,53 @@ Pulled in from other `cloudyr` pkgs:
## Installation

``` r
devtools::install_github("hrbrmstr/metis")
devtools::install_git("https://git.sr.ht/~hrbrmstr/metis-lite")
# OR
devtools::install_gitlab("hrbrmstr/metis-lite")
# OR
devtools::install_github("hrbrmstr/metis-lite")
```

## Usage

``` r
library(metis)
library(tidyverse)
library(metis.lite)

# current version
packageVersion("metis")
packageVersion("metis.lite")
```

## [1] '0.3.0'

``` r
use_credentials("default")

athena_connect(
default_schema = "sampledb",
s3_staging_dir = "s3://accessible-bucket",
log_path = "/tmp/athena.log",
log_level = "DEBUG"
) -> ath

dbListTables(ath, schema="sampledb")
library(rJava)
library(RJDBC)
library(metis.lite)
library(magrittr)
library(dbplyr)
library(dplyr)

dbConnect(
drv = metis.lite::Athena(),
schema_name = "sampledb",
provider = "com.simba.athena.amazonaws.auth.PropertiesFileCredentialsProvider",
AwsCredentialsProviderArguments = path.expand("~/.aws/athenaCredentials.props"),
s3_staging_dir = "s3://aws-athena-query-results-569593279821-us-east-1",
) -> con

dbListTables(con, schema="sampledb")
```

## [1] "elb_logs"

``` r
dbExistsTable(ath, "elb_logs", schema="sampledb")
dbExistsTable(con, "elb_logs", schema="sampledb")
```

## [1] TRUE

``` r
dbListFields(ath, "elb_logs", "sampledb")
dbListFields(con, "elb_logs", "sampledb")
```

## [1] "timestamp" "elbname" "requestip" "requestport"
@@ -116,29 +115,109 @@ dbListFields(ath, "elb_logs", "sampledb")
## [13] "sentbytes" "requestverb" "url" "protocol"

``` r
dbGetQuery(ath, "SELECT * FROM sampledb.elb_logs LIMIT 10") %>%
type_convert() %>%
dbGetQuery(con, "SELECT * FROM sampledb.elb_logs LIMIT 10") %>%
glimpse()
```

## Observations: 10
## Variables: 16
## $ timestamp <dttm> 2014-09-30 01:28:17, 2014-09-30 00:01:30, 2014-09-30 00:01:30, 2014-09-30 00:01:30, ...
## $ elbname <chr> "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo...
## $ requestip <chr> "246.140.190.136", "240.109.129.138", "242.251.232.153", "253.227.207.81", "253.227.2...
## $ requestport <dbl> 63777, 22705, 22705, 22705, 23282, 24178, 22916, 23807, 22916, 21443
## $ backendip <chr> "250.193.168.100", "251.103.130.45", "243.140.114.254", "243.82.95.243", "246.129.102...
## $ backendport <dbl> 8888, 8888, 8888, 8888, 8899, 8888, 8888, 8888, 8888, 8888
## $ requestprocessingtime <dbl> 7.2e-05, 6.9e-05, 8.7e-05, 9.7e-05, 8.1e-05, 4.6e-05, 4.3e-05, 5.3e-05, 5.5e-05, 4.4e-05
## $ backendprocessingtime <dbl> 0.379241, 0.007541, 0.187126, 0.413337, 0.037030, 0.050222, 0.043706, 0.045953, 0.015...
## $ clientresponsetime <dbl> 8.0e-05, 4.3e-05, 7.5e-05, 8.7e-05, 4.5e-05, 3.3e-05, 3.3e-05, 6.9e-05, 8.5e-05, 4.9e-05
## $ elbresponsecode <int> 200, 302, 302, 200, 200, 200, 200, 200, 200, 200
## $ backendresponsecode <int> 200, 200, 200, 400, 200, 200, 200, 404, 200, 200
## $ receivedbytes <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
## $ sentbytes <dbl> 58402, 0, 0, 58402, 32370, 20766, 3408, 152213, 84245, 3884
## $ timestamp <chr> "2014-09-29T18:18:51.826955Z", "2014-09-29T18:18:51.920462Z", "2014-09-29T18:18:52.2725…
## $ elbname <chr> "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo", "lb-demo",…
## $ requestip <chr> "255.48.150.122", "249.213.227.93", "245.108.120.229", "241.112.203.216", "241.43.107.2…
## $ requestport <int> 62096, 62096, 62096, 62096, 56454, 33254, 18918, 64352, 1651, 56454
## $ backendip <chr> "244.238.214.120", "248.99.214.228", "243.3.190.175", "246.235.181.255", "241.112.203.2…
## $ backendport <int> 8888, 8888, 8888, 8888, 8888, 8888, 8888, 8888, 8888, 8888
## $ requestprocessingtime <dbl> 9.0e-05, 9.7e-05, 8.7e-05, 9.4e-05, 7.6e-05, 8.3e-05, 6.3e-05, 5.4e-05, 8.2e-05, 8.7e-05
## $ backendprocessingtime <dbl> 0.007410, 0.256533, 0.442659, 0.016772, 0.035036, 0.029892, 0.034148, 0.014858, 0.01518…
## $ clientresponsetime <dbl> 0.000055, 0.000075, 0.000131, 0.000078, 0.000057, 0.000043, 0.000033, 0.000043, 0.00007…
## $ elbresponsecode <chr> "302", "302", "200", "200", "200", "200", "200", "200", "200", "200"
## $ backendresponsecode <chr> "200", "200", "200", "200", "200", "200", "200", "200", "200", "200"
## $ receivedbytes <S3: integer64> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
## $ sentbytes <S3: integer64> 0, 0, 58402, 152213, 20766, 32370, 3408, 3884, 84245, 3831
## $ requestverb <chr> "GET", "GET", "GET", "GET", "GET", "GET", "GET", "GET", "GET", "GET"
## $ url <chr> "http://www.abcxyz.com:80/", "http://www.abcxyz.com:80/", "http://www.abcxyz.com:80/a...
## $ protocol <chr> "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "...
## $ url <chr> "http://www.abcxyz.com:80/", "http://www.abcxyz.com:80/accounts/login/?next=/", "http:/…
## $ protocol <chr> "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HTTP/1.1", "HT…

### Check types

``` r
dbGetQuery(con, "
SELECT
CAST('chr' AS CHAR(4)) achar,
CAST('varchr' AS VARCHAR) avarchr,
CAST(SUBSTR(timestamp, 1, 10) AS DATE) AS tsday,
CAST(100.1 AS DOUBLE) AS justadbl,
CAST(127 AS TINYINT) AS asmallint,
CAST(100 AS INTEGER) AS justanint,
CAST(100000000000000000 AS BIGINT) AS abigint,
CAST(('GET' = 'GET') AS BOOLEAN) AS is_get,
ARRAY[1, 2, 3] AS arr1,
ARRAY['1', '2, 3', '4'] AS arr2,
MAP(ARRAY['foo', 'bar'], ARRAY[1, 2]) AS mp,
CAST(ROW(1, 2.0) AS ROW(x BIGINT, y DOUBLE)) AS rw,
CAST('{\"a\":1}' AS JSON) js
FROM elb_logs
LIMIT 1
") %>%
glimpse()
```

## Observations: 1
## Variables: 13
## $ achar <chr> "chr "
## $ avarchr <chr> "varchr"
## $ tsday <date> 2014-09-26
## $ justadbl <dbl> 100.1
## $ asmallint <int> 127
## $ justanint <int> 100
## $ abigint <S3: integer64> 100000000000000000
## $ is_get <lgl> TRUE
## $ arr1 <chr> "1, 2, 3"
## $ arr2 <chr> "1, 2, 3, 4"
## $ mp <chr> "{bar=2, foo=1}"
## $ rw <chr> "{x=1, y=2.0}"
## $ js <chr> "\"{\\\"a\\\":1}\""

#### dplyr

``` r
tbl(con, sql("
SELECT
CAST('chr' AS CHAR(4)) achar,
CAST('varchr' AS VARCHAR) avarchr,
CAST(SUBSTR(timestamp, 1, 10) AS DATE) AS tsday,
CAST(100.1 AS DOUBLE) AS justadbl,
CAST(127 AS TINYINT) AS asmallint,
CAST(100 AS INTEGER) AS justanint,
CAST(100000000000000000 AS BIGINT) AS abigint,
CAST(('GET' = 'GET') AS BOOLEAN) AS is_get,
ARRAY[1, 2, 3] AS arr,
ARRAY['1', '2, 3', '4'] AS arr,
MAP(ARRAY['foo', 'bar'], ARRAY[1, 2]) AS mp,
CAST(ROW(1, 2.0) AS ROW(x BIGINT, y DOUBLE)) AS rw,
CAST('{\"a\":1}' AS JSON) js
FROM elb_logs
LIMIT 1
")) %>%
glimpse()
```

## Observations: ??
## Variables: 13
## Database: AthenaConnection
## $ achar <chr> "chr "
## $ avarchr <chr> "varchr"
## $ tsday <date> 2014-09-27
## $ justadbl <dbl> 100.1
## $ asmallint <int> 127
## $ justanint <int> 100
## $ abigint <S3: integer64> 100000000000000000
## $ is_get <lgl> TRUE
## $ arr <chr> "1, 2, 3"
## $ arr <chr> "1, 2, 3, 4"
## $ mp <chr> "{bar=2, foo=1}"
## $ rw <chr> "{x=1, y=2.0}"
## $ js <chr> "\"{\\\"a\\\":1}\""

## Code of Conduct



+ 14
- 6
man/athena_connect.Rd Dosyayı Görüntüle

@@ -2,14 +2,14 @@
% Please edit documentation in R/metis.r
\name{athena_connect}
\alias{athena_connect}
\title{Make a JDBC connection to Athena}
\title{Simplified Athena JDBC connection helper}
\usage{
athena_connect(default_schema = "default", region = c("us-east-1",
"us-east-2", "us-west-2"),
s3_staging_dir = Sys.getenv("AWS_S3_STAGING_DIR"),
max_error_retries = 10, connection_timeout = 10000,
socket_timeout = 10000, log_path = "", log_level = c("OFF",
"FATAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE"))
"FATAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE"), ...)
}
\arguments{
\item{default_schema}{default schema (you'll still need to fully qualify non-default schema table names)}
@@ -28,6 +28,8 @@ athena_connect(default_schema = "default", region = c("us-east-1",

\item{log_level}{log level of the Athena JDBC driver logs. Use names
"OFF", "FATAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE".}

\item{...}{passed on to the driver}
}
\description{
Handles the up-front JDBC config
@@ -36,10 +38,12 @@ Handles the up-front JDBC config
\dontrun{
use_credentials("personal")

ath <- athena_connect(default_schema = "sampledb",
s3_staging_dir = "s3://accessible-bucket",
log_path = "/tmp/athena.log",
log_level = "DEBUG")
athena_connect(
default_schema = "sampledb",
s3_staging_dir = "s3://accessible-bucket",
log_path = "/tmp/athena.log",
log_level = "DEBUG"
) -> ath

dbListTables(ath)

@@ -47,3 +51,7 @@ dbGetQuery(ath, "SELECT * FROM sampledb.elb_logs LIMIT 1")

}
}
\references{
\href{https://docs.aws.amazon.com/athena/latest/ug/connect-with-jdbc.html}{Connect with JDBC};
\href{https://s3.amazonaws.com/athena-downloads/drivers/JDBC/SimbaAthenaJDBC_2.0.6/docs/Simba+Athena+JDBC+Driver+Install+and+Configuration+Guide.pdf}{Simba Athena JDBC Driver with SQL Connector Installation and Configuration Guide}
}

+ 15
- 6
man/dbConnect-AthenaDriver-method.Rd Dosyayı Görüntüle

@@ -9,9 +9,9 @@
provider = "com.simba.athena.amazonaws.auth.DefaultAWSCredentialsProviderChain",
region = "us-east-1",
s3_staging_dir = Sys.getenv("AWS_S3_STAGING_DIR"),
schema_name = "default", max_error_retries = 10,
connection_timeout = 10000, socket_timeout = 10000, log_path = "",
log_level = 0, ...)
schema_name = "default", fetch_size = 1000L,
max_error_retries = 10, connection_timeout = 10000,
socket_timeout = 10000, log_path = "", log_level = 0, ...)
}
\arguments{
\item{provider}{JDBC auth provider (ideally leave default)}
@@ -29,11 +29,20 @@ of data in logs. Set this to a temporary directory or something log4j can use. F
`log_level` use the names ("INFO", "DEBUG", "WARN", "ERROR", "ALL", "OFF", "FATAL", "TRACE") or
their corresponding integer values 0-6.}

\item{...}{unused}
\item{...}{passed on to the driver. See Details.}
}
\description{
AthenaJDBC
Connect to Athena
}
\section{Driver Configuration Options}{


- `BinaryColumnLength`: <int> The maximum data length for `BINARY` columns. Default `32767L`
- `ComplexTypeColumnLength`: <int> The maximum data length for `ARRAY`, `MAP`, and `STRUCT` columns. Default `65535L`
- `StringColumnLength`: <int> The maximum data length for `STRING` columns. Default `255L`
}

\references{
<https://docs.aws.amazon.com/athena/latest/ug/connect-with-jdbc.html>
[Connect with JDBC](https://docs.aws.amazon.com/athena/latest/ug/connect-with-jdbc.html);
[Simba Athena JDBC Driver with SQL Connector Installation and Configuration Guide](https://s3.amazonaws.com/athena-downloads/drivers/JDBC/SimbaAthenaJDBC_2.0.6/docs/Simba+Athena+JDBC+Driver+Install+and+Configuration+Guide.pdf)
}

+ 1
- 2
man/dbGetQuery-AthenaConnection-character-method.Rd Dosyayı Görüntüle

@@ -5,8 +5,7 @@
\alias{dbGetQuery,AthenaConnection,character-method}
\title{AthenaJDBC}
\usage{
\S4method{dbGetQuery}{AthenaConnection,character}(conn, statement,
type_convert = FALSE, ...)
\S4method{dbGetQuery}{AthenaConnection,character}(conn, statement, ...)
}
\arguments{
\item{conn}{Athena connection}


+ 4
- 1
man/metis.lite.Rd Dosyayı Görüntüle

@@ -5,7 +5,7 @@
\name{metis.lite}
\alias{metis.lite}
\alias{metis.lite-package}
\title{Helpers for Accessing and Querying Amazon Athena}
\title{Access and Query Amazon Athena via DBI/JDBC}
\description{
Methods are provided to connect to 'Amazon' 'Athena', lookup schemas/tables,
perform queries and retrieve query results. A lightweight 'RJDBC' implementation
@@ -26,6 +26,9 @@ in your local #' \code{~/.Rprofile} and/or sitewide \code{Rprofile}:\preformatte
}
}

\references{
\href{https://s3.amazonaws.com/athena-downloads/drivers/JDBC/SimbaAthenaJDBC_2.0.6/docs/Simba+Athena+JDBC+Driver+Install+and+Configuration+Guide.pdf}{Simba Athena JDBC Driver with SQL Connector Installation and Configuration Guide}
}
\author{
Bob Rudis (bob@rud.is)
}


Yükleniyor…
İptal
Kaydet