mirror of https://git.sr.ht/~hrbrmstr/metis-jars
boB Rudis
7 years ago
commit
a01ab3351f
25 changed files with 505 additions and 0 deletions
@ -0,0 +1,10 @@ |
|||||
|
^.*\.Rproj$ |
||||
|
^\.Rproj\.user$ |
||||
|
^\.travis\.yml$ |
||||
|
^README\.*Rmd$ |
||||
|
^README\.*html$ |
||||
|
^NOTES\.*Rmd$ |
||||
|
^NOTES\.*html$ |
||||
|
^\.codecov\.yml$ |
||||
|
^README_files$ |
||||
|
^doc$ |
@ -0,0 +1 @@ |
|||||
|
comment: false |
@ -0,0 +1,8 @@ |
|||||
|
.DS_Store |
||||
|
.Rproj.user |
||||
|
.Rhistory |
||||
|
.RData |
||||
|
.Rproj |
||||
|
src/*.o |
||||
|
src/*.so |
||||
|
src/*.dll |
@ -0,0 +1,31 @@ |
|||||
|
language: r |
||||
|
|
||||
|
warnings_are_errors: true |
||||
|
|
||||
|
sudo: required |
||||
|
|
||||
|
cache: packages |
||||
|
|
||||
|
r: |
||||
|
- oldrel |
||||
|
- release |
||||
|
- devel |
||||
|
|
||||
|
apt_packages: |
||||
|
- libv8-dev |
||||
|
- xclip |
||||
|
|
||||
|
env: |
||||
|
global: |
||||
|
- CRAN: http://cran.rstudio.com |
||||
|
|
||||
|
after_success: |
||||
|
- Rscript -e 'covr::codecov()' |
||||
|
|
||||
|
notifications: |
||||
|
email: |
||||
|
- bob@rud.is |
||||
|
irc: |
||||
|
channels: |
||||
|
- "104.236.112.222#builds" |
||||
|
nick: travisci |
@ -0,0 +1,22 @@ |
|||||
|
Package: metis |
||||
|
Type: Package |
||||
|
Title: Helpers for Accessing and Querying Amazon Athena |
||||
|
Version: 0.1.0 |
||||
|
Date: 2017-05-16 |
||||
|
Author: Bob Rudis (bob@rud.is) |
||||
|
Maintainer: Bob Rudis <bob@rud.is> |
||||
|
Description: Helpers for Accessing and Querying Amazon Athena. Including a lightweight RJDBC shim. |
||||
|
URL: https://github.com/hrbrmstr/metis |
||||
|
BugReports: https://github.com/hrbrmstr/metis/issues |
||||
|
License: AGPL |
||||
|
Suggests: |
||||
|
testthat, |
||||
|
covr |
||||
|
Depends: |
||||
|
R (>= 3.2.0), |
||||
|
RJDBC |
||||
|
Imports: |
||||
|
DBI, |
||||
|
dplyr, |
||||
|
ini |
||||
|
RoxygenNote: 6.0.1 |
@ -0,0 +1,14 @@ |
|||||
|
# Generated by roxygen2: do not edit by hand |
||||
|
|
||||
|
export(Athena) |
||||
|
export(athena_connect) |
||||
|
exportClasses(AthenaConnection) |
||||
|
exportClasses(AthenaDriver) |
||||
|
exportClasses(AthenaResult) |
||||
|
exportMethods(dbConnect) |
||||
|
exportMethods(dbGetQuery) |
||||
|
exportMethods(dbSendQuery) |
||||
|
import(DBI) |
||||
|
import(RJDBC) |
||||
|
import(dplyr) |
||||
|
import(ini) |
@ -0,0 +1,2 @@ |
|||||
|
0.1.0 |
||||
|
* Initial release |
@ -0,0 +1,91 @@ |
|||||
|
#' AthenaDriver class
#'
#' S4 driver class for the Athena JDBC shim. It extends \code{JDBCDriver} and
#' declares the \code{identifier.quote} (character) and \code{jdrv}
#' (\code{jobjRef}) slots.
#'
#' @export
setClass(
  Class = "AthenaDriver",
  contains = "JDBCDriver",
  slots = c(
    identifier.quote = "character",
    jdrv = "jobjRef"
  )
)
||||
|
|
||||
|
#' Create an Athena JDBC driver
#'
#' Builds a JDBC driver for \code{com.amazonaws.athena.jdbc.AthenaDriver} from
#' the \code{AthenaJDBC41-1.0.1.jar} file shipped with the installed
#' \code{metis} package and returns it as an \code{AthenaDriver} object.
#'
#' @param identifier.quote character used to quote identifiers; it is handed
#'        to \code{JDBC()} unchanged.
#' @return an \code{AthenaDriver} object
#' @export
Athena <- function(identifier.quote = '`') {

  jar_path <- system.file("AthenaJDBC41-1.0.1.jar", package = "metis")

  # system.file() returns "" when the file is not found; fail with a clear
  # message instead of handing an empty class path to the JVM.
  if (!nzchar(jar_path)) {
    stop("AthenaJDBC41-1.0.1.jar was not found in the installed 'metis' package",
         call. = FALSE)
  }

  # Honour the caller's identifier.quote (it used to be ignored in favour of
  # a hard-coded single quote).
  drv <- JDBC(
    driverClass = "com.amazonaws.athena.jdbc.AthenaDriver",
    classPath = jar_path,
    identifier.quote = identifier.quote
  )

  as(drv, "AthenaDriver")

}
||||
|
|
||||
|
#' Connect to Athena
#'
#' \code{dbConnect} method for \code{AthenaDriver} objects. The S3 staging
#' directory is always taken from the \code{AWS_S3_STAGING_DIR} environment
#' variable.
#'
#' @param drv an \code{AthenaDriver} object (see \code{Athena()})
#' @param provider class name passed to the driver as
#'        \code{aws_credentials_provider_class}. When \code{NULL}, the values
#'        of the \code{AWS_ACCESS_KEY_ID} and \code{AWS_SECRET_ACCESS_KEY}
#'        environment variables are passed as \code{user} and \code{password}
#'        instead.
#' @param conn_string JDBC connection URL
#' @param schema_name value passed to the driver as \code{schema_name}
#' @param ... further arguments forwarded to the next \code{dbConnect} method
#' @return an \code{AthenaConnection} object
#' @export
setMethod(

  "dbConnect",
  "AthenaDriver",

  definition = function(drv,
                        provider = "com.amazonaws.athena.jdbc.shaded.com.amazonaws.auth.EnvironmentVariableCredentialsProvider",
                        conn_string = 'jdbc:awsathena://athena.us-east-1.amazonaws.com:443/',
                        schema_name, ...) {

    staging_dir <- Sys.getenv("AWS_S3_STAGING_DIR")

    if (!is.null(provider)) {

      jc <- callNextMethod(drv, conn_string,
                           s3_staging_dir = staging_dir,
                           schema_name = schema_name,
                           aws_credentials_provider_class = provider, ...)

    } else {

      # Use the caller-supplied conn_string and forward `...` here as well;
      # previously this branch hard-coded the us-east-1 URL and dropped `...`.
      jc <- callNextMethod(drv, conn_string,
                           s3_staging_dir = staging_dir,
                           schema_name = schema_name,
                           user = Sys.getenv("AWS_ACCESS_KEY_ID"),
                           password = Sys.getenv("AWS_SECRET_ACCESS_KEY"), ...)

    }

    as(jc, "AthenaConnection")

  }

)
||||
|
|
||||
|
#' AthenaConnection class
#'
#' S4 connection class for the Athena JDBC shim; it extends
#' \code{JDBCConnection} without adding slots.
#'
#' @export
setClass(
  Class = "AthenaConnection",
  contains = "JDBCConnection"
)
||||
|
|
||||
|
#' AthenaResult class
#'
#' S4 result class for the Athena JDBC shim; it extends \code{JDBCResult}
#' without adding slots.
#'
#' @export
setClass(
  Class = "AthenaResult",
  contains = "JDBCResult"
)
||||
|
|
||||
|
#' Send a query to Athena
#'
#' \code{dbSendQuery} method for \code{AthenaConnection} objects. It delegates
#' to the next (inherited) method and returns the result as an
#' \code{AthenaResult}.
#'
#' @param conn an \code{AthenaConnection} object
#' @param statement the SQL statement to send
#' @param ... passed on to the next \code{dbSendQuery} method
#' @return an \code{AthenaResult} object
#' @export
setMethod(

  "dbSendQuery",
  # dbSendQuery() dispatches on the connection, so the method has to be
  # registered for AthenaConnection; registering it for AthenaDriver meant it
  # was never selected for Athena connections.
  signature(conn = "AthenaConnection", statement = "character"),

  definition = function(conn, statement, ...) {
    as(callNextMethod(), "AthenaResult")
  }

)
||||
|
|
||||
|
#' Run a query against Athena and return the rows
#'
#' \code{dbGetQuery} method for \code{AthenaConnection} objects. It sends the
#' statement with \code{dbSendQuery()}, fetches the result with
#' \code{fetch(r, -1, block = 256)} and returns it wrapped by
#' \code{dplyr::tbl_df()}. The statement handle held in the result's
#' \code{stat} slot is closed when the function exits.
#'
#' @param conn an \code{AthenaConnection} object
#' @param statement the SQL statement to run
#' @param ... passed on to \code{dbSendQuery()}
#' @export
setMethod(

  f = "dbGetQuery",
  signature = signature(conn = "AthenaConnection", statement = "character"),

  definition = function(conn, statement, ...) {

    query_result <- dbSendQuery(conn, statement, ...)

    # make sure the underlying statement is closed, even if fetch() fails
    on.exit(.jcall(query_result@stat, "V", "close"))

    fetched_rows <- fetch(query_result, -1, block = 256)

    dplyr::tbl_df(fetched_rows)

  }

)
@ -0,0 +1,12 @@ |
|||||
|
#' Helpers for Accessing and Querying Amazon Athena |
||||
|
#' |
||||
|
#' Including a lightweight RJDBC shim. |
||||
|
#' |
||||
|
#' @name metis |
||||
|
#' @docType package |
||||
|
#' @author Bob Rudis (bob@@rud.is) |
||||
|
#' @import RJDBC |
||||
|
#' @import DBI |
||||
|
#' @import dplyr |
||||
|
#' @import ini |
||||
|
NULL |
@ -0,0 +1,48 @@ |
|||||
|
#' Make a JDBC connection to Athena
#'
#' Handles the up-front JDBC config
#'
#' For all connection types it is expected that you have the following environment variables
#' defined (a good place is `~/.Renviron`):
#'
#' - `AWS_S3_STAGING_DIR`: the name of the S3 bucket where Athena can write stuff
#' - `AWS_PROFILE`: the AWS profile ID in `~/.aws/credentials` (defaults to `default` if not present)
#'
#' For both connection types the selected profile is read from
#' `~/.aws/credentials`, and its `aws_access_key_id` and
#' `aws_secret_access_key` values are copied into the `AWS_ACCESS_KEY_ID` and
#' `AWS_SECRET_ACCESS_KEY` environment variables of the running R session.
#'
#' For `simple` == `FALSE` the expectation is that you're working with a managed
#' `~/.aws/credentials` file. `AWS_SESSION_TOKEN` is set from the profile's
#' `aws_session_token` entry, or unset when the profile has none, and
#' `dbConnect()` is called with its default credentials provider.
#'
#' For `simple` == `TRUE`, `dbConnect()` is called with `provider = NULL`.
#'
#' @md
#' @param default_schema schema name, passed to `dbConnect()` as `schema_name`
#' @param simple if `TRUE`, connect with `provider = NULL`; if `FALSE` (the
#'        default), also export the profile's session token and connect with
#'        the default credentials provider
#' @return the connection object returned by `dbConnect()`
#' @export
athena_connect <- function(default_schema, simple = FALSE) {

  athena_jdbc <- Athena()

  credentials_file <- path.expand("~/.aws/credentials")
  if (!file.exists(credentials_file)) {
    stop("AWS credentials file not found: ", credentials_file, call. = FALSE)
  }

  profile_name <- Sys.getenv("AWS_PROFILE", "default")
  aws_profile <- ini::read.ini(credentials_file)[[profile_name]]
  if (is.null(aws_profile)) {
    stop("Profile '", profile_name, "' not found in ", credentials_file,
         call. = FALSE)
  }

  access_key_id <- aws_profile[["aws_access_key_id"]]
  secret_access_key <- aws_profile[["aws_secret_access_key"]]
  if (is.null(access_key_id) || is.null(secret_access_key)) {
    stop("Profile '", profile_name, "' in ", credentials_file,
         " must define aws_access_key_id and aws_secret_access_key",
         call. = FALSE)
  }

  Sys.setenv(
    AWS_ACCESS_KEY_ID = access_key_id,
    AWS_SECRET_ACCESS_KEY = secret_access_key
  )

  if (simple) {
    return(dbConnect(athena_jdbc, provider = NULL, schema_name = default_schema))
  }

  # Always clear a token left over from an earlier call, then set it only when
  # the profile has one: Sys.setenv() errors when handed a NULL value.
  Sys.unsetenv("AWS_SESSION_TOKEN")
  session_token <- aws_profile[["aws_session_token"]]
  if (!is.null(session_token)) {
    Sys.setenv(AWS_SESSION_TOKEN = session_token)
  }

  dbConnect(athena_jdbc, schema_name = default_schema)

}
@ -0,0 +1,63 @@ |
|||||
|
--- |
||||
|
output: rmarkdown::github_document |
||||
|
--- |
||||
|
|
||||
|
[`metis`](https://en.wikipedia.org/wiki/Metis_(mythology)) : Helpers for Accessing and Querying Amazon Athena |
||||
|
|
||||
|
Including a lightweight RJDBC shim. |
||||
|
|
||||
|
![](https://upload.wikimedia.org/wikipedia/commons/thumb/5/53/Winged_goddess_Louvre_F32.jpg/300px-Winged_goddess_Louvre_F32.jpg) |
||||
|
|
||||
|
THIS IS SUPER ALPHA QUALITY. NOTHING TO SEE HERE. MOVE ALONG. |
||||
|
|
||||
|
The goal will be to get around enough of the "gotchas" that are preventing raw RJDBC Athena |
||||
|
connections from "just working" with `dplyr` v0.6.0+ and also get around the [`fetchSize` problem](https://www.reddit.com/r/aws/comments/6aq22b/fetchsize_limit/) without having to give up `dbGetQuery()`. |
||||
|
|
||||
|
It will also support more than the vanilla id/secret auth mechanism (it currently supports the default basic auth and temp token auth, the latter via environment variables). |
||||
|
|
||||
|
See the **Usage** section for an example. |
||||
|
|
||||
|
The following functions are implemented: |
||||
|
|
||||
|
- `athena_connect`: Make a JDBC connection to Athena (this returns an `AthenaConnection` object which is a subclass of its vanilla RJDBC counterpart) |
||||
|
- `Athena`: AthenaJDBC |
||||
|
- `AthenaConnection-class`: AthenaJDBC |
||||
|
- `AthenaDriver-class`: AthenaJDBC |
||||
|
- `AthenaResult-class`: AthenaJDBC |
||||
|
- `dbConnect-method`: AthenaJDBC |
||||
|
- `dbGetQuery-method`: AthenaJDBC |
||||
|
- `dbSendQuery-method`: AthenaJDBC |
||||
|
|
||||
|
### Installation |
||||
|
|
||||
|
```{r eval=FALSE} |
||||
|
devtools::install_github("hrbrmstr/metis") |
||||
|
``` |
||||
|
|
||||
|
```{r message=FALSE, warning=FALSE, error=FALSE, include=FALSE} |
||||
|
options(width=120) |
||||
|
``` |
||||
|
|
||||
|
### Usage |
||||
|
|
||||
|
```{r message=FALSE, warning=FALSE, error=FALSE} |
||||
|
library(metis) |
||||
|
library(dplyr) |
||||
|
|
||||
|
# current version |
||||
|
packageVersion("metis") |
||||
|
``` |
||||
|
|
||||
|
```{r message=FALSE, warning=FALSE, error=FALSE, eval=FALSE} |
||||
|
ath <- athena_connect("your_schema_name") |
||||
|
|
||||
|
res <- dbGetQuery(ath, " |
||||
|
SELECT format_datetime(timestamp, 'yyyy-MM-dd HH:00:00') timestamp, |
||||
|
port as field, count(port) cnt_field FROM your_schema_name.your_table_name |
||||
|
WHERE CONTAINS(ARRAY['201705'], date) |
||||
|
AND port IN (445, 139, 3389) |
||||
|
AND timestamp > date '2017-05-01' |
||||
|
AND timestamp <= date '2017-05-22' |
||||
|
GROUP BY format_datetime(timestamp, 'yyyy-MM-dd HH:00:00'), port LIMIT 1000000 |
||||
|
") |
||||
|
``` |
@ -0,0 +1,57 @@ |
|||||
|
|
||||
|
[`metis`](https://en.wikipedia.org/wiki/Metis_(mythology)) : Helpers for Accessing and Querying Amazon Athena |
||||
|
|
||||
|
Including a lightweight RJDBC shim. |
||||
|
|
||||
|
![](https://upload.wikimedia.org/wikipedia/commons/thumb/5/53/Winged_goddess_Louvre_F32.jpg/300px-Winged_goddess_Louvre_F32.jpg) |
||||
|
|
||||
|
THIS IS SUPER ALPHA QUALITY. NOTHING TO SEE HERE. MOVE ALONG. |
||||
|
|
||||
|
The goal will be to get around enough of the "gotchas" that are preventing raw RJDBC Athena connections from "just working" with `dplyr` v0.6.0+ and also get around the [`fetchSize` problem](https://www.reddit.com/r/aws/comments/6aq22b/fetchsize_limit/) without having to give up `dbGetQuery()`. |
||||
|
|
||||
|
It will also support more than the vanilla id/secret auth mechanism (it currently supports the default basic auth and temp token auth, the latter via environment variables). |
||||
|
|
||||
|
See the **Usage** section for an example. |
||||
|
|
||||
|
The following functions are implemented: |
||||
|
|
||||
|
- `athena_connect`: Make a JDBC connection to Athena (this returns an `AthenaConnection` object which is a subclass of its vanilla RJDBC counterpart) |
||||
|
- `Athena`: AthenaJDBC |
||||
|
- `AthenaConnection-class`: AthenaJDBC |
||||
|
- `AthenaDriver-class`: AthenaJDBC |
||||
|
- `AthenaResult-class`: AthenaJDBC |
||||
|
- `dbConnect-method`: AthenaJDBC |
||||
|
- `dbGetQuery-method`: AthenaJDBC |
||||
|
- `dbSendQuery-method`: AthenaJDBC |
||||
|
|
||||
|
### Installation |
||||
|
|
||||
|
``` r |
||||
|
devtools::install_github("hrbrmstr/metis") |
||||
|
``` |
||||
|
|
||||
|
### Usage |
||||
|
|
||||
|
``` r |
||||
|
library(metis) |
||||
|
library(dplyr) |
||||
|
|
||||
|
# current version |
||||
|
packageVersion("metis") |
||||
|
``` |
||||
|
|
||||
|
## [1] '0.1.0' |
||||
|
|
||||
|
``` r |
||||
|
ath <- athena_connect("your_schema_name") |
||||
|
|
||||
|
res <- dbGetQuery(ath, " |
||||
|
SELECT format_datetime(timestamp, 'yyyy-MM-dd HH:00:00') timestamp, |
||||
|
port as field, count(port) cnt_field FROM your_schema_name.your_table_name |
||||
|
WHERE CONTAINS(ARRAY['201705'], date) |
||||
|
AND port IN (445, 139, 3389) |
||||
|
AND timestamp > date '2017-05-01' |
||||
|
AND timestamp <= date '2017-05-22' |
||||
|
GROUP BY format_datetime(timestamp, 'yyyy-MM-dd HH:00:00'), port LIMIT 1000000 |
||||
|
") |
||||
|
``` |
Binary file not shown.
@ -0,0 +1,11 @@ |
|||||
|
% Generated by roxygen2: do not edit by hand |
||||
|
% Please edit documentation in R/jdbc.r |
||||
|
\name{Athena} |
||||
|
\alias{Athena} |
||||
|
\title{AthenaJDBC} |
||||
|
\usage{ |
||||
|
Athena(identifier.quote = "`") |
||||
|
} |
||||
|
\description{ |
||||
|
AthenaJDBC |
||||
|
} |
@ -0,0 +1,9 @@ |
|||||
|
% Generated by roxygen2: do not edit by hand |
||||
|
% Please edit documentation in R/jdbc.r |
||||
|
\docType{class} |
||||
|
\name{AthenaConnection-class} |
||||
|
\alias{AthenaConnection-class} |
||||
|
\title{AthenaJDBC} |
||||
|
\description{ |
||||
|
AthenaJDBC |
||||
|
} |
@ -0,0 +1,9 @@ |
|||||
|
% Generated by roxygen2: do not edit by hand |
||||
|
% Please edit documentation in R/jdbc.r |
||||
|
\docType{class} |
||||
|
\name{AthenaDriver-class} |
||||
|
\alias{AthenaDriver-class} |
||||
|
\title{AthenaJDBC} |
||||
|
\description{ |
||||
|
AthenaJDBC |
||||
|
} |
@ -0,0 +1,9 @@ |
|||||
|
% Generated by roxygen2: do not edit by hand |
||||
|
% Please edit documentation in R/jdbc.r |
||||
|
\docType{class} |
||||
|
\name{AthenaResult-class} |
||||
|
\alias{AthenaResult-class} |
||||
|
\title{AthenaJDBC} |
||||
|
\description{ |
||||
|
AthenaJDBC |
||||
|
} |
@ -0,0 +1,27 @@ |
|||||
|
% Generated by roxygen2: do not edit by hand |
||||
|
% Please edit documentation in R/metis.r |
||||
|
\name{athena_connect} |
||||
|
\alias{athena_connect} |
||||
|
\title{Make a JDBC connection to Athena} |
||||
|
\usage{ |
||||
|
athena_connect(default_schema, simple = FALSE) |
||||
|
} |
||||
|
\arguments{ |
||||
|
\item{default_schema}{def sch} |
||||
|
|
||||
|
\item{simple}{simple} |
||||
|
} |
||||
|
\description{ |
||||
|
Handles the up-front JDBC config |
||||
|
} |
||||
|
\details{ |
||||
|
For all connection types it is expected that you have the following environment variables |
||||
|
defined (a good place is \code{~/.Renviron}): |
||||
|
\itemize{ |
||||
|
\item \code{AWS_S3_STAGING_DIR}: the name of the S3 bucket where Athena can write stuff |
||||
|
\item \code{AWS_PROFILE}: the AWS profile ID in \code{~/.aws/credentials} (defaults to \code{default} if not present) |
||||
|
} |
||||
|
|
||||
|
For \code{simple} == \code{FALSE} the expectation is that you're working with a managed |
||||
|
\code{~/.aws/credentials} file. |
||||
|
} |
@ -0,0 +1,15 @@ |
|||||
|
% Generated by roxygen2: do not edit by hand |
||||
|
% Please edit documentation in R/jdbc.r |
||||
|
\docType{methods} |
||||
|
\name{dbConnect,AthenaDriver-method} |
||||
|
\alias{dbConnect,AthenaDriver-method} |
||||
|
\title{AthenaJDBC} |
||||
|
\usage{ |
||||
|
\S4method{dbConnect}{AthenaDriver}(drv, |
||||
|
provider = "com.amazonaws.athena.jdbc.shaded.com.amazonaws.auth.EnvironmentVariableCredentialsProvider", |
||||
|
conn_string = "jdbc:awsathena://athena.us-east-1.amazonaws.com:443/", |
||||
|
schema_name, ...) |
||||
|
} |
||||
|
\description{ |
||||
|
AthenaJDBC |
||||
|
} |
@ -0,0 +1,12 @@ |
|||||
|
% Generated by roxygen2: do not edit by hand |
||||
|
% Please edit documentation in R/jdbc.r |
||||
|
\docType{methods} |
||||
|
\name{dbGetQuery,AthenaConnection,character-method} |
||||
|
\alias{dbGetQuery,AthenaConnection,character-method} |
||||
|
\title{AthenaJDBC} |
||||
|
\usage{ |
||||
|
\S4method{dbGetQuery}{AthenaConnection,character}(conn, statement, ...) |
||||
|
} |
||||
|
\description{ |
||||
|
AthenaJDBC |
||||
|
} |
@ -0,0 +1,12 @@ |
|||||
|
% Generated by roxygen2: do not edit by hand |
||||
|
% Please edit documentation in R/jdbc.r |
||||
|
\docType{methods} |
||||
|
\name{dbSendQuery,AthenaDriver,ANY-method} |
||||
|
\alias{dbSendQuery,AthenaDriver,ANY-method} |
||||
|
\title{AthenaJDBC} |
||||
|
\usage{ |
||||
|
\S4method{dbSendQuery}{AthenaDriver,ANY}(conn, statement, ...) |
||||
|
} |
||||
|
\description{ |
||||
|
AthenaJDBC |
||||
|
} |
@ -0,0 +1,13 @@ |
|||||
|
% Generated by roxygen2: do not edit by hand |
||||
|
% Please edit documentation in R/metis-package.R |
||||
|
\docType{package} |
||||
|
\name{metis} |
||||
|
\alias{metis} |
||||
|
\alias{metis-package} |
||||
|
\title{Helpers for Accessing and Querying Amazon Athena} |
||||
|
\description{ |
||||
|
Including a lightweight RJDBC shim. |
||||
|
} |
||||
|
\author{ |
||||
|
Bob Rudis (bob@rud.is) |
||||
|
} |
@ -0,0 +1,21 @@ |
|||||
|
Version: 1.0 |
||||
|
|
||||
|
RestoreWorkspace: Default |
||||
|
SaveWorkspace: Default |
||||
|
AlwaysSaveHistory: Default |
||||
|
|
||||
|
EnableCodeIndexing: Yes |
||||
|
UseSpacesForTab: Yes |
||||
|
NumSpacesForTab: 2 |
||||
|
Encoding: UTF-8 |
||||
|
|
||||
|
RnwWeave: Sweave |
||||
|
LaTeX: pdfLaTeX |
||||
|
|
||||
|
StripTrailingWhitespace: Yes |
||||
|
|
||||
|
BuildType: Package |
||||
|
PackageUseDevtools: Yes |
||||
|
PackageInstallArgs: --no-multiarch --with-keep.source |
||||
|
PackageBuildArgs: --resave-data |
||||
|
PackageRoxygenize: rd,collate,namespace |
@ -0,0 +1,2 @@ |
|||||
|
library(testthat) |
||||
|
test_check("metis") |
@ -0,0 +1,6 @@ |
|||||
|
context("basic functionality")

# Smoke test only: a real query needs Java, the Athena jar and AWS credentials,
# so this just checks that the exported entry points exist.
test_that("the exported connection helpers are available", {

  expect_true(is.function(Athena))
  expect_true(is.function(athena_connect))

})
Loading…
Reference in new issue