commit a01ab3351fc87f545aa00623da4183b200a6a89f Author: Bob Rudis Date: Mon May 22 14:59:55 2017 -0400 initial commit diff --git a/.Rbuildignore b/.Rbuildignore new file mode 100644 index 0000000..1c60b19 --- /dev/null +++ b/.Rbuildignore @@ -0,0 +1,10 @@ +^.*\.Rproj$ +^\.Rproj\.user$ +^\.travis\.yml$ +^README\.*Rmd$ +^README\.*html$ +^NOTES\.*Rmd$ +^NOTES\.*html$ +^\.codecov\.yml$ +^README_files$ +^doc$ diff --git a/.codecov.yml b/.codecov.yml new file mode 100644 index 0000000..69cb760 --- /dev/null +++ b/.codecov.yml @@ -0,0 +1 @@ +comment: false diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..cce1f17 --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +.DS_Store +.Rproj.user +.Rhistory +.RData +.Rproj +src/*.o +src/*.so +src/*.dll diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..76d9586 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,31 @@ +language: r + +warnings_are_errors: true + +sudo: required + +cache: packages + +r: + - oldrel + - release + - devel + +apt_packages: + - libv8-dev + - xclip + +env: + global: + - CRAN: http://cran.rstudio.com + +after_success: + - Rscript -e 'covr::codecov()' + +notifications: + email: + - bob@rud.is + irc: + channels: + - "104.236.112.222#builds" + nick: travisci diff --git a/DESCRIPTION b/DESCRIPTION new file mode 100644 index 0000000..0feb6ed --- /dev/null +++ b/DESCRIPTION @@ -0,0 +1,22 @@ +Package: metis +Type: Package +Title: Helpers for Accessing and Querying Amazon Athena +Version: 0.1.0 +Date: 2017-05-16 +Author: Bob Rudis (bob@rud.is) +Maintainer: Bob Rudis +Description: Helpers for Accessing and Querying Amazon Athena. Including a lightweight RJDBC shim. 
+URL: https://github.com/hrbrmstr/metis
+BugReports: https://github.com/hrbrmstr/metis/issues
+License: AGPL
+Suggests:
+    testthat,
+    covr
+Depends:
+    R (>= 3.2.0),
+    RJDBC
+Imports:
+    DBI,
+    dplyr,
+    ini
+RoxygenNote: 6.0.1
diff --git a/NAMESPACE b/NAMESPACE
new file mode 100644
index 0000000..d55eeb7
--- /dev/null
+++ b/NAMESPACE
@@ -0,0 +1,14 @@
+# Generated by roxygen2: do not edit by hand
+
+export(Athena)
+export(athena_connect)
+exportClasses(AthenaConnection)
+exportClasses(AthenaDriver)
+exportClasses(AthenaResult)
+exportMethods(dbConnect)
+exportMethods(dbGetQuery)
+exportMethods(dbSendQuery)
+import(DBI)
+import(RJDBC)
+import(dplyr)
+import(ini)
diff --git a/NEWS.md b/NEWS.md
new file mode 100644
index 0000000..9b4679b
--- /dev/null
+++ b/NEWS.md
@@ -0,0 +1,2 @@
+0.1.0
+* Initial release
diff --git a/R/jdbc.r b/R/jdbc.r
new file mode 100644
index 0000000..319e68b
--- /dev/null
+++ b/R/jdbc.r
@@ -0,0 +1,142 @@
+#' Athena JDBC driver class
+#'
+#' S4 class that extends the RJDBC `JDBCDriver` class so the Athena-specific
+#' `dbConnect` method in this file can be dispatched on it.
+#'
+#' @md
+#' @export
+setClass("AthenaDriver", representation("JDBCDriver", identifier.quote="character", jdrv="jobjRef"))
+
+#' Create an Athena JDBC driver
+#'
+#' Loads `com.amazonaws.athena.jdbc.AthenaDriver` from the jar bundled with
+#' this package and returns the RJDBC driver coerced to an `AthenaDriver`.
+#'
+#' @md
+#' @param identifier.quote identifier quote character, passed on to `JDBC()`
+#' @return an `AthenaDriver` object
+#' @export
+Athena <- function(identifier.quote='`') {
+  # Forward the caller's `identifier.quote`. It used to be hard-coded to "'"
+  # here, so the argument was silently ignored.
+  drv <- JDBC(driverClass="com.amazonaws.athena.jdbc.AthenaDriver",
+              classPath=system.file("AthenaJDBC41-1.0.1.jar", package="metis"),
+              identifier.quote=identifier.quote)
+  return(as(drv, "AthenaDriver"))
+}
+
+#' Connect to Athena
+#'
+#' Opens the JDBC connection through the inherited RJDBC `dbConnect` method and
+#' returns it as an `AthenaConnection`. The S3 staging directory is always read
+#' from the `AWS_S3_STAGING_DIR` environment variable.
+#'
+#' @md
+#' @param drv an `AthenaDriver`, e.g. from `Athena()`
+#' @param provider class name of the AWS credentials provider handed to the
+#'        JDBC driver. Use `NULL` to send the `AWS_ACCESS_KEY_ID` and
+#'        `AWS_SECRET_ACCESS_KEY` environment variables as JDBC user/password
+#'        instead.
+#' @param conn_string JDBC connection string
+#' @param schema_name schema name passed to the driver
+#' @param ... further arguments for the inherited `dbConnect` method
+#' @return an `AthenaConnection` object
+#' @export
+setMethod(
+
+  "dbConnect",
+  "AthenaDriver",
+
+  def = function(drv,
+                 provider = "com.amazonaws.athena.jdbc.shaded.com.amazonaws.auth.EnvironmentVariableCredentialsProvider",
+                 conn_string = 'jdbc:awsathena://athena.us-east-1.amazonaws.com:443/',
+                 schema_name, ...) {
+
+    if (!is.null(provider)) {
+
+      jc <- callNextMethod(drv, conn_string,
+                           s3_staging_dir=Sys.getenv("AWS_S3_STAGING_DIR"),
+                           schema_name=schema_name,
+                           aws_credentials_provider_class=provider, ...)
+
+    } else {
+
+      # Honour `conn_string` and `...` here as well; this branch used to
+      # hard-code the us-east-1 URL and drop any extra arguments.
+      jc <- callNextMethod(drv, conn_string,
+                           s3_staging_dir=Sys.getenv("AWS_S3_STAGING_DIR"),
+                           schema_name=schema_name,
+                           user = Sys.getenv("AWS_ACCESS_KEY_ID"),
+                           password = Sys.getenv("AWS_SECRET_ACCESS_KEY"), ...)
+
+    }
+
+    return(as(jc, "AthenaConnection"))
+
+  }
+
+)
+
+#' Athena JDBC connection class
+#'
+#' S4 class extending the RJDBC `JDBCConnection` class.
+#'
+#' @md
+#' @export
+setClass("AthenaConnection", contains = "JDBCConnection")
+
+#' Athena JDBC result class
+#'
+#' S4 class extending the RJDBC `JDBCResult` class.
+#'
+#' @md
+#' @export
+setClass("AthenaResult", contains = "JDBCResult")
+
+#' Send a query to Athena
+#'
+#' Runs the inherited `dbSendQuery` method and returns its result as an
+#' `AthenaResult`.
+#'
+#' @md
+#' @param conn an `AthenaConnection`
+#' @param statement the SQL statement to send
+#' @param ... further arguments for the inherited `dbSendQuery` method
+#' @return an `AthenaResult` object
+#' @export
+setMethod(
+
+  "dbSendQuery",
+  # Dispatch on the connection class: `dbGetQuery()` below calls this with an
+  # `AthenaConnection`, so the previous "AthenaDriver" signature never matched.
+  signature(conn="AthenaConnection", statement="character"),
+
+  def = function(conn, statement, ...) {
+    return(as(callNextMethod(), "AthenaResult"))
+  }
+
+)
+
+#' Run a query on Athena and fetch the whole result
+#'
+#' Sends the statement with `dbSendQuery()`, fetches the result with
+#' `fetch(r, -1, block=256)` and returns it via `dplyr::tbl_df()`. The JDBC
+#' statement behind the result is closed when the function exits.
+#'
+#' @md
+#' @param conn an `AthenaConnection`
+#' @param statement the SQL statement to run
+#' @param ... passed on to `dbSendQuery()`
+#' @export
+setMethod(
+
+  "dbGetQuery",
+  signature(conn="AthenaConnection", statement="character"),
+
+  def = function(conn, statement, ...) {
+    r <- dbSendQuery(conn, statement, ...)
+    on.exit(.jcall(r@stat, "V", "close"))
+    dplyr::tbl_df(fetch(r, -1, block=256))
+  }
+
+)
diff --git a/R/metis-package.R b/R/metis-package.R
new file mode 100644
index 0000000..e44dd01
--- /dev/null
+++ b/R/metis-package.R
@@ -0,0 +1,12 @@
+#' Helpers for Accessing and Querying Amazon Athena
+#'
+#' Including a lightweight RJDBC shim.
+#'
+#' @name metis
+#' @docType package
+#' @author Bob Rudis (bob@@rud.is)
+#' @import RJDBC
+#' @import DBI
+#' @import dplyr
+#' @import ini
+NULL
diff --git a/R/metis.r b/R/metis.r
new file mode 100644
index 0000000..221fce3
--- /dev/null
+++ b/R/metis.r
@@ -0,0 +1,67 @@
+#' Make a JDBC connection to Athena
+#'
+#' Handles the up-front JDBC config: reads the selected profile from
+#' `~/.aws/credentials`, exports its keys as environment variables (via
+#' `Sys.setenv()`) and opens the connection.
+#'
+#' For all connection types it is expected that you have the following environment variables
+#' defined (a good place is `~/.Renviron`):
+#'
+#' - `AWS_S3_STAGING_DIR`: the name of the S3 bucket where Athena can write stuff
+#' - `AWS_PROFILE`: the AWS profile ID in `~/.aws/credentials` (defaults to `default` if not present)
+#'
+#' For `simple` == `FALSE` the expectation is that you're working with a managed
+#' `~/.aws/credentials` file.
+#'
+#' @md
+#' @param default_schema name of the schema to use as the connection default
+#' @param simple if `TRUE`, connect with only the profile's access key id and
+#'        secret access key; if `FALSE` (the default), the profile's
+#'        `aws_session_token` is exported as well and the connection is made
+#'        with the default credentials provider of the `dbConnect` method
+#' @return the connection object returned by `dbConnect()`
+#' @export
+athena_connect <- function(default_schema, simple=FALSE) {
+
+  athena_jdbc <- Athena()
+
+  creds_file <- path.expand("~/.aws/credentials")
+  if (!file.exists(creds_file)) {
+    stop("AWS credentials file not found: ", creds_file, call. = FALSE)
+  }
+
+  profile_name <- Sys.getenv("AWS_PROFILE", "default")
+  aws_profile <- ini::read.ini(creds_file)[[profile_name]]
+  if (is.null(aws_profile)) {
+    stop("Profile '", profile_name, "' not found in ", creds_file, call. = FALSE)
+  }
+
+  # Check for every entry we are about to export so that a missing one is
+  # reported by name instead of reaching Sys.setenv() as NULL.
+  wanted <- c("aws_access_key_id", "aws_secret_access_key")
+  if (!simple) {
+    wanted <- c(wanted, "aws_session_token")
+  }
+  absent <- setdiff(wanted, names(aws_profile))
+  if (length(absent) > 0) {
+    stop("Profile '", profile_name, "' is missing: ",
+         paste(absent, collapse = ", "), call. = FALSE)
+  }
+
+  Sys.setenv(AWS_ACCESS_KEY_ID = aws_profile[["aws_access_key_id"]])
+  Sys.setenv(AWS_SECRET_ACCESS_KEY = aws_profile[["aws_secret_access_key"]])
+
+  if (!simple) {
+
+    Sys.setenv(AWS_SESSION_TOKEN = aws_profile[["aws_session_token"]])
+    con <- dbConnect(athena_jdbc, schema_name = default_schema)
+
+  } else {
+
+    con <- dbConnect(athena_jdbc, provider = NULL, schema_name = default_schema)
+
+  }
+
+  con
+
+}
diff --git a/README.Rmd b/README.Rmd
new file mode 100644
index 0000000..6e5adcb
--- /dev/null
+++ b/README.Rmd
@@ -0,0 +1,63 @@
+---
+output: rmarkdown::github_document
+---
+
+[`metis`](https://en.wikipedia.org/wiki/Metis_(mythology)) : Helpers for Accessing and Querying Amazon Athena
+
+Including a lightweight RJDBC shim.
+ +![](https://upload.wikimedia.org/wikipedia/commons/thumb/5/53/Winged_goddess_Louvre_F32.jpg/300px-Winged_goddess_Louvre_F32.jpg) + +THIS IS SUPER ALPHA QUALITY. NOTHING TO SEE HERE. MOVE ALONG. + +The goal will be to get around enough of the "gotchas" that are preventing raw RJDBC Athena +connections from "just working" with `dplyr` v0.6.0+ and also get around the [`fetchSize` problem](https://www.reddit.com/r/aws/comments/6aq22b/fetchsize_limit/) without having to give up `dbGetQuery()`. + +It will also support more than the vanilla id/secret auth mechanism (it currently supports the default basic auth and temp token auth, the latter via environment variables). + +See the **Usage** section for an example. + +The following functions are implemented: + +- `athena_connect`: Make a JDBC connection to Athena (this returns an `AthenaConnection` object which is a subclass of its vanilla RJDBC counterpart) +- `Athena`: AthenaJDBC +- `AthenaConnection-class`: AthenaJDBC +- `AthenaDriver-class`: AthenaJDBC +- `AthenaResult-class`: AthenaJDBC +- `dbConnect-method`: AthenaJDBC +- `dbGetQuery-method`: AthenaJDBC +- `dbSendQuery-method`: AthenaJDBC + +### Installation + +```{r eval=FALSE} +devtools::install_github("hrbrmstr/metis") +``` + +```{r message=FALSE, warning=FALSE, error=FALSE, include=FALSE} +options(width=120) +``` + +### Usage + +```{r message=FALSE, warning=FALSE, error=FALSE} +library(metis) +library(dplyr) + +# current version +packageVersion("metis") +``` + +```{r message=FALSE, warning=FALSE, error=FALSE, eval=FALSE} +ath <- athena_connect("your_schema_name") + +res <- dbGetQuery(ath, " +SELECT format_datetime(timestamp, 'yyyy-MM-dd HH:00:00') timestamp, + port as field, count(port) cnt_field FROM your_schema_name.your_table_name + WHERE CONTAINS(ARRAY['201705'], date) + AND port IN (445, 139, 3389) + AND timestamp > date '2017-05-01' + AND timestamp <= date '2017-05-22' +GROUP BY format_datetime(timestamp, 'yyyy-MM-dd HH:00:00'), port LIMIT 1000000 +") 
+``` diff --git a/README.md b/README.md new file mode 100644 index 0000000..9a5778f --- /dev/null +++ b/README.md @@ -0,0 +1,57 @@ + +[`metis`](https://en.wikipedia.org/wiki/Metis_(mythology)) : Helpers for Accessing and Querying Amazon Athena + +Including a lightweight RJDBC shim. + +![](https://upload.wikimedia.org/wikipedia/commons/thumb/5/53/Winged_goddess_Louvre_F32.jpg/300px-Winged_goddess_Louvre_F32.jpg) + +THIS IS SUPER ALPHA QUALITY. NOTHING TO SEE HERE. MOVE ALONG. + +The goal will be to get around enough of the "gotchas" that are preventing raw RJDBC Athena connections from "just working" with `dplyr` v0.6.0+ and also get around the [`fetchSize` problem](https://www.reddit.com/r/aws/comments/6aq22b/fetchsize_limit/) without having to give up `dbGetQuery()`. + +It will also support more than the vanilla id/secret auth mechanism (it currently supports the default basic auth and temp token auth, the latter via environment variables). + +See the **Usage** section for an example. + +The following functions are implemented: + +- `athena_connect`: Make a JDBC connection to Athena (this returns an `AthenaConnection` object which is a subclass of its vanilla RJDBC counterpart) +- `Athena`: AthenaJDBC +- `AthenaConnection-class`: AthenaJDBC +- `AthenaDriver-class`: AthenaJDBC +- `AthenaResult-class`: AthenaJDBC +- `dbConnect-method`: AthenaJDBC +- `dbGetQuery-method`: AthenaJDBC +- `dbSendQuery-method`: AthenaJDBC + +### Installation + +``` r +devtools::install_github("hrbrmstr/metis") +``` + +### Usage + +``` r +library(metis) +library(dplyr) + +# current version +packageVersion("metis") +``` + + ## [1] '0.1.0' + +``` r +ath <- athena_connect("your_schema_name") + +res <- dbGetQuery(ath, " +SELECT format_datetime(timestamp, 'yyyy-MM-dd HH:00:00') timestamp, + port as field, count(port) cnt_field FROM your_schema_name.your_table_name + WHERE CONTAINS(ARRAY['201705'], date) + AND port IN (445, 139, 3389) + AND timestamp > date '2017-05-01' + AND timestamp <= 
date '2017-05-22' +GROUP BY format_datetime(timestamp, 'yyyy-MM-dd HH:00:00'), port LIMIT 1000000 +") +``` diff --git a/inst/AthenaJDBC41-1.0.1.jar b/inst/AthenaJDBC41-1.0.1.jar new file mode 100644 index 0000000..3208676 Binary files /dev/null and b/inst/AthenaJDBC41-1.0.1.jar differ diff --git a/man/Athena.Rd b/man/Athena.Rd new file mode 100644 index 0000000..11c7752 --- /dev/null +++ b/man/Athena.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/jdbc.r +\name{Athena} +\alias{Athena} +\title{AthenaJDBC} +\usage{ +Athena(identifier.quote = "`") +} +\description{ +AthenaJDBC +} diff --git a/man/AthenaConnection-class.Rd b/man/AthenaConnection-class.Rd new file mode 100644 index 0000000..d788751 --- /dev/null +++ b/man/AthenaConnection-class.Rd @@ -0,0 +1,9 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/jdbc.r +\docType{class} +\name{AthenaConnection-class} +\alias{AthenaConnection-class} +\title{AthenaJDBC} +\description{ +AthenaJDBC +} diff --git a/man/AthenaDriver-class.Rd b/man/AthenaDriver-class.Rd new file mode 100644 index 0000000..3457057 --- /dev/null +++ b/man/AthenaDriver-class.Rd @@ -0,0 +1,9 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/jdbc.r +\docType{class} +\name{AthenaDriver-class} +\alias{AthenaDriver-class} +\title{AthenaJDBC} +\description{ +AthenaJDBC +} diff --git a/man/AthenaResult-class.Rd b/man/AthenaResult-class.Rd new file mode 100644 index 0000000..ea55534 --- /dev/null +++ b/man/AthenaResult-class.Rd @@ -0,0 +1,9 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/jdbc.r +\docType{class} +\name{AthenaResult-class} +\alias{AthenaResult-class} +\title{AthenaJDBC} +\description{ +AthenaJDBC +} diff --git a/man/athena_connect.Rd b/man/athena_connect.Rd new file mode 100644 index 0000000..de9ef71 --- /dev/null +++ b/man/athena_connect.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not 
edit by hand +% Please edit documentation in R/metis.r +\name{athena_connect} +\alias{athena_connect} +\title{Make a JDBC connection to Athena} +\usage{ +athena_connect(default_schema, simple = FALSE) +} +\arguments{ +\item{default_schema}{def sch} + +\item{simple}{simple} +} +\description{ +Handles the up-front JDBC config +} +\details{ +For all connection types it is expected that you have the following environment variables +defined (a good place is \code{~/.Renviron}): +\itemize{ +\item \code{AWS_S3_STAGING_DIR}: the name of the S3 bucket where Athena can write stuff +\item \code{AWS_PROFILE}: the AWS profile ID in \code{~/.aws/credentials} (defaults to \code{default} if not present) +} + +For \code{simple} == \code{FALSE} the expectation is that you're working with a managed +\code{~/.aws/credentials} file. +} diff --git a/man/dbConnect-AthenaDriver-method.Rd b/man/dbConnect-AthenaDriver-method.Rd new file mode 100644 index 0000000..1204874 --- /dev/null +++ b/man/dbConnect-AthenaDriver-method.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/jdbc.r +\docType{methods} +\name{dbConnect,AthenaDriver-method} +\alias{dbConnect,AthenaDriver-method} +\title{AthenaJDBC} +\usage{ +\S4method{dbConnect}{AthenaDriver}(drv, + provider = "com.amazonaws.athena.jdbc.shaded.com.amazonaws.auth.EnvironmentVariableCredentialsProvider", + conn_string = "jdbc:awsathena://athena.us-east-1.amazonaws.com:443/", + schema_name, ...) 
+} +\description{ +AthenaJDBC +} diff --git a/man/dbGetQuery-AthenaConnection-character-method.Rd b/man/dbGetQuery-AthenaConnection-character-method.Rd new file mode 100644 index 0000000..89330f4 --- /dev/null +++ b/man/dbGetQuery-AthenaConnection-character-method.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/jdbc.r +\docType{methods} +\name{dbGetQuery,AthenaConnection,character-method} +\alias{dbGetQuery,AthenaConnection,character-method} +\title{AthenaJDBC} +\usage{ +\S4method{dbGetQuery}{AthenaConnection,character}(conn, statement, ...) +} +\description{ +AthenaJDBC +} diff --git a/man/dbSendQuery-AthenaDriver-ANY-method.Rd b/man/dbSendQuery-AthenaDriver-ANY-method.Rd new file mode 100644 index 0000000..c2d8c60 --- /dev/null +++ b/man/dbSendQuery-AthenaDriver-ANY-method.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/jdbc.r +\docType{methods} +\name{dbSendQuery,AthenaDriver,ANY-method} +\alias{dbSendQuery,AthenaDriver,ANY-method} +\title{AthenaJDBC} +\usage{ +\S4method{dbSendQuery}{AthenaDriver,ANY}(conn, statement, ...) +} +\description{ +AthenaJDBC +} diff --git a/man/metis.Rd b/man/metis.Rd new file mode 100644 index 0000000..e6adc99 --- /dev/null +++ b/man/metis.Rd @@ -0,0 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/metis-package.R +\docType{package} +\name{metis} +\alias{metis} +\alias{metis-package} +\title{Helpers for Accessing and Querying Amazon Athena} +\description{ +Including a lightweight RJDBC shim. 
+} +\author{ +Bob Rudis (bob@rud.is) +} diff --git a/metis.Rproj b/metis.Rproj new file mode 100644 index 0000000..446d9e1 --- /dev/null +++ b/metis.Rproj @@ -0,0 +1,21 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +StripTrailingWhitespace: Yes + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source +PackageBuildArgs: --resave-data +PackageRoxygenize: rd,collate,namespace diff --git a/tests/test-all.R b/tests/test-all.R new file mode 100644 index 0000000..9bebc31 --- /dev/null +++ b/tests/test-all.R @@ -0,0 +1,2 @@ +library(testthat) +test_check("metis") diff --git a/tests/testthat/test-metis.R b/tests/testthat/test-metis.R new file mode 100644 index 0000000..ab6f62f --- /dev/null +++ b/tests/testthat/test-metis.R @@ -0,0 +1,6 @@ +context("basic functionality") +test_that("we can do something", { + + #expect_that(some_function(), is_a("data.frame")) + +})