peilaus alkaen https://git.sr.ht/~hrbrmstr/metis-jars
boB Rudis
7 vuotta sitten
commit
a01ab3351f
25 muutettua tiedostoa jossa 505 lisäystä ja 0 poistoa
@ -0,0 +1,10 @@ |
|||
^.*\.Rproj$ |
|||
^\.Rproj\.user$ |
|||
^\.travis\.yml$ |
|||
^README\.*Rmd$ |
|||
^README\.*html$ |
|||
^NOTES\.*Rmd$ |
|||
^NOTES\.*html$ |
|||
^\.codecov\.yml$ |
|||
^README_files$ |
|||
^doc$ |
@ -0,0 +1 @@ |
|||
comment: false |
@ -0,0 +1,8 @@ |
|||
.DS_Store |
|||
.Rproj.user |
|||
.Rhistory |
|||
.RData |
|||
.Rproj |
|||
src/*.o |
|||
src/*.so |
|||
src/*.dll |
@ -0,0 +1,31 @@ |
|||
language: r |
|||
|
|||
warnings_are_errors: true |
|||
|
|||
sudo: required |
|||
|
|||
cache: packages |
|||
|
|||
r: |
|||
- oldrel |
|||
- release |
|||
- devel |
|||
|
|||
apt_packages: |
|||
- libv8-dev |
|||
- xclip |
|||
|
|||
env: |
|||
global: |
|||
- CRAN: http://cran.rstudio.com |
|||
|
|||
after_success: |
|||
- Rscript -e 'covr::codecov()' |
|||
|
|||
notifications: |
|||
email: |
|||
- bob@rud.is |
|||
irc: |
|||
channels: |
|||
- "104.236.112.222#builds" |
|||
nick: travisci |
@ -0,0 +1,22 @@ |
|||
Package: metis |
|||
Type: Package |
|||
Title: Helpers for Accessing and Querying Amazon Athena |
|||
Version: 0.1.0 |
|||
Date: 2017-05-16 |
|||
Author: Bob Rudis (bob@rud.is) |
|||
Maintainer: Bob Rudis <bob@rud.is> |
|||
Description: Helpers for accessing and querying Amazon Athena, including a lightweight RJDBC shim. |
|||
URL: https://github.com/hrbrmstr/metis |
|||
BugReports: https://github.com/hrbrmstr/metis/issues |
|||
License: AGPL |
|||
Suggests: |
|||
testthat, |
|||
covr |
|||
Depends: |
|||
R (>= 3.2.0), |
|||
RJDBC |
|||
Imports: |
|||
DBI, |
|||
dplyr, |
|||
ini |
|||
RoxygenNote: 6.0.1 |
@ -0,0 +1,14 @@ |
|||
# Generated by roxygen2: do not edit by hand |
|||
|
|||
export(Athena) |
|||
export(athena_connect) |
|||
exportClasses(AthenaConnection) |
|||
exportClasses(AthenaDriver) |
|||
exportClasses(AthenaResult) |
|||
exportMethods(dbConnect) |
|||
exportMethods(dbGetQuery) |
|||
exportMethods(dbSendQuery) |
|||
import(DBI) |
|||
import(RJDBC) |
|||
import(dplyr) |
|||
import(ini) |
@ -0,0 +1,2 @@ |
|||
0.1.0 |
|||
* Initial release |
@ -0,0 +1,91 @@ |
|||
#' Athena JDBC driver class
#'
#' S4 class that extends RJDBC's \code{JDBCDriver}. Objects of this class are
#' produced by \code{Athena()}.
#'
#' @export
setClass(
  "AthenaDriver",
  representation(
    "JDBCDriver",
    identifier.quote = "character",
    jdrv = "jobjRef"
  )
)
|||
|
|||
#' Create an Athena JDBC driver
#'
#' Builds an RJDBC driver backed by the Athena JDBC jar that ships with this
#' package and returns it as an \code{AthenaDriver}.
#'
#' @param identifier.quote character used to quote identifiers; forwarded to
#'        \code{RJDBC::JDBC()}.
#' @return an \code{AthenaDriver} object
#' @export
Athena <- function(identifier.quote = '`') {

  # system.file() returns "" when the jar is not part of the installed package;
  # fail here with a clear message instead of a Java class-loading error later.
  jar_path <- system.file("AthenaJDBC41-1.0.1.jar", package = "metis")
  if (!nzchar(jar_path)) {
    stop("Athena JDBC jar not found in the installed 'metis' package",
         call. = FALSE)
  }

  drv <- JDBC(
    driverClass = "com.amazonaws.athena.jdbc.AthenaDriver",
    classPath = jar_path,
    # honour the caller's choice (previously a hard-coded "'" was always used)
    identifier.quote = identifier.quote
  )

  return(as(drv, "AthenaDriver"))

}
|||
|
|||
#' Connect to Athena through an AthenaDriver
#'
#' Delegates to the inherited \code{dbConnect} method and returns the result as
#' an \code{AthenaConnection}. The S3 staging directory is always read from the
#' \code{AWS_S3_STAGING_DIR} environment variable.
#'
#' @param drv an \code{AthenaDriver}, e.g. from \code{Athena()}
#' @param provider class name passed to the driver as
#'        \code{aws_credentials_provider_class}. Use \code{NULL} to instead pass
#'        the \code{AWS_ACCESS_KEY_ID} / \code{AWS_SECRET_ACCESS_KEY}
#'        environment variables as \code{user} / \code{password}.
#' @param conn_string JDBC connection URL
#' @param schema_name passed to the driver as \code{schema_name}
#' @param ... further arguments forwarded to the inherited \code{dbConnect}
#'        method
#' @return an \code{AthenaConnection} object
#' @export
setMethod(

  "dbConnect",
  "AthenaDriver",

  def = function(drv,
                 provider = "com.amazonaws.athena.jdbc.shaded.com.amazonaws.auth.EnvironmentVariableCredentialsProvider",
                 conn_string = 'jdbc:awsathena://athena.us-east-1.amazonaws.com:443/',
                 schema_name, ...) {

    if (!is.null(provider)) {

      # Credentials are resolved by the provider class inside the driver
      jc <- callNextMethod(drv, conn_string,
                           s3_staging_dir = Sys.getenv("AWS_S3_STAGING_DIR"),
                           schema_name = schema_name,
                           aws_credentials_provider_class = provider, ...)

    } else {

      # Basic auth: use the caller's conn_string and forward `...`, exactly as
      # the provider branch does (both were previously ignored here).
      jc <- callNextMethod(drv, conn_string,
                           s3_staging_dir = Sys.getenv("AWS_S3_STAGING_DIR"),
                           schema_name = schema_name,
                           user = Sys.getenv("AWS_ACCESS_KEY_ID"),
                           password = Sys.getenv("AWS_SECRET_ACCESS_KEY"), ...)

    }

    return(as(jc, "AthenaConnection"))

  }

)
|||
|
|||
#' Athena JDBC connection class
#'
#' S4 class that extends RJDBC's \code{JDBCConnection} without adding slots.
#'
#' @export
setClass(
  Class = "AthenaConnection",
  contains = "JDBCConnection"
)
|||
|
|||
#' Athena JDBC result class
#'
#' S4 class that extends RJDBC's \code{JDBCResult} without adding slots.
#'
#' @export
setClass(
  Class = "AthenaResult",
  contains = "JDBCResult"
)
|||
|
|||
#' Send a query over an Athena connection
#'
#' Runs the inherited \code{dbSendQuery} method and returns its result as an
#' \code{AthenaResult}.
#'
#' @param conn an \code{AthenaConnection}
#' @param statement SQL statement (character)
#' @param ... passed on to the inherited method
#' @return an \code{AthenaResult} object
#' @export
setMethod(

  "dbSendQuery",
  # Registered on the connection class: `conn` is a connection, not a driver,
  # so the previous "AthenaDriver" signature was never selected for queries.
  # The explicit (AthenaConnection, character) signature mirrors dbGetQuery.
  signature(conn = "AthenaConnection", statement = "character"),

  def = function(conn, statement, ...) {
    return(as(callNextMethod(), "AthenaResult"))
  }

)
|||
|
|||
#' Run a query and fetch all rows
#'
#' Sends \code{statement} with \code{dbSendQuery()}, fetches the complete
#' result in blocks of 256 and returns it wrapped by \code{dplyr::tbl_df()}.
#'
#' @param conn an \code{AthenaConnection}
#' @param statement SQL statement (character)
#' @param ... passed on to \code{dbSendQuery()}
#' @export
setMethod(
  f = "dbGetQuery",
  signature = signature(conn = "AthenaConnection", statement = "character"),
  definition = function(conn, statement, ...) {

    res <- dbSendQuery(conn, statement, ...)

    # Close the statement held in the result's `stat` slot when we leave,
    # whether the fetch succeeds or fails.
    on.exit(.jcall(res@stat, "V", "close"))

    rows <- fetch(res, -1, block = 256)
    dplyr::tbl_df(rows)

  }
)
@ -0,0 +1,12 @@ |
|||
#' Helpers for Accessing and Querying Amazon Athena |
|||
#' |
|||
#' Including a lightweight RJDBC shim. |
|||
#' |
|||
#' @name metis |
|||
#' @docType package |
|||
#' @author Bob Rudis (bob@@rud.is) |
|||
#' @import RJDBC |
|||
#' @import DBI |
|||
#' @import dplyr |
|||
#' @import ini |
|||
NULL |
@ -0,0 +1,48 @@ |
|||
#' Make a JDBC connection to Athena
#'
#' Handles the up-front JDBC config
#'
#' For all connection types it is expected that you have the following environment variables
#' defined (a good place is `~/.Renviron`):
#'
#' - `AWS_S3_STAGING_DIR`: the name of the S3 bucket where Athena can write stuff
#' - `AWS_PROFILE`: the AWS profile ID in `~/.aws/credentials` (defaults to `default` if not present)
#'
#' For `simple` == `FALSE` the expectation is that you're working with a managed
#' `~/.aws/credentials` file.
#'
#' As a side effect, `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` (and, when
#' `simple` is `FALSE`, `AWS_SESSION_TOKEN`) are set in the current R session
#' from the selected profile.
#'
#' @md
#' @param default_schema schema name handed to the driver as `schema_name`
#' @param simple if `FALSE` (the default) the profile's `aws_session_token` is
#'        also exported and the connection uses the default credentials
#'        provider of the `dbConnect` method; if `TRUE` the connection is made
#'        with `provider = NULL`, i.e. with the access key id and secret only.
#' @return the connection object returned by `dbConnect()`
#' @export
athena_connect <- function(default_schema, simple = FALSE) {

  athena_jdbc <- Athena()

  cred_file <- path.expand("~/.aws/credentials")
  if (!file.exists(cred_file)) {
    stop("AWS credentials file not found: ", cred_file, call. = FALSE)
  }

  aws_config <- ini::read.ini(cred_file)

  profile_name <- Sys.getenv("AWS_PROFILE", "default")
  aws_profile <- aws_config[[profile_name]]
  if (is.null(aws_profile)) {
    stop("Profile '", profile_name, "' not found in ", cred_file, call. = FALSE)
  }

  # Sys.setenv() fails with an opaque "wrong length" error when handed NULL,
  # so check for the keys we are about to export and name the missing ones.
  required <- c("aws_access_key_id", "aws_secret_access_key")
  if (!simple) required <- c(required, "aws_session_token")

  missing_keys <- setdiff(required, names(aws_profile))
  if (length(missing_keys) > 0) {
    stop("Profile '", profile_name, "' in ", cred_file, " is missing: ",
         paste(missing_keys, collapse = ", "), call. = FALSE)
  }

  Sys.setenv(
    AWS_ACCESS_KEY_ID = aws_profile[["aws_access_key_id"]],
    AWS_SECRET_ACCESS_KEY = aws_profile[["aws_secret_access_key"]]
  )

  if (!simple) {

    Sys.setenv(AWS_SESSION_TOKEN = aws_profile[["aws_session_token"]])

    con <- dbConnect(athena_jdbc, schema_name = default_schema)

  } else {

    con <- dbConnect(athena_jdbc, provider = NULL, schema_name = default_schema)

  }

  con

}
@ -0,0 +1,63 @@ |
|||
--- |
|||
output: rmarkdown::github_document |
|||
--- |
|||
|
|||
[`metis`](https://en.wikipedia.org/wiki/Metis_(mythology)) : Helpers for Accessing and Querying Amazon Athena |
|||
|
|||
Including a lightweight RJDBC shim. |
|||
|
|||
![](https://upload.wikimedia.org/wikipedia/commons/thumb/5/53/Winged_goddess_Louvre_F32.jpg/300px-Winged_goddess_Louvre_F32.jpg) |
|||
|
|||
THIS IS SUPER ALPHA QUALITY. NOTHING TO SEE HERE. MOVE ALONG. |
|||
|
|||
The goal will be to get around enough of the "gotchas" that are preventing raw RJDBC Athena |
|||
connections from "just working" with `dplyr` v0.6.0+ and also get around the [`fetchSize` problem](https://www.reddit.com/r/aws/comments/6aq22b/fetchsize_limit/) without having to give up `dbGetQuery()`. |
|||
|
|||
It will also support more than the vanilla id/secret auth mechanism (it currently supports the default basic auth and temp token auth, the latter via environment variables). |
|||
|
|||
See the **Usage** section for an example. |
|||
|
|||
The following functions are implemented: |
|||
|
|||
- `athena_connect`: Make a JDBC connection to Athena (this returns an `AthenaConnection` object which is a subclass of its vanilla RJDBC counterpart) |
|||
- `Athena`: AthenaJDBC |
|||
- `AthenaConnection-class`: AthenaJDBC |
|||
- `AthenaDriver-class`: AthenaJDBC |
|||
- `AthenaResult-class`: AthenaJDBC |
|||
- `dbConnect-method`: AthenaJDBC |
|||
- `dbGetQuery-method`: AthenaJDBC |
|||
- `dbSendQuery-method`: AthenaJDBC |
|||
|
|||
### Installation |
|||
|
|||
```{r eval=FALSE} |
|||
devtools::install_github("hrbrmstr/metis") |
|||
``` |
|||
|
|||
```{r message=FALSE, warning=FALSE, error=FALSE, include=FALSE} |
|||
options(width=120) |
|||
``` |
|||
|
|||
### Usage |
|||
|
|||
```{r message=FALSE, warning=FALSE, error=FALSE} |
|||
library(metis) |
|||
library(dplyr) |
|||
|
|||
# current version |
|||
packageVersion("metis") |
|||
``` |
|||
|
|||
```{r message=FALSE, warning=FALSE, error=FALSE, eval=FALSE} |
|||
ath <- athena_connect("your_schema_name") |
|||
|
|||
res <- dbGetQuery(ath, " |
|||
SELECT format_datetime(timestamp, 'yyyy-MM-dd HH:00:00') timestamp, |
|||
port as field, count(port) cnt_field FROM your_schema_name.your_table_name |
|||
WHERE CONTAINS(ARRAY['201705'], date) |
|||
AND port IN (445, 139, 3389) |
|||
AND timestamp > date '2017-05-01' |
|||
AND timestamp <= date '2017-05-22' |
|||
GROUP BY format_datetime(timestamp, 'yyyy-MM-dd HH:00:00'), port LIMIT 1000000 |
|||
") |
|||
``` |
@ -0,0 +1,57 @@ |
|||
|
|||
[`metis`](https://en.wikipedia.org/wiki/Metis_(mythology)) : Helpers for Accessing and Querying Amazon Athena |
|||
|
|||
Including a lightweight RJDBC shim. |
|||
|
|||
![](https://upload.wikimedia.org/wikipedia/commons/thumb/5/53/Winged_goddess_Louvre_F32.jpg/300px-Winged_goddess_Louvre_F32.jpg) |
|||
|
|||
THIS IS SUPER ALPHA QUALITY. NOTHING TO SEE HERE. MOVE ALONG. |
|||
|
|||
The goal will be to get around enough of the "gotchas" that are preventing raw RJDBC Athena connections from "just working" with `dplyr` v0.6.0+ and also get around the [`fetchSize` problem](https://www.reddit.com/r/aws/comments/6aq22b/fetchsize_limit/) without having to give up `dbGetQuery()`. |
|||
|
|||
It will also support more than the vanilla id/secret auth mechanism (it currently supports the default basic auth and temp token auth, the latter via environment variables). |
|||
|
|||
See the **Usage** section for an example. |
|||
|
|||
The following functions are implemented: |
|||
|
|||
- `athena_connect`: Make a JDBC connection to Athena (this returns an `AthenaConnection` object which is a subclass of its vanilla RJDBC counterpart) |
|||
- `Athena`: AthenaJDBC |
|||
- `AthenaConnection-class`: AthenaJDBC |
|||
- `AthenaDriver-class`: AthenaJDBC |
|||
- `AthenaResult-class`: AthenaJDBC |
|||
- `dbConnect-method`: AthenaJDBC |
|||
- `dbGetQuery-method`: AthenaJDBC |
|||
- `dbSendQuery-method`: AthenaJDBC |
|||
|
|||
### Installation |
|||
|
|||
``` r |
|||
devtools::install_github("hrbrmstr/metis") |
|||
``` |
|||
|
|||
### Usage |
|||
|
|||
``` r |
|||
library(metis) |
|||
library(dplyr) |
|||
|
|||
# current version |
|||
packageVersion("metis") |
|||
``` |
|||
|
|||
## [1] '0.1.0' |
|||
|
|||
``` r |
|||
ath <- athena_connect("your_schema_name") |
|||
|
|||
res <- dbGetQuery(ath, " |
|||
SELECT format_datetime(timestamp, 'yyyy-MM-dd HH:00:00') timestamp, |
|||
port as field, count(port) cnt_field FROM your_schema_name.your_table_name |
|||
WHERE CONTAINS(ARRAY['201705'], date) |
|||
AND port IN (445, 139, 3389) |
|||
AND timestamp > date '2017-05-01' |
|||
AND timestamp <= date '2017-05-22' |
|||
GROUP BY format_datetime(timestamp, 'yyyy-MM-dd HH:00:00'), port LIMIT 1000000 |
|||
") |
|||
``` |
Binary file not shown.
@ -0,0 +1,11 @@ |
|||
% Generated by roxygen2: do not edit by hand |
|||
% Please edit documentation in R/jdbc.r |
|||
\name{Athena} |
|||
\alias{Athena} |
|||
\title{AthenaJDBC} |
|||
\usage{ |
|||
Athena(identifier.quote = "`") |
|||
} |
|||
\description{ |
|||
AthenaJDBC |
|||
} |
@ -0,0 +1,9 @@ |
|||
% Generated by roxygen2: do not edit by hand |
|||
% Please edit documentation in R/jdbc.r |
|||
\docType{class} |
|||
\name{AthenaConnection-class} |
|||
\alias{AthenaConnection-class} |
|||
\title{AthenaJDBC} |
|||
\description{ |
|||
AthenaJDBC |
|||
} |
@ -0,0 +1,9 @@ |
|||
% Generated by roxygen2: do not edit by hand |
|||
% Please edit documentation in R/jdbc.r |
|||
\docType{class} |
|||
\name{AthenaDriver-class} |
|||
\alias{AthenaDriver-class} |
|||
\title{AthenaJDBC} |
|||
\description{ |
|||
AthenaJDBC |
|||
} |
@ -0,0 +1,9 @@ |
|||
% Generated by roxygen2: do not edit by hand |
|||
% Please edit documentation in R/jdbc.r |
|||
\docType{class} |
|||
\name{AthenaResult-class} |
|||
\alias{AthenaResult-class} |
|||
\title{AthenaJDBC} |
|||
\description{ |
|||
AthenaJDBC |
|||
} |
@ -0,0 +1,27 @@ |
|||
% Generated by roxygen2: do not edit by hand |
|||
% Please edit documentation in R/metis.r |
|||
\name{athena_connect} |
|||
\alias{athena_connect} |
|||
\title{Make a JDBC connection to Athena} |
|||
\usage{ |
|||
athena_connect(default_schema, simple = FALSE) |
|||
} |
|||
\arguments{ |
|||
\item{default_schema}{def sch} |
|||
|
|||
\item{simple}{simple} |
|||
} |
|||
\description{ |
|||
Handles the up-front JDBC config |
|||
} |
|||
\details{ |
|||
For all connection types it is expected that you have the following environment variables |
|||
defined (a good place is \code{~/.Renviron}): |
|||
\itemize{ |
|||
\item \code{AWS_S3_STAGING_DIR}: the name of the S3 bucket where Athena can write stuff |
|||
\item \code{AWS_PROFILE}: the AWS profile ID in \code{~/.aws/credentials} (defaults to \code{default} if not present) |
|||
} |
|||
|
|||
For \code{simple} == \code{FALSE} the expectation is that you're working with a managed |
|||
\code{~/.aws/credentials} file. |
|||
} |
@ -0,0 +1,15 @@ |
|||
% Generated by roxygen2: do not edit by hand |
|||
% Please edit documentation in R/jdbc.r |
|||
\docType{methods} |
|||
\name{dbConnect,AthenaDriver-method} |
|||
\alias{dbConnect,AthenaDriver-method} |
|||
\title{AthenaJDBC} |
|||
\usage{ |
|||
\S4method{dbConnect}{AthenaDriver}(drv, |
|||
provider = "com.amazonaws.athena.jdbc.shaded.com.amazonaws.auth.EnvironmentVariableCredentialsProvider", |
|||
conn_string = "jdbc:awsathena://athena.us-east-1.amazonaws.com:443/", |
|||
schema_name, ...) |
|||
} |
|||
\description{ |
|||
AthenaJDBC |
|||
} |
@ -0,0 +1,12 @@ |
|||
% Generated by roxygen2: do not edit by hand |
|||
% Please edit documentation in R/jdbc.r |
|||
\docType{methods} |
|||
\name{dbGetQuery,AthenaConnection,character-method} |
|||
\alias{dbGetQuery,AthenaConnection,character-method} |
|||
\title{AthenaJDBC} |
|||
\usage{ |
|||
\S4method{dbGetQuery}{AthenaConnection,character}(conn, statement, ...) |
|||
} |
|||
\description{ |
|||
AthenaJDBC |
|||
} |
@ -0,0 +1,12 @@ |
|||
% Generated by roxygen2: do not edit by hand |
|||
% Please edit documentation in R/jdbc.r |
|||
\docType{methods} |
|||
\name{dbSendQuery,AthenaDriver,ANY-method} |
|||
\alias{dbSendQuery,AthenaDriver,ANY-method} |
|||
\title{AthenaJDBC} |
|||
\usage{ |
|||
\S4method{dbSendQuery}{AthenaDriver,ANY}(conn, statement, ...) |
|||
} |
|||
\description{ |
|||
AthenaJDBC |
|||
} |
@ -0,0 +1,13 @@ |
|||
% Generated by roxygen2: do not edit by hand |
|||
% Please edit documentation in R/metis-package.R |
|||
\docType{package} |
|||
\name{metis} |
|||
\alias{metis} |
|||
\alias{metis-package} |
|||
\title{Helpers for Accessing and Querying Amazon Athena} |
|||
\description{ |
|||
Including a lightweight RJDBC shim. |
|||
} |
|||
\author{ |
|||
Bob Rudis (bob@rud.is) |
|||
} |
@ -0,0 +1,21 @@ |
|||
Version: 1.0 |
|||
|
|||
RestoreWorkspace: Default |
|||
SaveWorkspace: Default |
|||
AlwaysSaveHistory: Default |
|||
|
|||
EnableCodeIndexing: Yes |
|||
UseSpacesForTab: Yes |
|||
NumSpacesForTab: 2 |
|||
Encoding: UTF-8 |
|||
|
|||
RnwWeave: Sweave |
|||
LaTeX: pdfLaTeX |
|||
|
|||
StripTrailingWhitespace: Yes |
|||
|
|||
BuildType: Package |
|||
PackageUseDevtools: Yes |
|||
PackageInstallArgs: --no-multiarch --with-keep.source |
|||
PackageBuildArgs: --resave-data |
|||
PackageRoxygenize: rd,collate,namespace |
@ -0,0 +1,2 @@ |
|||
# Test runner: executes the package's testthat suite.
library("testthat")

test_check(package = "metis")
@ -0,0 +1,6 @@ |
|||
context("basic functionality")

# Placeholder test: it contains no live expectations yet, only a
# commented-out sketch of one.
test_that(
  "we can do something",
  {
    #expect_that(some_function(), is_a("data.frame"))
  }
)
Ladataan…
Reference in new issue