boB Rudis пре 2 година
комит
a01ab3351f
No known key found for this signature in database GPG Key ID: 2A514A4997464560
25 измењених фајлова са 505 додато и 0 уклоњено
  1. +10
    -0
      .Rbuildignore
  2. +1
    -0
      .codecov.yml
  3. +8
    -0
      .gitignore
  4. +31
    -0
      .travis.yml
  5. +22
    -0
      DESCRIPTION
  6. +14
    -0
      NAMESPACE
  7. +2
    -0
      NEWS.md
  8. +91
    -0
      R/jdbc.r
  9. +12
    -0
      R/metis-package.R
  10. +48
    -0
      R/metis.r
  11. +63
    -0
      README.Rmd
  12. +57
    -0
      README.md
  13. BIN
      inst/AthenaJDBC41-1.0.1.jar
  14. +11
    -0
      man/Athena.Rd
  15. +9
    -0
      man/AthenaConnection-class.Rd
  16. +9
    -0
      man/AthenaDriver-class.Rd
  17. +9
    -0
      man/AthenaResult-class.Rd
  18. +27
    -0
      man/athena_connect.Rd
  19. +15
    -0
      man/dbConnect-AthenaDriver-method.Rd
  20. +12
    -0
      man/dbGetQuery-AthenaConnection-character-method.Rd
  21. +12
    -0
      man/dbSendQuery-AthenaDriver-ANY-method.Rd
  22. +13
    -0
      man/metis.Rd
  23. +21
    -0
      metis.Rproj
  24. +2
    -0
      tests/test-all.R
  25. +6
    -0
      tests/testthat/test-metis.R

+ 10
- 0
.Rbuildignore Прегледај датотеку

@@ -0,0 +1,10 @@
^.*\.Rproj$
^\.Rproj\.user$
^\.travis\.yml$
^README\.*Rmd$
^README\.*html$
^NOTES\.*Rmd$
^NOTES\.*html$
^\.codecov\.yml$
^README_files$
^doc$

+ 1
- 0
.codecov.yml Прегледај датотеку

@@ -0,0 +1 @@
comment: false

+ 8
- 0
.gitignore Прегледај датотеку

@@ -0,0 +1,8 @@
.DS_Store
.Rproj.user
.Rhistory
.RData
.Rproj
src/*.o
src/*.so
src/*.dll

+ 31
- 0
.travis.yml Прегледај датотеку

@@ -0,0 +1,31 @@
language: r

warnings_are_errors: true

sudo: required

cache: packages

r:
- oldrel
- release
- devel

apt_packages:
- libv8-dev
- xclip

env:
global:
- CRAN: http://cran.rstudio.com

after_success:
- Rscript -e 'covr::codecov()'

notifications:
email:
- bob@rud.is
irc:
channels:
- "104.236.112.222#builds"
nick: travisci

+ 22
- 0
DESCRIPTION Прегледај датотеку

@@ -0,0 +1,22 @@
Package: metis
Type: Package
Title: Helpers for Accessing and Querying Amazon Athena
Version: 0.1.0
Date: 2017-05-16
Author: Bob Rudis (bob@rud.is)
Maintainer: Bob Rudis <bob@rud.is>
Description: Helpers for Accessing and Querying Amazon Athena. Including a lightweight RJDBC shim.
URL: https://github.com/hrbrmstr/metis
BugReports: https://github.com/hrbrmstr/metis/issues
License: AGPL
Suggests:
testthat,
covr
Depends:
R (>= 3.2.0),
RJDBC
Imports:
DBI,
dplyr,
ini
RoxygenNote: 6.0.1

+ 14
- 0
NAMESPACE Прегледај датотеку

@@ -0,0 +1,14 @@
# Generated by roxygen2: do not edit by hand

export(Athena)
export(athena_connect)
exportClasses(AthenaConnection)
exportClasses(AthenaDriver)
exportClasses(AthenaResult)
exportMethods(dbConnect)
exportMethods(dbGetQuery)
exportMethods(dbSendQuery)
import(DBI)
import(RJDBC)
import(dplyr)
import(ini)

+ 2
- 0
NEWS.md Прегледај датотеку

@@ -0,0 +1,2 @@
0.1.0
* Initial release

+ 91
- 0
R/jdbc.r Прегледај датотеку

@@ -0,0 +1,91 @@
#' S4 driver class for Amazon Athena
#'
#' Extends \code{JDBCDriver} and declares a character \code{identifier.quote}
#' slot and a \code{jobjRef} \code{jdrv} slot.
#'
#' @export
setClass(
  "AthenaDriver",
  contains = "JDBCDriver",
  slots = c(identifier.quote = "character", jdrv = "jobjRef")
)

#' Create an Athena JDBC driver
#'
#' Builds an RJDBC driver for \code{com.amazonaws.athena.jdbc.AthenaDriver}
#' using the AthenaJDBC jar installed with this package and converts it to an
#' \code{AthenaDriver}.
#'
#' @param identifier.quote character used to quote identifiers; passed through
#'   to \code{JDBC()}
#' @return an \code{AthenaDriver} object
#' @export
Athena <- function(identifier.quote='`') {

  # Honour the caller-supplied quote character instead of a fixed "'" so the
  # argument (and its documented default) actually takes effect.
  drv <- JDBC(
    driverClass = "com.amazonaws.athena.jdbc.AthenaDriver",
    classPath = system.file("AthenaJDBC41-1.0.1.jar", package = "metis"),
    identifier.quote = identifier.quote
  )

  as(drv, "AthenaDriver")

}

#' Connect to Amazon Athena
#'
#' Calls the inherited \code{dbConnect} method with the Athena-specific
#' properties and converts the result to an \code{AthenaConnection}. The S3
#' staging directory is read from the \code{AWS_S3_STAGING_DIR} environment
#' variable.
#'
#' @param drv an \code{AthenaDriver} (see \code{Athena()})
#' @param provider class name handed to the driver as
#'   \code{aws_credentials_provider_class}. Use \code{NULL} to authenticate
#'   with the \code{AWS_ACCESS_KEY_ID} / \code{AWS_SECRET_ACCESS_KEY}
#'   environment variables as JDBC user / password instead.
#' @param conn_string JDBC connection URL
#' @param schema_name value passed to the driver as \code{schema_name}
#' @param ... further arguments forwarded to the inherited \code{dbConnect}
#'   method
#' @return an \code{AthenaConnection} object
#' @export
setMethod(

  "dbConnect",
  "AthenaDriver",

  definition = function(drv,
                        provider = "com.amazonaws.athena.jdbc.shaded.com.amazonaws.auth.EnvironmentVariableCredentialsProvider",
                        conn_string = 'jdbc:awsathena://athena.us-east-1.amazonaws.com:443/',
                        schema_name, ...) {

    if (!is.null(provider)) {

      # Credentials are resolved by the named provider class.
      jc <- callNextMethod(drv, conn_string,
                           s3_staging_dir = Sys.getenv("AWS_S3_STAGING_DIR"),
                           schema_name = schema_name,
                           aws_credentials_provider_class = provider, ...)

    } else {

      # Plain key/secret auth. Use the caller's conn_string (not a fixed URL)
      # and forward `...` so both branches treat their arguments the same way.
      jc <- callNextMethod(drv, conn_string,
                           s3_staging_dir = Sys.getenv("AWS_S3_STAGING_DIR"),
                           schema_name = schema_name,
                           user = Sys.getenv("AWS_ACCESS_KEY_ID"),
                           password = Sys.getenv("AWS_SECRET_ACCESS_KEY"), ...)

    }

    as(jc, "AthenaConnection")

  }

)

#' S4 connection class for Amazon Athena
#'
#' A direct extension of \code{JDBCConnection} that adds no slots of its own.
#'
#' @export
setClass(
  Class = "AthenaConnection",
  contains = "JDBCConnection"
)

#' S4 result class for Amazon Athena
#'
#' A direct extension of \code{JDBCResult} that adds no slots of its own.
#'
#' @export
setClass(
  Class = "AthenaResult",
  contains = "JDBCResult"
)

#' Send a query to Athena
#'
#' Delegates to the inherited \code{dbSendQuery} method and converts the
#' result to an \code{AthenaResult}.
#'
#' @param conn an \code{AthenaConnection}
#' @param statement character; the SQL statement to send
#' @param ... passed on to the inherited method
#' @return an \code{AthenaResult} object
#' @export
setMethod(

  "dbSendQuery",
  # dbSendQuery() dispatches on the connection, not the driver, so the method
  # must be registered for AthenaConnection to ever be selected.
  signature(conn = "AthenaConnection", statement = "character"),

  definition = function(conn, statement, ...) {
    as(callNextMethod(), "AthenaResult")
  }

)

#' Run a query against Athena and return all rows
#'
#' Sends \code{statement} with \code{dbSendQuery()}, fetches every row
#' (\code{n = -1}) using \code{block = 256}, and returns the result wrapped by
#' \code{dplyr::tbl_df()}. The underlying statement is closed when the
#' function exits.
#'
#' @param conn an \code{AthenaConnection}
#' @param statement character; the SQL statement to run
#' @param ... passed on to \code{dbSendQuery()}
#' @export
setMethod(

  "dbGetQuery",
  signature(conn="AthenaConnection", statement="character"),

  definition = function(conn, statement, ...) {

    res <- dbSendQuery(conn, statement, ...)

    # release the JDBC statement however we leave this function
    on.exit(.jcall(res@stat, "V", "close"))

    rows <- fetch(res, -1, block=256)

    dplyr::tbl_df(rows)

  }

)

+ 12
- 0
R/metis-package.R Прегледај датотеку

@@ -0,0 +1,12 @@
#' Helpers for Accessing and Querying Amazon Athena
#'
#' Including a lightweight RJDBC shim: S4 \code{AthenaDriver},
#' \code{AthenaConnection} and \code{AthenaResult} classes that extend their
#' RJDBC counterparts, plus \code{athena_connect()} for setting up a
#' connection.
#'
#' @name metis
#' @docType package
#' @author Bob Rudis (bob@@rud.is)
#' @import RJDBC
#' @import DBI
#' @import dplyr
#' @import ini
NULL

+ 48
- 0
R/metis.r Прегледај датотеку

@@ -0,0 +1,48 @@
#' Make a JDBC connection to Athena
#'
#' Handles the up-front JDBC config
#'
#' For all connection types it is expected that you have the following environment variables
#' defined (a good place is `~/.Renviron`):
#'
#' - `AWS_S3_STAGING_DIR`: the name of the S3 bucket where Athena can write stuff
#' - `AWS_PROFILE`: the AWS profile ID in `~/.aws/credentials` (defaults to `default` if not present)
#'
#' For `simple` == `FALSE` the expectation is that you're working with a managed
#' `~/.aws/credentials` file.
#'
#' The selected profile's `aws_access_key_id` and `aws_secret_access_key` are
#' exported as the `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` environment
#' variables of the current R session. When `simple` is `FALSE`,
#' `AWS_SESSION_TOKEN` is also reset from the profile's `aws_session_token`
#' (and left unset if the profile has none).
#'
#' @md
#' @param default_schema schema name passed to the driver as `schema_name`
#' @param simple if `TRUE`, connect with `provider = NULL`, i.e. use the access
#'   key id / secret access key as JDBC user / password; if `FALSE` (the
#'   default) use the driver's default credentials provider
#' @return the connection object returned by `dbConnect()`
#' @export
athena_connect <- function(default_schema, simple=FALSE) {

  athena_jdbc <- Athena()

  creds_path <- path.expand("~/.aws/credentials")
  if (!file.exists(creds_path)) {
    stop("AWS credentials file not found: ", creds_path, call. = FALSE)
  }

  aws_config <- ini::read.ini(creds_path)

  # Exact-name lookup; returns NULL when the profile section is absent.
  profile_name <- Sys.getenv("AWS_PROFILE", "default")
  aws_profile <- aws_config[[profile_name]]
  if (is.null(aws_profile)) {
    stop("AWS profile '", profile_name, "' not found in ", creds_path,
         call. = FALSE)
  }

  access_key <- aws_profile[["aws_access_key_id"]]
  secret_key <- aws_profile[["aws_secret_access_key"]]
  if (is.null(access_key) || is.null(secret_key)) {
    stop("AWS profile '", profile_name, "' in ", creds_path,
         " must define aws_access_key_id and aws_secret_access_key",
         call. = FALSE)
  }

  # Sys.setenv() overwrites any existing value, so no prior unset is needed.
  Sys.setenv(AWS_ACCESS_KEY_ID = access_key)
  Sys.setenv(AWS_SECRET_ACCESS_KEY = secret_key)

  if (!simple) {

    # Always clear a stale token first; only set a new one if the profile
    # provides it (Sys.setenv() errors on a NULL value).
    Sys.unsetenv("AWS_SESSION_TOKEN")
    session_token <- aws_profile[["aws_session_token"]]
    if (!is.null(session_token)) {
      Sys.setenv(AWS_SESSION_TOKEN = session_token)
    }

    con <- dbConnect(athena_jdbc, schema_name = default_schema)

  } else {

    con <- dbConnect(athena_jdbc, provider = NULL, schema_name = default_schema)

  }

  con

}

+ 63
- 0
README.Rmd Прегледај датотеку

@@ -0,0 +1,63 @@
---
output: rmarkdown::github_document
---

[`metis`](https://en.wikipedia.org/wiki/Metis_(mythology)) : Helpers for Accessing and Querying Amazon Athena

Including a lightweight RJDBC shim.

![](https://upload.wikimedia.org/wikipedia/commons/thumb/5/53/Winged_goddess_Louvre_F32.jpg/300px-Winged_goddess_Louvre_F32.jpg)

THIS IS SUPER ALPHA QUALITY. NOTHING TO SEE HERE. MOVE ALONG.

The goal will be to get around enough of the "gotchas" that are preventing raw RJDBC Athena
connections from "just working" with `dplyr` v0.6.0+ and also get around the [`fetchSize` problem](https://www.reddit.com/r/aws/comments/6aq22b/fetchsize_limit/) without having to give up `dbGetQuery()`.

It will also support more than the vanilla id/secret auth mechanism (it currently supports the default basic auth and temp token auth, the latter via environment variables).

See the **Usage** section for an example.

The following functions are implemented:

- `athena_connect`: Make a JDBC connection to Athena (this returns an `AthenaConnection` object which is a sub-class of its RJDBC vanilla counterpart)
- `Athena`: AthenaJDBC
- `AthenaConnection-class`: AthenaJDBC
- `AthenaDriver-class`: AthenaJDBC
- `AthenaResult-class`: AthenaJDBC
- `dbConnect-method`: AthenaJDBC
- `dbGetQuery-method`: AthenaJDBC
- `dbSendQuery-method`: AthenaJDBC

### Installation

```{r eval=FALSE}
devtools::install_github("hrbrmstr/metis")
```

```{r message=FALSE, warning=FALSE, error=FALSE, include=FALSE}
options(width=120)
```

### Usage

```{r message=FALSE, warning=FALSE, error=FALSE}
library(metis)
library(dplyr)

# current version
packageVersion("metis")
```

```{r message=FALSE, warning=FALSE, error=FALSE, eval=FALSE}
ath <- athena_connect("your_schema_name")

res <- dbGetQuery(ath, "
SELECT format_datetime(timestamp, 'yyyy-MM-dd HH:00:00') timestamp,
port as field, count(port) cnt_field FROM your_schema_name.your_table_name
WHERE CONTAINS(ARRAY['201705'], date)
AND port IN (445, 139, 3389)
AND timestamp > date '2017-05-01'
AND timestamp <= date '2017-05-22'
GROUP BY format_datetime(timestamp, 'yyyy-MM-dd HH:00:00'), port LIMIT 1000000
")
```

+ 57
- 0
README.md Прегледај датотеку

@@ -0,0 +1,57 @@

[`metis`](https://en.wikipedia.org/wiki/Metis_(mythology)) : Helpers for Accessing and Querying Amazon Athena

Including a lightweight RJDBC shim.

![](https://upload.wikimedia.org/wikipedia/commons/thumb/5/53/Winged_goddess_Louvre_F32.jpg/300px-Winged_goddess_Louvre_F32.jpg)

THIS IS SUPER ALPHA QUALITY. NOTHING TO SEE HERE. MOVE ALONG.

The goal will be to get around enough of the "gotchas" that are preventing raw RJDBC Athena connections from "just working" with `dplyr` v0.6.0+ and also get around the [`fetchSize` problem](https://www.reddit.com/r/aws/comments/6aq22b/fetchsize_limit/) without having to give up `dbGetQuery()`.

It will also support more than the vanilla id/secret auth mechanism (it currently supports the default basic auth and temp token auth, the latter via environment variables).

See the **Usage** section for an example.

The following functions are implemented:

- `athena_connect`: Make a JDBC connection to Athena (this returns an `AthenaConnection` object which is a sub-class of its RJDBC vanilla counterpart)
- `Athena`: AthenaJDBC
- `AthenaConnection-class`: AthenaJDBC
- `AthenaDriver-class`: AthenaJDBC
- `AthenaResult-class`: AthenaJDBC
- `dbConnect-method`: AthenaJDBC
- `dbGetQuery-method`: AthenaJDBC
- `dbSendQuery-method`: AthenaJDBC

### Installation

``` r
devtools::install_github("hrbrmstr/metis")
```

### Usage

``` r
library(metis)
library(dplyr)

# current version
packageVersion("metis")
```

## [1] '0.1.0'

``` r
ath <- athena_connect("your_schema_name")

res <- dbGetQuery(ath, "
SELECT format_datetime(timestamp, 'yyyy-MM-dd HH:00:00') timestamp,
port as field, count(port) cnt_field FROM your_schema_name.your_table_name
WHERE CONTAINS(ARRAY['201705'], date)
AND port IN (445, 139, 3389)
AND timestamp > date '2017-05-01'
AND timestamp <= date '2017-05-22'
GROUP BY format_datetime(timestamp, 'yyyy-MM-dd HH:00:00'), port LIMIT 1000000
")
```

BIN
inst/AthenaJDBC41-1.0.1.jar Прегледај датотеку


+ 11
- 0
man/Athena.Rd Прегледај датотеку

@@ -0,0 +1,11 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/jdbc.r
\name{Athena}
\alias{Athena}
\title{AthenaJDBC}
\usage{
Athena(identifier.quote = "`")
}
\description{
AthenaJDBC
}

+ 9
- 0
man/AthenaConnection-class.Rd Прегледај датотеку

@@ -0,0 +1,9 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/jdbc.r
\docType{class}
\name{AthenaConnection-class}
\alias{AthenaConnection-class}
\title{AthenaJDBC}
\description{
AthenaJDBC
}

+ 9
- 0
man/AthenaDriver-class.Rd Прегледај датотеку

@@ -0,0 +1,9 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/jdbc.r
\docType{class}
\name{AthenaDriver-class}
\alias{AthenaDriver-class}
\title{AthenaJDBC}
\description{
AthenaJDBC
}

+ 9
- 0
man/AthenaResult-class.Rd Прегледај датотеку

@@ -0,0 +1,9 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/jdbc.r
\docType{class}
\name{AthenaResult-class}
\alias{AthenaResult-class}
\title{AthenaJDBC}
\description{
AthenaJDBC
}

+ 27
- 0
man/athena_connect.Rd Прегледај датотеку

@@ -0,0 +1,27 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/metis.r
\name{athena_connect}
\alias{athena_connect}
\title{Make a JDBC connection to Athena}
\usage{
athena_connect(default_schema, simple = FALSE)
}
\arguments{
\item{default_schema}{def sch}

\item{simple}{simple}
}
\description{
Handles the up-front JDBC config
}
\details{
For all connection types it is expected that you have the following environment variables
defined (a good place is \code{~/.Renviron}):
\itemize{
\item \code{AWS_S3_STAGING_DIR}: the name of the S3 bucket where Athena can write stuff
\item \code{AWS_PROFILE}: the AWS profile ID in \code{~/.aws/credentials} (defaults to \code{default} if not present)
}

For \code{simple} == \code{FALSE} the expectation is that you're working with a managed
\code{~/.aws/credentials} file.
}

+ 15
- 0
man/dbConnect-AthenaDriver-method.Rd Прегледај датотеку

@@ -0,0 +1,15 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/jdbc.r
\docType{methods}
\name{dbConnect,AthenaDriver-method}
\alias{dbConnect,AthenaDriver-method}
\title{AthenaJDBC}
\usage{
\S4method{dbConnect}{AthenaDriver}(drv,
provider = "com.amazonaws.athena.jdbc.shaded.com.amazonaws.auth.EnvironmentVariableCredentialsProvider",
conn_string = "jdbc:awsathena://athena.us-east-1.amazonaws.com:443/",
schema_name, ...)
}
\description{
AthenaJDBC
}

+ 12
- 0
man/dbGetQuery-AthenaConnection-character-method.Rd Прегледај датотеку

@@ -0,0 +1,12 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/jdbc.r
\docType{methods}
\name{dbGetQuery,AthenaConnection,character-method}
\alias{dbGetQuery,AthenaConnection,character-method}
\title{AthenaJDBC}
\usage{
\S4method{dbGetQuery}{AthenaConnection,character}(conn, statement, ...)
}
\description{
AthenaJDBC
}

+ 12
- 0
man/dbSendQuery-AthenaDriver-ANY-method.Rd Прегледај датотеку

@@ -0,0 +1,12 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/jdbc.r
\docType{methods}
\name{dbSendQuery,AthenaDriver,ANY-method}
\alias{dbSendQuery,AthenaDriver,ANY-method}
\title{AthenaJDBC}
\usage{
\S4method{dbSendQuery}{AthenaDriver,ANY}(conn, statement, ...)
}
\description{
AthenaJDBC
}

+ 13
- 0
man/metis.Rd Прегледај датотеку

@@ -0,0 +1,13 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/metis-package.R
\docType{package}
\name{metis}
\alias{metis}
\alias{metis-package}
\title{Helpers for Accessing and Querying Amazon Athena}
\description{
Including a lightweight RJDBC shim.
}
\author{
Bob Rudis (bob@rud.is)
}

+ 21
- 0
metis.Rproj Прегледај датотеку

@@ -0,0 +1,21 @@
Version: 1.0

RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default

EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8

RnwWeave: Sweave
LaTeX: pdfLaTeX

StripTrailingWhitespace: Yes

BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source
PackageBuildArgs: --resave-data
PackageRoxygenize: rd,collate,namespace

+ 2
- 0
tests/test-all.R Прегледај датотеку

@@ -0,0 +1,2 @@
# Test runner entry point: attaches testthat and calls test_check() for the
# "metis" package.
library(testthat)
test_check("metis")

+ 6
- 0
tests/testthat/test-metis.R Прегледај датотеку

@@ -0,0 +1,6 @@
# Placeholder test file: the single test below has no active expectations.
context("basic functionality")
test_that("we can do something", {

# Template expectation left commented out; replace with real checks.
#expect_that(some_function(), is_a("data.frame"))

})

Loading…
Откажи
Сачувај