Browse Source

initial commit

boB Rudis 2 years ago
commit
a01ab3351f
No known key found for this signature in database

+ 10
- 0
.Rbuildignore View File

@@ -0,0 +1,10 @@
1
+^.*\.Rproj$
2
+^\.Rproj\.user$
3
+^\.travis\.yml$
4
+^README\.*Rmd$
5
+^README\.*html$
6
+^NOTES\.*Rmd$
7
+^NOTES\.*html$
8
+^\.codecov\.yml$
9
+^README_files$
10
+^doc$

+ 1
- 0
.codecov.yml View File

@@ -0,0 +1 @@
1
+comment: false

+ 8
- 0
.gitignore View File

@@ -0,0 +1,8 @@
1
+.DS_Store
2
+.Rproj.user
3
+.Rhistory
4
+.RData
5
+.Rproj
6
+src/*.o
7
+src/*.so
8
+src/*.dll

+ 31
- 0
.travis.yml View File

@@ -0,0 +1,31 @@
1
+language: r
2
+
3
+warnings_are_errors: true
4
+
5
+sudo: required
6
+
7
+cache: packages
8
+
9
+r:
10
+ - oldrel
11
+ - release
12
+ - devel
13
+
14
+apt_packages:
15
+  - libv8-dev
16
+  - xclip
17
+
18
+env:
19
+ global:
20
+   - CRAN: http://cran.rstudio.com
21
+
22
+after_success:
23
+  - Rscript -e 'covr::codecov()'
24
+
25
+notifications:
26
+  email:
27
+    - bob@rud.is
28
+  irc:
29
+    channels:
30
+      - "104.236.112.222#builds"
31
+    nick: travisci

+ 22
- 0
DESCRIPTION View File

@@ -0,0 +1,22 @@
1
+Package: metis
2
+Type: Package
3
+Title: Helpers for Accessing and Querying Amazon Athena
4
+Version: 0.1.0
5
+Date: 2017-05-16
6
+Author: Bob Rudis (bob@rud.is)
7
+Maintainer: Bob Rudis <bob@rud.is>
8
+Description: Helpers for Accessing and Querying Amazon Athena. Including a lightweight RJDBC shim.
9
+URL: https://github.com/hrbrmstr/metis
10
+BugReports: https://github.com/hrbrmstr/metis/issues
11
+License: AGPL
12
+Suggests:
13
+    testthat,
14
+    covr
15
+Depends:
16
+    R (>= 3.2.0),
17
+    RJDBC
18
+Imports:
19
+    DBI,
20
+    dplyr,
21
+    ini
22
+RoxygenNote: 6.0.1

+ 14
- 0
NAMESPACE View File

@@ -0,0 +1,14 @@
1
+# Generated by roxygen2: do not edit by hand
2
+
3
+export(Athena)
4
+export(athena_connect)
5
+exportClasses(AthenaConnection)
6
+exportClasses(AthenaDriver)
7
+exportClasses(AthenaResult)
8
+exportMethods(dbConnect)
9
+exportMethods(dbGetQuery)
10
+exportMethods(dbSendQuery)
11
+import(DBI)
12
+import(RJDBC)
13
+import(dplyr)
14
+import(ini)

+ 2
- 0
NEWS.md View File

@@ -0,0 +1,2 @@
1
+0.1.0 
2
+* Initial release

+ 91
- 0
R/jdbc.r View File

@@ -0,0 +1,91 @@
1
+#' S4 class for the Athena JDBC driver; extends RJDBC's JDBCDriver
2
+#'
3
+#' @export
4
+setClass("AthenaDriver", representation("JDBCDriver", identifier.quote="character", jdrv="jobjRef"))
5
+
6
+#' Create an Athena JDBC driver object
7
+#' @param identifier.quote character used to quote SQL identifiers; handed on to RJDBC's JDBC()
8
+#' @export
9
+Athena <- function(identifier.quote='`') {
10
+  drv <- JDBC(driverClass="com.amazonaws.athena.jdbc.AthenaDriver",
11
+              system.file("AthenaJDBC41-1.0.1.jar", package="metis"),
12
+              identifier.quote=identifier.quote)  # honour the caller's choice of quote character
13
+  return(as(drv, "AthenaDriver"))
14
+}
15
+
16
+#' Connect to Athena through the JDBC driver
17
+#' @param drv,provider,conn_string,schema_name,... driver from Athena(); credentials provider class name (NULL = key id/secret from environment variables); JDBC URL; default schema; extra arguments forwarded to RJDBC's dbConnect()
18
+#' @export
19
+setMethod(
20
+
21
+  "dbConnect",
22
+  "AthenaDriver",
23
+
24
+  def = function(drv,
25
+                 provider = "com.amazonaws.athena.jdbc.shaded.com.amazonaws.auth.EnvironmentVariableCredentialsProvider",
26
+                 conn_string = 'jdbc:awsathena://athena.us-east-1.amazonaws.com:443/',
27
+                 schema_name, ...) {
28
+    # Both branches read the S3 staging location from AWS_S3_STAGING_DIR.
29
+    if (!is.null(provider)) {
30
+      # Let the named credentials provider class supply the credentials.
31
+      jc <- callNextMethod(drv, conn_string,
32
+                           s3_staging_dir=Sys.getenv("AWS_S3_STAGING_DIR"),
33
+                           schema_name=schema_name,
34
+                           aws_credentials_provider_class=provider, ...)
35
+
36
+    } else {
37
+      # No provider class: pass the key id/secret from the environment as user/password.
38
+      jc <- callNextMethod(drv,
39
+                       conn_string,
40
+                       s3_staging_dir=Sys.getenv("AWS_S3_STAGING_DIR"),
41
+                       schema_name=schema_name,
42
+                       user = Sys.getenv("AWS_ACCESS_KEY_ID"),
43
+                       password = Sys.getenv("AWS_SECRET_ACCESS_KEY"), ...)
44
+
45
+    }
46
+    # Re-class the RJDBC connection as an AthenaConnection.
47
+    return(as(jc, "AthenaConnection"))
48
+
49
+  }
50
+
51
+)
52
+
53
+#' S4 class for an Athena connection; extends RJDBC's JDBCConnection
54
+#'
55
+#' @export
56
+setClass("AthenaConnection", contains = "JDBCConnection")
57
+
58
+#' S4 class for an Athena query result; extends RJDBC's JDBCResult
59
+#'
60
+#' @export
61
+setClass("AthenaResult", contains = "JDBCResult")
62
+
63
+#' Send a query to Athena and get back an AthenaResult
64
+#' @param conn,statement,... an AthenaConnection, the SQL text, and extra arguments, all handed on to the inherited RJDBC method
65
+#' @export
66
+setMethod(
67
+
68
+  "dbSendQuery",
69
+  "AthenaConnection",
70
+  # Registered on the connection class: `conn` is a connection, and dbGetQuery() calls this with one.
71
+  def = function(conn, statement, ...) {
72
+    return(as(callNextMethod(), "AthenaResult"))
73
+  }
74
+
75
+)
76
+
77
+#' Run a query on Athena and return the fetched rows as a tbl_df
78
+#' @param conn,statement,... an AthenaConnection, the SQL text, and extra arguments passed to dbSendQuery()
79
+#' @export
80
+setMethod(
81
+
82
+  "dbGetQuery",
83
+  signature(conn="AthenaConnection", statement="character"),
84
+  # Sends the statement, fetches with n = -1 and block = 256, and closes the JDBC statement on exit.
85
+  def = function(conn, statement, ...) {
86
+    r <- dbSendQuery(conn, statement, ...)
87
+    on.exit(.jcall(r@stat, "V", "close"))
88
+    dplyr::tbl_df(fetch(r, -1, block=256))
89
+  }
90
+
91
+)

+ 12
- 0
R/metis-package.R View File

@@ -0,0 +1,12 @@
1
+#' Helpers for Accessing and Querying Amazon Athena
2
+#'
3
+#' Including a lightweight RJDBC shim.
4
+#'
5
+#' @name metis
6
+#' @docType package
7
+#' @author Bob Rudis (bob@@rud.is)
8
+#' @import RJDBC
9
+#' @import DBI
10
+#' @import dplyr
11
+#' @import ini
12
+NULL

+ 48
- 0
R/metis.r View File

@@ -0,0 +1,48 @@
1
+#' Make a JDBC connection to Athena
2
+#'
3
+#' Handles the up-front JDBC config
4
+#'
5
+#' For all connection types it is expected that you have the following environment variables
6
+#' defined (a good place is `~/.Renviron`):
7
+#'
8
+#' - `AWS_S3_STAGING_DIR`: the name of the S3 bucket where Athena can write stuff
9
+#' - `AWS_PROFILE`: the AWS profile ID in `~/.aws/credentials` (defaults to `default` if not present)
10
+#'
11
+#' For `simple` == `FALSE` the expectation is that you're working with a managed
12
+#' `~/.aws/credentials` file.
13
+#' @return an `AthenaConnection` object
14
+#' @md
15
+#' @param default_schema name of the schema to use by default (sent to the driver as `schema_name`)
16
+#' @param simple if `TRUE`, connect with only the profile's access key id and secret; if `FALSE` (the default) also export the profile's session token and use the driver's credentials provider class
17
+#' @export
18
+athena_connect <- function(default_schema, simple=FALSE) {
19
+
20
+  athena_jdbc <- Athena()
21
+
22
+  aws_config <- ini::read.ini(path.expand("~/.aws/credentials"))
23
+  aws_profile <- aws_config[[Sys.getenv("AWS_PROFILE", "default")]]
24
+  if (is.null(aws_profile)) stop("AWS profile '", Sys.getenv("AWS_PROFILE", "default"), "' not found in ~/.aws/credentials", call. = FALSE)
25
+  Sys.unsetenv("AWS_ACCESS_KEY_ID")
26
+  Sys.unsetenv("AWS_SECRET_ACCESS_KEY")
27
+
28
+  Sys.setenv(AWS_ACCESS_KEY_ID = aws_profile$aws_access_key_id)
29
+  Sys.setenv(AWS_SECRET_ACCESS_KEY = aws_profile$aws_secret_access_key)
30
+
31
+  con <- NULL
32
+
33
+  if (!simple) {
34
+    # Only export a session token when the profile has one; Sys.setenv() errors on a NULL value.
35
+    Sys.unsetenv("AWS_SESSION_TOKEN")
36
+    if (!is.null(aws_profile$aws_session_token)) Sys.setenv(AWS_SESSION_TOKEN = aws_profile$aws_session_token)
37
+
38
+    con <- dbConnect(athena_jdbc, schema_name = default_schema)
39
+
40
+  } else {
41
+    # provider = NULL makes dbConnect() authenticate with the key id/secret exported above.
42
+    con <- dbConnect(athena_jdbc, provider = NULL, schema_name = default_schema)
43
+
44
+  }
45
+
46
+  con
47
+
48
+}

+ 63
- 0
README.Rmd View File

@@ -0,0 +1,63 @@
1
+---
2
+output: rmarkdown::github_document
3
+---
4
+
5
+[`metis`](https://en.wikipedia.org/wiki/Metis_(mythology)) : Helpers for Accessing and Querying Amazon Athena
6
+
7
+Including a lightweight RJDBC shim.
8
+
9
+![](https://upload.wikimedia.org/wikipedia/commons/thumb/5/53/Winged_goddess_Louvre_F32.jpg/300px-Winged_goddess_Louvre_F32.jpg)
10
+
11
+THIS IS SUPER ALPHA QUALITY. NOTHING TO SEE HERE. MOVE ALONG.
12
+
13
+The goal will be to get around enough of the "gotchas" that are preventing raw RJDBC Athena
14
+connections from "just working" with `dplyr` v0.6.0+ and also get around the [`fetchSize` problem](https://www.reddit.com/r/aws/comments/6aq22b/fetchsize_limit/) without having to give up `dbGetQuery()`.
15
+
16
+It will also support more than the vanilla id/secret auth mechanism (it currently supports the default basic auth and temp token auth, the latter via environment variables).
17
+
18
+See the **Usage** section for an example.
19
+
20
+The following functions are implemented:
21
+
22
+- `athena_connect`:	Make a JDBC connection to Athena (this returns an `AthenaConnection` object which is a subclass of its vanilla RJDBC counterpart)
23
+- `Athena`:	AthenaJDBC
24
+- `AthenaConnection-class`:	AthenaJDBC
25
+- `AthenaDriver-class`:	AthenaJDBC
26
+- `AthenaResult-class`:	AthenaJDBC
27
+- `dbConnect-method`:	AthenaJDBC
28
+- `dbGetQuery-method`:	AthenaJDBC
29
+- `dbSendQuery-method`:	AthenaJDBC
30
+
31
+### Installation
32
+
33
+```{r eval=FALSE}
34
+devtools::install_github("hrbrmstr/metis")
35
+```
36
+
37
+```{r message=FALSE, warning=FALSE, error=FALSE, include=FALSE}
38
+options(width=120)
39
+```
40
+
41
+### Usage
42
+
43
+```{r message=FALSE, warning=FALSE, error=FALSE}
44
+library(metis)
45
+library(dplyr)
46
+
47
+# current version
48
+packageVersion("metis")
49
+```
50
+
51
+```{r message=FALSE, warning=FALSE, error=FALSE, eval=FALSE}
52
+ath <- athena_connect("your_schema_name")
53
+
54
+res <- dbGetQuery(ath, "
55
+SELECT format_datetime(timestamp, 'yyyy-MM-dd HH:00:00') timestamp,
56
+        port as field, count(port) cnt_field FROM your_schema_name.your_table_name
57
+        WHERE CONTAINS(ARRAY['201705'], date)
58
+        AND port IN (445, 139, 3389)
59
+        AND timestamp > date '2017-05-01'
60
+        AND timestamp <= date '2017-05-22'
61
+GROUP BY format_datetime(timestamp, 'yyyy-MM-dd HH:00:00'), port LIMIT 1000000
62
+")
63
+```

+ 57
- 0
README.md View File

@@ -0,0 +1,57 @@
1
+
2
+[`metis`](https://en.wikipedia.org/wiki/Metis_(mythology)) : Helpers for Accessing and Querying Amazon Athena
3
+
4
+Including a lightweight RJDBC shim.
5
+
6
+![](https://upload.wikimedia.org/wikipedia/commons/thumb/5/53/Winged_goddess_Louvre_F32.jpg/300px-Winged_goddess_Louvre_F32.jpg)
7
+
8
+THIS IS SUPER ALPHA QUALITY. NOTHING TO SEE HERE. MOVE ALONG.
9
+
10
+The goal will be to get around enough of the "gotchas" that are preventing raw RJDBC Athena connections from "just working" with `dplyr` v0.6.0+ and also get around the [`fetchSize` problem](https://www.reddit.com/r/aws/comments/6aq22b/fetchsize_limit/) without having to give up `dbGetQuery()`.
11
+
12
+It will also support more than the vanilla id/secret auth mechanism (it currently supports the default basic auth and temp token auth, the latter via environment variables).
13
+
14
+See the **Usage** section for an example.
15
+
16
+The following functions are implemented:
17
+
18
+-   `athena_connect`: Make a JDBC connection to Athena (this returns an `AthenaConnection` object which is a subclass of its vanilla RJDBC counterpart)
19
+-   `Athena`: AthenaJDBC
20
+-   `AthenaConnection-class`: AthenaJDBC
21
+-   `AthenaDriver-class`: AthenaJDBC
22
+-   `AthenaResult-class`: AthenaJDBC
23
+-   `dbConnect-method`: AthenaJDBC
24
+-   `dbGetQuery-method`: AthenaJDBC
25
+-   `dbSendQuery-method`: AthenaJDBC
26
+
27
+### Installation
28
+
29
+``` r
30
+devtools::install_github("hrbrmstr/metis")
31
+```
32
+
33
+### Usage
34
+
35
+``` r
36
+library(metis)
37
+library(dplyr)
38
+
39
+# current version
40
+packageVersion("metis")
41
+```
42
+
43
+    ## [1] '0.1.0'
44
+
45
+``` r
46
+ath <- athena_connect("your_schema_name")
47
+
48
+res <- dbGetQuery(ath, "
49
+SELECT format_datetime(timestamp, 'yyyy-MM-dd HH:00:00') timestamp,
50
+        port as field, count(port) cnt_field FROM your_schema_name.your_table_name
51
+        WHERE CONTAINS(ARRAY['201705'], date)
52
+        AND port IN (445, 139, 3389)
53
+        AND timestamp > date '2017-05-01'
54
+        AND timestamp <= date '2017-05-22'
55
+GROUP BY format_datetime(timestamp, 'yyyy-MM-dd HH:00:00'), port LIMIT 1000000
56
+")
57
+```

BIN
inst/AthenaJDBC41-1.0.1.jar View File


+ 11
- 0
man/Athena.Rd View File

@@ -0,0 +1,11 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/jdbc.r
3
+\name{Athena}
4
+\alias{Athena}
5
+\title{AthenaJDBC}
6
+\usage{
7
+Athena(identifier.quote = "`")
8
+}
9
+\description{
10
+AthenaJDBC
11
+}

+ 9
- 0
man/AthenaConnection-class.Rd View File

@@ -0,0 +1,9 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/jdbc.r
3
+\docType{class}
4
+\name{AthenaConnection-class}
5
+\alias{AthenaConnection-class}
6
+\title{AthenaJDBC}
7
+\description{
8
+AthenaJDBC
9
+}

+ 9
- 0
man/AthenaDriver-class.Rd View File

@@ -0,0 +1,9 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/jdbc.r
3
+\docType{class}
4
+\name{AthenaDriver-class}
5
+\alias{AthenaDriver-class}
6
+\title{AthenaJDBC}
7
+\description{
8
+AthenaJDBC
9
+}

+ 9
- 0
man/AthenaResult-class.Rd View File

@@ -0,0 +1,9 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/jdbc.r
3
+\docType{class}
4
+\name{AthenaResult-class}
5
+\alias{AthenaResult-class}
6
+\title{AthenaJDBC}
7
+\description{
8
+AthenaJDBC
9
+}

+ 27
- 0
man/athena_connect.Rd View File

@@ -0,0 +1,27 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/metis.r
3
+\name{athena_connect}
4
+\alias{athena_connect}
5
+\title{Make a JDBC connection to Athena}
6
+\usage{
7
+athena_connect(default_schema, simple = FALSE)
8
+}
9
+\arguments{
10
+\item{default_schema}{def sch}
11
+
12
+\item{simple}{simple}
13
+}
14
+\description{
15
+Handles the up-front JDBC config
16
+}
17
+\details{
18
+For all connection types it is expected that you have the following environment variables
19
+defined (a good place is \code{~/.Renviron}):
20
+\itemize{
21
+\item \code{AWS_S3_STAGING_DIR}: the name of the S3 bucket where Athena can write stuff
22
+\item \code{AWS_PROFILE}: the AWS profile ID in \code{~/.aws/credentials} (defaults to \code{default} if not present)
23
+}
24
+
25
+For \code{simple} == \code{FALSE} the expectation is that you're working with a managed
26
+\code{~/.aws/credentials} file.
27
+}

+ 15
- 0
man/dbConnect-AthenaDriver-method.Rd View File

@@ -0,0 +1,15 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/jdbc.r
3
+\docType{methods}
4
+\name{dbConnect,AthenaDriver-method}
5
+\alias{dbConnect,AthenaDriver-method}
6
+\title{AthenaJDBC}
7
+\usage{
8
+\S4method{dbConnect}{AthenaDriver}(drv,
9
+  provider = "com.amazonaws.athena.jdbc.shaded.com.amazonaws.auth.EnvironmentVariableCredentialsProvider",
10
+  conn_string = "jdbc:awsathena://athena.us-east-1.amazonaws.com:443/",
11
+  schema_name, ...)
12
+}
13
+\description{
14
+AthenaJDBC
15
+}

+ 12
- 0
man/dbGetQuery-AthenaConnection-character-method.Rd View File

@@ -0,0 +1,12 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/jdbc.r
3
+\docType{methods}
4
+\name{dbGetQuery,AthenaConnection,character-method}
5
+\alias{dbGetQuery,AthenaConnection,character-method}
6
+\title{AthenaJDBC}
7
+\usage{
8
+\S4method{dbGetQuery}{AthenaConnection,character}(conn, statement, ...)
9
+}
10
+\description{
11
+AthenaJDBC
12
+}

+ 12
- 0
man/dbSendQuery-AthenaDriver-ANY-method.Rd View File

@@ -0,0 +1,12 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/jdbc.r
3
+\docType{methods}
4
+\name{dbSendQuery,AthenaDriver,ANY-method}
5
+\alias{dbSendQuery,AthenaDriver,ANY-method}
6
+\title{AthenaJDBC}
7
+\usage{
8
+\S4method{dbSendQuery}{AthenaDriver,ANY}(conn, statement, ...)
9
+}
10
+\description{
11
+AthenaJDBC
12
+}

+ 13
- 0
man/metis.Rd View File

@@ -0,0 +1,13 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/metis-package.R
3
+\docType{package}
4
+\name{metis}
5
+\alias{metis}
6
+\alias{metis-package}
7
+\title{Helpers for Accessing and Querying Amazon Athena}
8
+\description{
9
+Including a lightweight RJDBC shim.
10
+}
11
+\author{
12
+Bob Rudis (bob@rud.is)
13
+}

+ 21
- 0
metis.Rproj View File

@@ -0,0 +1,21 @@
1
+Version: 1.0
2
+
3
+RestoreWorkspace: Default
4
+SaveWorkspace: Default
5
+AlwaysSaveHistory: Default
6
+
7
+EnableCodeIndexing: Yes
8
+UseSpacesForTab: Yes
9
+NumSpacesForTab: 2
10
+Encoding: UTF-8
11
+
12
+RnwWeave: Sweave
13
+LaTeX: pdfLaTeX
14
+
15
+StripTrailingWhitespace: Yes
16
+
17
+BuildType: Package
18
+PackageUseDevtools: Yes
19
+PackageInstallArgs: --no-multiarch --with-keep.source
20
+PackageBuildArgs: --resave-data
21
+PackageRoxygenize: rd,collate,namespace

+ 2
- 0
tests/test-all.R View File

@@ -0,0 +1,2 @@
1
+library(testthat)
2
+test_check("metis")

+ 6
- 0
tests/testthat/test-metis.R View File

@@ -0,0 +1,6 @@
1
+context("basic functionality")
2
+test_that("we can do something", {
3
+
4
+  #expect_that(some_function(), is_a("data.frame"))
5
+
6
+})

Loading…
Cancel
Save