diff --git a/R/jdbc.r b/R/jdbc.r index 319e68b..08d739a 100644 --- a/R/jdbc.r +++ b/R/jdbc.r @@ -9,7 +9,7 @@ setClass("AthenaDriver", representation("JDBCDriver", identifier.quote="characte Athena <- function(identifier.quote='`') { drv <- JDBC(driverClass="com.amazonaws.athena.jdbc.AthenaDriver", system.file("AthenaJDBC41-1.0.1.jar", package="metis"), - identifier.quote="'") + identifier.quote=identifier.quote) return(as(drv, "AthenaDriver")) } @@ -23,8 +23,10 @@ setMethod( def = function(drv, provider = "com.amazonaws.athena.jdbc.shaded.com.amazonaws.auth.EnvironmentVariableCredentialsProvider", - conn_string = 'jdbc:awsathena://athena.us-east-1.amazonaws.com:443/', - schema_name, ...) { + region = "us-east-1", + schema_name = "default", ...) { + + conn_string = sprintf('jdbc:awsathena://athena.%s.amazonaws.com:443/', region) if (!is.null(provider)) { @@ -35,8 +37,7 @@ setMethod( } else { - jc <- callNextMethod(drv, - 'jdbc:awsathena://athena.us-east-1.amazonaws.com:443/', + jc <- callNextMethod(drv, conn_string, s3_staging_dir=Sys.getenv("AWS_S3_STAGING_DIR"), schema_name=schema_name, user = Sys.getenv("AWS_ACCESS_KEY_ID"), @@ -85,7 +86,7 @@ setMethod( def = function(conn, statement, ...) { r <- dbSendQuery(conn, statement, ...) on.exit(.jcall(r@stat, "V", "close")) - dplyr::tbl_df(fetch(r, -1, block=256)) + dplyr::tbl_df(fetch(r, -1, block=1000)) } ) diff --git a/R/metis.r b/R/metis.r index 221fce3..743e172 100644 --- a/R/metis.r +++ b/R/metis.r @@ -11,11 +11,15 @@ #' For `simple` == `FALSE` the expectation is that you're working with a managed #' `~/.aws/credentials` file. #' +#' There's a high likelihood of params changing in the near term as I work this out, but I'm +#' not very keen on parameter-izing things like id/secret. +#' #' @md -#' @param default_schema def sch -#' @param simple simple +#' @param default_schema def schema +#' @param region AWS region (Ref: <http://docs.aws.amazon.com/general/latest/gr/rande.html#athena>) +#' @param simple pickup id/secret only or use temp token? 
(this will become more robust) #' @export -athena_connect <- function(default_schema, simple=FALSE) { +athena_connect <- function(default_schema = "default", region = "us-east-1", simple=FALSE) { athena_jdbc <- Athena() @@ -35,11 +39,11 @@ athena_connect <- function(default_schema, simple=FALSE) { Sys.unsetenv("AWS_SESSION_TOKEN") Sys.setenv(AWS_SESSION_TOKEN = aws_profile$aws_session_token) - con <- dbConnect(athena_jdbc, schema_name = default_schema) + con <- dbConnect(athena_jdbc, schema_name = default_schema, region = region) } else { - con <- dbConnect(athena_jdbc, provider = NULL, schema_name = default_schema) + con <- dbConnect(athena_jdbc, provider = NULL, schema_name = default_schema, region = region) } diff --git a/README.Rmd b/README.Rmd index 6e5adcb..4fdab4a 100644 --- a/README.Rmd +++ b/README.Rmd @@ -15,6 +15,8 @@ connecitons from "just working" with `dplyr` v0.6.0+ and also get around the [`f It will also support more than the vanilla id/secret auth mechism (it currently support the default basic auth and temp token auth, the latter via environment variables). +This package includes the `AthenaJDBC41-1.0.1.jar` JAR file out of convenience but that will likely move to a separate package as this gets closer to prime time. + See the **Usage** section for an example. The following functions are implemented: diff --git a/README.md b/README.md index 9a5778f..e1a31b7 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,8 @@ The goal will be to get around enough of the "gotchas" that are preventing raw R It will also support more than the vanilla id/secret auth mechism (it currently support the default basic auth and temp token auth, the latter via environment variables). +This package includes the `AthenaJDBC41-1.0.1.jar` JAR file out of convenience but that will likely move to a separate package as this gets closer to prime time. + See the **Usage** section for an example. 
The following functions are implemented: diff --git a/man/athena_connect.Rd b/man/athena_connect.Rd index de9ef71..c929a6e 100644 --- a/man/athena_connect.Rd +++ b/man/athena_connect.Rd @@ -4,12 +4,15 @@ \alias{athena_connect} \title{Make a JDBC connection to Athena} \usage{ -athena_connect(default_schema, simple = FALSE) +athena_connect(default_schema = "default", region = "us-east-1", + simple = FALSE) } \arguments{ -\item{default_schema}{def sch} +\item{default_schema}{def schema} -\item{simple}{simple} +\item{region}{AWS region (Ref: \url{http://docs.aws.amazon.com/general/latest/gr/rande.html#athena})} + +\item{simple}{pickup id/secret only or use temp token? (this will become more robust)} } \description{ Handles the up-front JDBC config @@ -24,4 +27,7 @@ defined (a good place is \code{~/.Renviron}): For \code{simple} == \code{FALSE} the expectation is that you're working with a managed \code{~/.aws/credentials} file. + +There's a high likelihood of params changing in the near term as I work this out, but I'm +not very keen on parameter-izing things like id/secret. } diff --git a/man/dbConnect-AthenaDriver-method.Rd b/man/dbConnect-AthenaDriver-method.Rd index 1204874..2ef5c68 100644 --- a/man/dbConnect-AthenaDriver-method.Rd +++ b/man/dbConnect-AthenaDriver-method.Rd @@ -7,8 +7,7 @@ \usage{ \S4method{dbConnect}{AthenaDriver}(drv, provider = "com.amazonaws.athena.jdbc.shaded.com.amazonaws.auth.EnvironmentVariableCredentialsProvider", - conn_string = "jdbc:awsathena://athena.us-east-1.amazonaws.com:443/", - schema_name, ...) + region = "us-east-1", schema_name = "default", ...) } \description{ AthenaJDBC