mirror of https://git.sr.ht/~hrbrmstr/metis
boB Rudis
5 years ago
27 changed files with 232 additions and 394 deletions
@ -1,48 +0,0 @@ |
|||||
#' Access and Query Amazon Athena via DBI/JDBC |
|
||||
#' |
|
||||
#' Methods are provided to connect to 'Amazon' 'Athena', lookup schemas/tables, |
|
||||
#' perform queries and retrieve query results. A lightweight 'RJDBC' implementation |
|
||||
#' is included along with an interface to the 'AWS' command-line utility. |
|
||||
#' |
|
||||
#' @name metis.lite |
|
||||
#' |
|
||||
#' @section IMPORTANT: |
|
||||
#' |
|
||||
#' Since R 3.5 (I don't remember this happening in R 3.4.x) signals sent from interrupting |
|
||||
#' Athena JDBC calls crash the R interpreter. You need to set the `-Xrs` option to avoid |
|
||||
#' signals being passed on to the JVM owner. That has to be done _before_ `rJava` is |
|
||||
#' loaded so you either need to remember to put it at the top of all scripts _or_ stick this |
|
||||
#' in your local `~/.Rprofile` and/or sitewide `Rprofile`: |
|
||||
#' |
|
||||
#' |
|
||||
#' if (!grepl("-Xrs", getOption("java.parameters", ""))) { |
|
||||
#' options( |
|
||||
#' "java.parameters" = c(getOption("java.parameters", default = NULL), "-Xrs") |
|
||||
#' ) |
|
||||
#' } |
|
||||
#' |
|
||||
#' @md |
|
||||
#' @encoding UTF-8 |
|
||||
#' @keywords internal |
|
||||
#' @docType package |
|
||||
#' @author Bob Rudis (bob@@rud.is) |
|
||||
#' @import RJDBC DBI bit64 dbplyr |
|
||||
#' @references [Simba Athena JDBC Driver with SQL Connector Installation and Configuration Guide](https://s3.amazonaws.com/athena-downloads/drivers/JDBC/SimbaAthenaJDBC_2.0.6/docs/Simba+Athena+JDBC+Driver+Install+and+Configuration+Guide.pdf) |
|
||||
NULL |
|
||||
|
|
||||
|
|
||||
#' Use Credentials from .aws/credentials File |
|
||||
#' |
|
||||
#' @md |
|
||||
#' @importFrom aws.signature use_credentials read_credentials |
|
||||
#' @references [aws.signature::use_credentials()] / [aws.signature::read_credentials()] |
|
||||
#' @name use_credentials |
|
||||
#' @rdname use_credentials |
|
||||
#' @inheritParams aws.signature::use_credentials |
|
||||
#' @export |
|
||||
NULL |
|
||||
|
|
||||
#' @name read_credentials |
|
||||
#' @rdname use_credentials |
|
||||
#' @export |
|
||||
NULL |
|
@ -0,0 +1,33 @@ |
|||||
|
#' Access and Query Amazon Athena via DBI/JDBC |
||||
|
#' |
||||
|
#' Methods are provided to connect to 'Amazon' 'Athena', lookup |
||||
|
#' schemas/tables, perform queries and retrieve query results using the |
||||
|
#' Athena JDBC driver found in 'metis.jars'. |
||||
|
#' |
||||
|
#' @name metis |
||||
|
#' |
||||
|
#' @md |
||||
|
#' @encoding UTF-8 |
||||
|
#' @keywords internal |
||||
|
#' @docType package |
||||
|
#' @author Bob Rudis (bob@@rud.is) |
||||
|
#' @import RJDBC DBI bit64 metis.jars |
||||
|
#' @importFrom methods as callNextMethod |
||||
|
#' @references [Simba Athena JDBC Driver with SQL Connector Installation and Configuration Guide](https://s3.amazonaws.com/athena-downloads/drivers/JDBC/SimbaAthenaJDBC_2.0.6/docs/Simba+Athena+JDBC+Driver+Install+and+Configuration+Guide.pdf) |
||||
|
NULL |
||||
|
|
||||
|
#' Use Credentials from .aws/credentials File |
||||
|
#' |
||||
|
#' @md |
||||
|
#' @importFrom aws.signature use_credentials read_credentials |
||||
|
#' @references [aws.signature::use_credentials()] / [aws.signature::read_credentials()] |
||||
|
#' @name use_credentials |
||||
|
#' @rdname use_credentials |
||||
|
#' @inheritParams aws.signature::use_credentials |
||||
|
#' @export |
||||
|
NULL |
||||
|
|
||||
|
#' @name read_credentials |
||||
|
#' @rdname use_credentials |
||||
|
#' @export |
||||
|
NULL |
@ -1,124 +0,0 @@ |
|||||
#' @rdname Athena
#' @keywords internal
#' @export
db_data_type.AthenaConnection <- function(con, fields, ...) {

  # Maps each element of `fields` to an SQL type name, keyed on the element's
  # first class, and returns the names as a character vector with one entry
  # per field (names of `fields` are carried over by vapply()).
  # `con` and `...` are part of the method signature but are not used here.

  data_type <- function(x) {
    switch(
      class(x)[1],
      integer64 = "BIGINT",
      logical = "BOOLEAN",
      integer = "INTEGER",
      numeric = "DOUBLE",
      factor = "CHARACTER",
      character = "CHARACTER",
      Date = "DATE",
      POSIXct = "TIMESTAMP",
      # Unnamed final argument is the switch() fallback: any other class
      # is rejected rather than silently mapped.
      stop("Can't map type ", paste(class(x), collapse = "/"),
           " to a supported database type.")
    )
  }

  vapply(fields, data_type, character(1))

}
|
||||
|
|
||||
#' @rdname Athena
#' @keywords internal
#' @export
sql_translate_env.AthenaConnection <- function(con) {

  # Builds the dbplyr translation table for this connection class: three
  # translators (scalar, aggregate, window), each extending the matching
  # dbplyr base translator with the overrides listed below.
  # `con` is only used for method dispatch; it is not read in the body.
  #
  # NOTE: string literals passed to dbplyr::build_sql() are kept inline on
  # purpose, since build_sql() treats literal strings as raw SQL and escapes
  # everything else.

  dbplyr::sql_variant(

    scalar = dbplyr::sql_translator(
      .parent = dbplyr::base_scalar,
      `!=` = dbplyr::sql_infix("<>"),

      # Type casts. The leading space before AS keeps the escaped expression
      # from fusing with the keyword.
      as.integer64 = function(x) dbplyr::build_sql("CAST(", x, " AS BIGINT)"),
      as.numeric = function(x) dbplyr::build_sql("CAST(", x, " AS DOUBLE)"),
      as.character = function(x) dbplyr::build_sql("CAST(", x, " AS CHARACTER)"),
      as.date = function(x) dbplyr::build_sql("CAST(", x, " AS DATE)"),
      as.Date = function(x) dbplyr::build_sql("CAST(", x, " AS DATE)"),
      as.POSIXct = function(x) dbplyr::build_sql("CAST(", x, " AS TIMESTAMP)"),
      as.posixct = function(x) dbplyr::build_sql("CAST(", x, " AS TIMESTAMP)"),
      as.logical = function(x) dbplyr::build_sql("CAST(", x, " AS BOOLEAN)"),

      # R-style helpers; note the argument reordering relative to the R call.
      date_part = function(x, y) dbplyr::build_sql("DATE_PART(", x, ",", y ,")"),
      grepl = function(x, y) dbplyr::build_sql("CONTAINS(", y, ", ", x, ")"),
      gsub = function(x, y, z) dbplyr::build_sql("REGEXP_REPLACE(", z, ", ", x, ",", y ,")"),
      trimws = function(x) dbplyr::build_sql("TRIM(both ' ' FROM ", x, ")"),

      # Numeric functions.
      cbrt = dbplyr::sql_prefix("CBRT", 1),
      degrees = dbplyr::sql_prefix("DEGREES", 1),
      e = dbplyr::sql_prefix("E", 0),
      row_number = dbplyr::sql_prefix("row_number", 0),
      lshift = dbplyr::sql_prefix("LSHIFT", 2),
      mod = dbplyr::sql_prefix("MOD", 2),
      age = dbplyr::sql_prefix("AGE", 1),
      negative = dbplyr::sql_prefix("NEGATIVE", 1),
      pi = dbplyr::sql_prefix("PI", 0),
      pow = dbplyr::sql_prefix("POW", 2),
      radians = dbplyr::sql_prefix("RADIANS", 1),
      rand = dbplyr::sql_prefix("RAND", 0),
      rshift = dbplyr::sql_prefix("RSHIFT", 2),
      trunc = dbplyr::sql_prefix("TRUNC", 2),

      # Conversion and nested-data functions.
      contains = dbplyr::sql_prefix("CONTAINS", 2),
      convert_to = dbplyr::sql_prefix("CONVERT_TO", 2),
      convert_from = dbplyr::sql_prefix("CONVERT_FROM", 2),
      string_binary = dbplyr::sql_prefix("STRING_BINARY", 1),
      binary_string = dbplyr::sql_prefix("BINARY_STRING", 1),
      to_char = dbplyr::sql_prefix("TO_CHAR", 2),
      to_date = dbplyr::sql_prefix("TO_DATE", 2),
      to_number = dbplyr::sql_prefix("TO_NUMBER", 2),
      char_to_timestamp = dbplyr::sql_prefix("TO_TIMESTAMP", 2),
      double_to_timestamp = dbplyr::sql_prefix("TO_TIMESTAMP", 1),
      char_length = dbplyr::sql_prefix("CHAR_LENGTH", 1),
      flatten = dbplyr::sql_prefix("FLATTEN", 1),
      kvgen = dbplyr::sql_prefix("KVGEN", 1),
      repeated_count = dbplyr::sql_prefix("REPEATED_COUNT", 1),
      repeated_contains = dbplyr::sql_prefix("REPEATED_CONTAINS", 2),

      # String functions.
      ilike = dbplyr::sql_prefix("ILIKE", 2),
      init_cap = dbplyr::sql_prefix("INIT_CAP", 1),
      length = dbplyr::sql_prefix("LENGTH", 1),
      lower = dbplyr::sql_prefix("LOWER", 1),
      tolower = dbplyr::sql_prefix("LOWER", 1),
      ltrim = dbplyr::sql_prefix("LTRIM", 2),
      nullif = dbplyr::sql_prefix("NULLIF", 2),
      position = function(x, y) dbplyr::build_sql("POSITION(", x, " IN ", y, ")"),
      regexp_replace = dbplyr::sql_prefix("REGEXP_REPLACE", 3),
      rtrim = dbplyr::sql_prefix("RTRIM", 2),
      rpad = dbplyr::sql_prefix("RPAD", 2),
      rpad_with = dbplyr::sql_prefix("RPAD", 3),
      lpad = dbplyr::sql_prefix("LPAD", 2),
      lpad_with = dbplyr::sql_prefix("LPAD", 3),
      strpos = dbplyr::sql_prefix("STRPOS", 2),
      substr = dbplyr::sql_prefix("SUBSTR", 3),
      trim = function(x, y, z) dbplyr::build_sql("TRIM(", x, " ", y, " FROM ", z, ")"),
      upper = dbplyr::sql_prefix("UPPER", 1),
      toupper = dbplyr::sql_prefix("UPPER", 1)
    ),

    aggregate = dbplyr::sql_translator(
      .parent = dbplyr::base_agg,
      n = function() dbplyr::sql("COUNT(*)"),
      cor = dbplyr::sql_prefix("CORR"),
      cov = dbplyr::sql_prefix("COVAR_SAMP"),
      sd = dbplyr::sql_prefix("STDDEV_SAMP"),
      var = dbplyr::sql_prefix("VAR_SAMP"),
      n_distinct = function(x) {
        dbplyr::build_sql(dbplyr::sql("COUNT(DISTINCT "), x, dbplyr::sql(")"))
      }
    ),

    window = dbplyr::sql_translator(
      .parent = dbplyr::base_win,
      n = function() {
        dbplyr::win_over(
          dbplyr::sql("count(*)"),
          partition = dbplyr::win_current_group()
        )
      },
      cor = dbplyr::win_recycled("corr"),
      cov = dbplyr::win_recycled("covar_samp"),
      sd = dbplyr::win_recycled("stddev_samp"),
      var = dbplyr::win_recycled("var_samp"),
      all = dbplyr::win_recycled("bool_and"),
      any = dbplyr::win_recycled("bool_or")
    )

  )

}
|
@ -1,11 +1,2 @@ |
|||||
# Package load hook, shown here as a rendered diff: the removed and the added
# version of `.onLoad` are interleaved line by line (the hunk header above
# reads "-1,11 +1,2"), so this span is not runnable R as it stands.
#
# Removed version, as far as the visible lines show:
#   * calls rJava::.jpackage() for the package with jars = "*";
#   * adds every file under file.path(getwd(), "inst/java") to the Java class
#     path. NOTE(review): this depends on the current working directory
#     rather than the installed package location -- confirm it was a
#     development-only convenience;
#   * reads the "java.parameters" option and emits a packageStartupMessage()
#     when no element of it matches "-Xrs".
#
# Added version: presumably just the function header and closing brace (an
# empty body), going by the two-line "+1,2" count -- TODO confirm against the
# repository.
.onLoad <- function(libname, pkgname) { |
.onLoad <- function(libname, pkgname) { |
||||
rJava::.jpackage(pkgname, jars = "*", lib.loc = libname) |

||||
rJava::.jaddClassPath(dir(file.path(getwd(), "inst/java"), full.names = TRUE)) |

||||
o <- getOption("java.parameters", "") |

||||
if (!any(grepl("-Xrs", o))) { |

||||
packageStartupMessage( |

||||
"Did not find '-Xrs' in java.parameters option. Until rJava is updated, ", |

||||
"please set this up in your/an Rprofile or at the start of scripts." |

||||
) |

||||
} |

||||
} |
} |
||||
|
Binary file not shown.
@ -1 +0,0 @@ |
|||||
log4j.rootLogger=WARN |
|
@ -0,0 +1,18 @@ |
|||||
|
% Generated by roxygen2: do not edit by hand |
||||
|
% Please edit documentation in R/z-dbGetQuery.R |
||||
|
\docType{methods} |
||||
|
\name{dbGetInfo,AthenaConnection-method} |
||||
|
\alias{dbGetInfo,AthenaConnection-method} |
||||
|
\title{Retrieve connection/driver/database metadata} |
||||
|
\usage{ |
||||
|
\S4method{dbGetInfo}{AthenaConnection}(dbObj, ...) |
||||
|
} |
||||
|
\arguments{ |
||||
|
\item{dbObj}{driver/connection} |
||||
|
|
||||
|
\item{...}{unused} |
||||
|
} |
||||
|
\description{ |
||||
|
Retrieve connection/driver/database metadata |
||||
|
} |
||||
|
\keyword{internal} |
@ -0,0 +1,18 @@ |
|||||
|
% Generated by roxygen2: do not edit by hand |
||||
|
% Please edit documentation in R/z-dbGetQuery.R |
||||
|
\docType{methods} |
||||
|
\name{dbGetInfo,AthenaDriver-method} |
||||
|
\alias{dbGetInfo,AthenaDriver-method} |
||||
|
\title{Retrieve connection/driver/database metadata} |
||||
|
\usage{ |
||||
|
\S4method{dbGetInfo}{AthenaDriver}(dbObj, ...) |
||||
|
} |
||||
|
\arguments{ |
||||
|
\item{dbObj}{driver/connection} |
||||
|
|
||||
|
\item{...}{unused} |
||||
|
} |
||||
|
\description{ |
||||
|
Retrieve connection/driver/database metadata |
||||
|
} |
||||
|
\keyword{internal} |
@ -0,0 +1,20 @@ |
|||||
|
% Generated by roxygen2: do not edit by hand |
||||
|
% Please edit documentation in R/metis-package.R |
||||
|
\docType{package} |
||||
|
\encoding{UTF-8} |
||||
|
\name{metis} |
||||
|
\alias{metis} |
||||
|
\alias{metis-package} |
||||
|
\title{Access and Query Amazon Athena via DBI/JDBC} |
||||
|
\description{ |
||||
|
Methods are provided to connect to 'Amazon' 'Athena', lookup |
||||
|
schemas/tables, perform queries and retrieve query results using the |
||||
|
Athena JDBC driver found in 'metis.jars'. |
||||
|
} |
||||
|
\references{ |
||||
|
\href{https://s3.amazonaws.com/athena-downloads/drivers/JDBC/SimbaAthenaJDBC_2.0.6/docs/Simba+Athena+JDBC+Driver+Install+and+Configuration+Guide.pdf}{Simba Athena JDBC Driver with SQL Connector Installation and Configuration Guide} |
||||
|
} |
||||
|
\author{ |
||||
|
Bob Rudis (bob@rud.is) |
||||
|
} |
||||
|
\keyword{internal} |
@ -1,35 +0,0 @@ |
|||||
% Generated by roxygen2: do not edit by hand |
|
||||
% Please edit documentation in R/metis-lite-package.R |
|
||||
\docType{package} |
|
||||
\encoding{UTF-8} |
|
||||
\name{metis.lite} |
|
||||
\alias{metis.lite} |
|
||||
\alias{metis.lite-package} |
|
||||
\title{Access and Query Amazon Athena via DBI/JDBC} |
|
||||
\description{ |
|
||||
Methods are provided to connect to 'Amazon' 'Athena', lookup schemas/tables, |
|
||||
perform queries and retrieve query results. A lightweight 'RJDBC' implementation |
|
||||
is included along with an interface to the 'AWS' command-line utility. |
|
||||
} |
|
||||
\section{IMPORTANT}{ |
|
||||
|
|
||||
|
|
||||
Since R 3.5 (I don't remember this happening in R 3.4.x) signals sent from interrupting |
|
||||
Athena JDBC calls crash the R interpreter. You need to set the \code{-Xrs} option to avoid |
|
||||
signals being passed on to the JVM owner. That has to be done \emph{before} \code{rJava} is |
|
||||
loaded so you either need to remember to put it at the top of all scripts \emph{or} stick this |
|
||||
in your local \code{~/.Rprofile} and/or sitewide \code{Rprofile}:\preformatted{if (!grepl("-Xrs", getOption("java.parameters", ""))) { |
|
||||
options( |
|
||||
"java.parameters" = c(getOption("java.parameters", default = NULL), "-Xrs") |
|
||||
) |
|
||||
} |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
\references{ |
|
||||
\href{https://s3.amazonaws.com/athena-downloads/drivers/JDBC/SimbaAthenaJDBC_2.0.6/docs/Simba+Athena+JDBC+Driver+Install+and+Configuration+Guide.pdf}{Simba Athena JDBC Driver with SQL Connector Installation and Configuration Guide} |
|
||||
} |
|
||||
\author{ |
|
||||
Bob Rudis (bob@rud.is) |
|
||||
} |
|
||||
\keyword{internal} |
|
@ -1,2 +1,2 @@ |
|||||
library(testthat) |
library(testthat) |
||||
test_check("metis-lite") |
test_check("metis") |
||||
|
@ -1,6 +0,0 @@ |
|||||
context("basic functionality") |
|
||||
test_that("we can do something", { |
|
||||
|
|
||||
#expect_that(some_function(), is_a("data.frame")) |
|
||||
|
|
||||
}) |
|
@ -0,0 +1,28 @@ |
|||||
|
context("Driver & queries work")

# Integration test: talks to a live Amazon Athena endpoint using the
# credentials file and S3 staging bucket named below, so it needs network
# access and valid AWS credentials on the machine running it.
test_that("the Athena driver connects and answers metadata and data queries", {

  # Inside test_that() so the skip is recorded against this test.
  skip_on_cran()

  drv <- metis::Athena()
  expect_is(drv, "AthenaDriver")

  # No trailing comma after the last argument: that would pass an extra,
  # empty argument to dbConnect().
  con <- dbConnect(
    drv = drv,
    schema_name = "sampledb",
    provider = "com.simba.athena.amazonaws.auth.PropertiesFileCredentialsProvider",
    AwsCredentialsProviderArguments = path.expand("~/.aws/athenaCredentials.props"),
    s3_staging_dir = "s3://aws-athena-query-results-569593279821-us-east-1"
  )
  expect_is(con, "AthenaConnection")

  # Schema / table / column metadata.
  expect_equal(dbListTables(con, schema="sampledb"), "elb_logs")
  expect_true(dbExistsTable(con, "elb_logs", schema="sampledb"))
  expect_true("url" %in% dbListFields(con, "elb_logs", "sampledb"))

  # A small query must come back as a data frame.
  expect_is(
    dbGetQuery(con, "SELECT * FROM sampledb.elb_logs LIMIT 10"),
    "data.frame"
  )

  # Release the JDBC connection once the checks are done.
  dbDisconnect(con)

})
Loading…
Reference in new issue