mirror of https://git.sr.ht/~hrbrmstr/metis
27 changed files with 232 additions and 394 deletions
@ -1,48 +0,0 @@ |
|||
#' Access and Query Amazon Athena via DBI/JDBC |
|||
#' |
|||
#' Methods are provided to connect to 'Amazon' 'Athena', lookup schemas/tables, |
|||
#' perform queries and retrieve query results. A lightweight 'RJDBC' implementation |
|||
#' is included along with an interface to the 'AWS' command-line utility. |
|||
#' |
|||
#' @name metis.lite |
|||
#' |
|||
#' @section IMPORTANT: |
|||
#' |
|||
#' Since R 3.5 (I don't remember this happening in R 3.4.x) signals sent from interrupting |
|||
#' Athena JDBC calls crash the R interpreter. You need to set the `-Xrs` option to avoid |
|||
#' signals being passed on to the JVM owner. That has to be done _before_ `rJava` is |
|||
#' loaded so you either need to remember to put it at the top of all scripts _or_ stick this |
|||
#' in your local `~/.Rprofile` and/or sitewide `Rprofile`: |
|||
#' |
|||
#' |
|||
#' if (!grepl("-Xrs", getOption("java.parameters", ""))) { |
|||
#' options( |
|||
#' "java.parameters" = c(getOption("java.parameters", default = NULL), "-Xrs") |
|||
#' ) |
|||
#' } |
|||
#' |
|||
#' @md |
|||
#' @encoding UTF-8 |
|||
#' @keywords internal |
|||
#' @docType package |
|||
#' @author Bob Rudis (bob@@rud.is) |
|||
#' @import RJDBC DBI bit64 dbplyr |
|||
#' @references [Simba Athena JDBC Driver with SQL Connector Installation and Configuration Guide](https://s3.amazonaws.com/athena-downloads/drivers/JDBC/SimbaAthenaJDBC_2.0.6/docs/Simba+Athena+JDBC+Driver+Install+and+Configuration+Guide.pdf) |
|||
NULL |
|||
|
|||
|
|||
#' Use Credentials from .aws/credentials File |
|||
#' |
|||
#' @md |
|||
#' @importFrom aws.signature use_credentials read_credentials |
|||
#' @references [aws.signature::use_credentials()] / [aws.signature::read_credentials()] |
|||
#' @name use_credentials |
|||
#' @rdname use_credentials |
|||
#' @inheritParams aws.signature::use_credentials |
|||
#' @export |
|||
NULL |
|||
|
|||
#' @name read_credentials |
|||
#' @rdname use_credentials |
|||
#' @export |
|||
NULL |
@ -0,0 +1,33 @@ |
|||
#' Access and Query Amazon Athena via DBI/JDBC |
|||
#' |
|||
#' Methods are provided to connect to 'Amazon' 'Athena', lookup |
|||
#' schemas/tables, perform queries and retrieve query results using the |
|||
#' Athena JDBC driver found in 'metis.jars'. |
|||
#' |
|||
#' @name metis |
|||
#' |
|||
#' @md |
|||
#' @encoding UTF-8 |
|||
#' @keywords internal |
|||
#' @docType package |
|||
#' @author Bob Rudis (bob@@rud.is) |
|||
#' @import RJDBC DBI bit64 metis.jars |
|||
#' @importFrom methods as callNextMethod |
|||
#' @references [Simba Athena JDBC Driver with SQL Connector Installation and Configuration Guide](https://s3.amazonaws.com/athena-downloads/drivers/JDBC/SimbaAthenaJDBC_2.0.6/docs/Simba+Athena+JDBC+Driver+Install+and+Configuration+Guide.pdf) |
|||
NULL |
|||
|
|||
#' Use Credentials from .aws/credentials File |
|||
#' |
|||
#' @md |
|||
#' @importFrom aws.signature use_credentials read_credentials |
|||
#' @references [aws.signature::use_credentials()] / [aws.signature::read_credentials()] |
|||
#' @name use_credentials |
|||
#' @rdname use_credentials |
|||
#' @inheritParams aws.signature::use_credentials |
|||
#' @export |
|||
NULL |
|||
|
|||
#' @name read_credentials |
|||
#' @rdname use_credentials |
|||
#' @export |
|||
NULL |
@ -1,124 +0,0 @@ |
|||
#' @rdname Athena
#' @keywords internal
#' @export
db_data_type.AthenaConnection <- function(con, fields, ...) {

  # Maps every element of `fields` to the name of a SQL column type, keyed on
  # the element's first class. `con` and `...` are not used in the body.
  #
  # Returns a character vector with one type name per element of `fields`
  # (names are carried over by vapply()). An element whose first class has no
  # mapping raises an error naming the offending class(es).

  data_type <- function(x) {
    switch(
      class(x)[1],
      integer64 = "BIGINT",
      logical = "BOOLEAN",
      integer = "INTEGER",
      numeric = "DOUBLE",
      factor = "CHARACTER",
      character = "CHARACTER",
      Date = "DATE",
      POSIXct = "TIMESTAMP",
      # unnamed last argument == switch() fall-through for unmapped classes
      stop("Can't map type ", paste(class(x), collapse = "/"),
           " to a supported database type.")
    )
  }

  vapply(fields, data_type, character(1))

}
|||
|
|||
#' @rdname Athena
#' @keywords internal
#' @export
sql_translate_env.AthenaConnection <- function(con) {

  # Builds the dbplyr SQL variant used for this connection class. `con` is
  # only used for dispatch; the body does not read it.
  #
  # The variant has three translators, each extending the matching dbplyr
  # base translator (`base_scalar`, `base_agg`, `base_win`):
  #   * scalar    - casts, string helpers and a set of prefix functions
  #   * aggregate - counting and sample statistics
  #   * window    - windowed counterparts of the aggregates plus all/any

  dbplyr::sql_variant(

    scalar = dbplyr::sql_translator(
      .parent = dbplyr::base_scalar,
      `!=` = dbplyr::sql_infix("<>"),
      # note the leading space before AS: without it the operand and the
      # keyword run together (e.g. "CAST(1AS BIGINT)")
      as.integer64 = function(x) dbplyr::build_sql("CAST(", x, " AS BIGINT)"),
      as.numeric = function(x) dbplyr::build_sql("CAST(", x, " AS DOUBLE)"),
      as.character = function(x) dbplyr::build_sql("CAST(", x, " AS CHARACTER)"),
      as.date = function(x) dbplyr::build_sql("CAST(", x, " AS DATE)"),
      as.Date = function(x) dbplyr::build_sql("CAST(", x, " AS DATE)"),
      as.POSIXct = function(x) dbplyr::build_sql("CAST(", x, " AS TIMESTAMP)"),
      as.posixct = function(x) dbplyr::build_sql("CAST(", x, " AS TIMESTAMP)"),
      as.logical = function(x) dbplyr::build_sql("CAST(", x, " AS BOOLEAN)"),
      date_part = function(x, y) dbplyr::build_sql("DATE_PART(", x, ",", y ,")"),
      # grepl()/gsub() take the pattern first in R; the SQL calls take the
      # subject first, hence the swapped argument order below
      grepl = function(x, y) dbplyr::build_sql("CONTAINS(", y, ", ", x, ")"),
      gsub = function(x, y, z) dbplyr::build_sql("REGEXP_REPLACE(", z, ", ", x, ",", y ,")"),
      trimws = function(x) dbplyr::build_sql("TRIM(both ' ' FROM ", x, ")"),
      cbrt = dbplyr::sql_prefix("CBRT", 1),
      degrees = dbplyr::sql_prefix("DEGREES", 1),
      e = dbplyr::sql_prefix("E", 0),
      row_number = dbplyr::sql_prefix("row_number", 0),
      lshift = dbplyr::sql_prefix("LSHIFT", 2),
      mod = dbplyr::sql_prefix("MOD", 2),
      age = dbplyr::sql_prefix("AGE", 1),
      negative = dbplyr::sql_prefix("NEGATIVE", 1),
      pi = dbplyr::sql_prefix("PI", 0),
      pow = dbplyr::sql_prefix("POW", 2),
      radians = dbplyr::sql_prefix("RADIANS", 1),
      rand = dbplyr::sql_prefix("RAND", 0),
      rshift = dbplyr::sql_prefix("RSHIFT", 2),
      trunc = dbplyr::sql_prefix("TRUNC", 2),
      contains = dbplyr::sql_prefix("CONTAINS", 2),
      convert_to = dbplyr::sql_prefix("CONVERT_TO", 2),
      convert_from = dbplyr::sql_prefix("CONVERT_FROM", 2),
      string_binary = dbplyr::sql_prefix("STRING_BINARY", 1),
      binary_string = dbplyr::sql_prefix("BINARY_STRING", 1),
      to_char = dbplyr::sql_prefix("TO_CHAR", 2),
      to_date = dbplyr::sql_prefix("TO_DATE", 2),
      to_number = dbplyr::sql_prefix("TO_NUMBER", 2),
      char_to_timestamp = dbplyr::sql_prefix("TO_TIMESTAMP", 2),
      double_to_timestamp = dbplyr::sql_prefix("TO_TIMESTAMP", 1),
      char_length = dbplyr::sql_prefix("CHAR_LENGTH", 1),
      flatten = dbplyr::sql_prefix("FLATTEN", 1),
      kvgen = dbplyr::sql_prefix("KVGEN", 1),
      repeated_count = dbplyr::sql_prefix("REPEATED_COUNT", 1),
      repeated_contains = dbplyr::sql_prefix("REPEATED_CONTAINS", 2),
      ilike = dbplyr::sql_prefix("ILIKE", 2),
      init_cap = dbplyr::sql_prefix("INIT_CAP", 1),
      length = dbplyr::sql_prefix("LENGTH", 1),
      lower = dbplyr::sql_prefix("LOWER", 1),
      tolower = dbplyr::sql_prefix("LOWER", 1),
      ltrim = dbplyr::sql_prefix("LTRIM", 2),
      nullif = dbplyr::sql_prefix("NULLIF", 2),
      position = function(x, y) dbplyr::build_sql("POSITION(", x, " IN ", y, ")"),
      regexp_replace = dbplyr::sql_prefix("REGEXP_REPLACE", 3),
      rtrim = dbplyr::sql_prefix("RTRIM", 2),
      rpad = dbplyr::sql_prefix("RPAD", 2),
      rpad_with = dbplyr::sql_prefix("RPAD", 3),
      lpad = dbplyr::sql_prefix("LPAD", 2),
      lpad_with = dbplyr::sql_prefix("LPAD", 3),
      strpos = dbplyr::sql_prefix("STRPOS", 2),
      substr = dbplyr::sql_prefix("SUBSTR", 3),
      trim = function(x, y, z) dbplyr::build_sql("TRIM(", x, " ", y, " FROM ", z, ")"),
      upper = dbplyr::sql_prefix("UPPER", 1),
      toupper = dbplyr::sql_prefix("UPPER", 1)
    ),

    aggregate = dbplyr::sql_translator(
      .parent = dbplyr::base_agg,
      n = function() dbplyr::sql("COUNT(*)"),
      cor = dbplyr::sql_prefix("CORR"),
      cov = dbplyr::sql_prefix("COVAR_SAMP"),
      sd = dbplyr::sql_prefix("STDDEV_SAMP"),
      var = dbplyr::sql_prefix("VAR_SAMP"),
      n_distinct = function(x) {
        dbplyr::build_sql(dbplyr::sql("COUNT(DISTINCT "), x, dbplyr::sql(")"))
      }
    ),

    window = dbplyr::sql_translator(
      .parent = dbplyr::base_win,
      n = function() { dbplyr::win_over(dbplyr::sql("count(*)"),
                                        partition = dbplyr::win_current_group()) },
      cor = dbplyr::win_recycled("corr"),
      cov = dbplyr::win_recycled("covar_samp"),
      sd = dbplyr::win_recycled("stddev_samp"),
      var = dbplyr::win_recycled("var_samp"),
      all = dbplyr::win_recycled("bool_and"),
      any = dbplyr::win_recycled("bool_or")
    )

  )

}
@ -1,11 +1,2 @@ |
|||
.onLoad <- function(libname, pkgname) {

  # Package load hook: hand the package over to rJava, asking for every jar.
  rJava::.jpackage(pkgname, jars = "*", lib.loc = libname)

  # Additionally put whatever sits under inst/java of the *current working
  # directory* on the class path.
  # NOTE(review): presumably a development-time aid - confirm it is still wanted.
  cwd_jars <- dir(file.path(getwd(), "inst/java"), full.names = TRUE)
  rJava::.jaddClassPath(cwd_jars)

  # Nothing more to do when any java.parameters entry already mentions -Xrs.
  java_params <- getOption("java.parameters", "")
  if (any(grepl("-Xrs", java_params))) {
    return(invisible(NULL))
  }

  packageStartupMessage(
    "Did not find '-Xrs' in java.parameters option. Until rJava is updated, ",
    "please set this up in your/an Rprofile or at the start of scripts."
  )

}
|||
|
Binary file not shown.
@ -1 +0,0 @@ |
|||
log4j.rootLogger=WARN |
@ -0,0 +1,18 @@ |
|||
% Generated by roxygen2: do not edit by hand |
|||
% Please edit documentation in R/z-dbGetQuery.R |
|||
\docType{methods} |
|||
\name{dbGetInfo,AthenaConnection-method} |
|||
\alias{dbGetInfo,AthenaConnection-method} |
|||
\title{Retrieve connection/driver/database metadata} |
|||
\usage{ |
|||
\S4method{dbGetInfo}{AthenaConnection}(dbObj, ...) |
|||
} |
|||
\arguments{ |
|||
\item{dbObj}{driver/connection} |
|||
|
|||
\item{...}{unused} |
|||
} |
|||
\description{ |
|||
Retrieve connection/driver/database metadata |
|||
} |
|||
\keyword{internal} |
@ -0,0 +1,18 @@ |
|||
% Generated by roxygen2: do not edit by hand |
|||
% Please edit documentation in R/z-dbGetQuery.R |
|||
\docType{methods} |
|||
\name{dbGetInfo,AthenaDriver-method} |
|||
\alias{dbGetInfo,AthenaDriver-method} |
|||
\title{Retrieve connection/driver/database metadata} |
|||
\usage{ |
|||
\S4method{dbGetInfo}{AthenaDriver}(dbObj, ...) |
|||
} |
|||
\arguments{ |
|||
\item{dbObj}{driver/connection} |
|||
|
|||
\item{...}{unused} |
|||
} |
|||
\description{ |
|||
Retrieve connection/driver/database metadata |
|||
} |
|||
\keyword{internal} |
@ -0,0 +1,20 @@ |
|||
% Generated by roxygen2: do not edit by hand |
|||
% Please edit documentation in R/metis-package.R |
|||
\docType{package} |
|||
\encoding{UTF-8} |
|||
\name{metis} |
|||
\alias{metis} |
|||
\alias{metis-package} |
|||
\title{Access and Query Amazon Athena via DBI/JDBC} |
|||
\description{ |
|||
Methods are provided to connect to 'Amazon' 'Athena', lookup |
|||
schemas/tables, perform queries and retrieve query results using the |
|||
Athena JDBC driver found in 'metis.jars'. |
|||
} |
|||
\references{ |
|||
\href{https://s3.amazonaws.com/athena-downloads/drivers/JDBC/SimbaAthenaJDBC_2.0.6/docs/Simba+Athena+JDBC+Driver+Install+and+Configuration+Guide.pdf}{Simba Athena JDBC Driver with SQL Connector Installation and Configuration Guide} |
|||
} |
|||
\author{ |
|||
Bob Rudis (bob@rud.is) |
|||
} |
|||
\keyword{internal} |
@ -1,35 +0,0 @@ |
|||
% Generated by roxygen2: do not edit by hand |
|||
% Please edit documentation in R/metis-lite-package.R |
|||
\docType{package} |
|||
\encoding{UTF-8} |
|||
\name{metis.lite} |
|||
\alias{metis.lite} |
|||
\alias{metis.lite-package} |
|||
\title{Access and Query Amazon Athena via DBI/JDBC} |
|||
\description{ |
|||
Methods are provided to connect to 'Amazon' 'Athena', lookup schemas/tables, |
|||
perform queries and retrieve query results. A lightweight 'RJDBC' implementation |
|||
is included along with an interface to the 'AWS' command-line utility. |
|||
} |
|||
\section{IMPORTANT}{ |
|||
|
|||
|
|||
Since R 3.5 (I don't remember this happening in R 3.4.x) signals sent from interrupting |
|||
Athena JDBC calls crash the R interpreter. You need to set the \code{-Xrs} option to avoid |
|||
signals being passed on to the JVM owner. That has to be done \emph{before} \code{rJava} is |
|||
loaded so you either need to remember to put it at the top of all scripts \emph{or} stick this |
|||
in your local \code{~/.Rprofile} and/or sitewide \code{Rprofile}:\preformatted{if (!grepl("-Xrs", getOption("java.parameters", ""))) { |
|||
options( |
|||
"java.parameters" = c(getOption("java.parameters", default = NULL), "-Xrs") |
|||
) |
|||
} |
|||
} |
|||
} |
|||
|
|||
\references{ |
|||
\href{https://s3.amazonaws.com/athena-downloads/drivers/JDBC/SimbaAthenaJDBC_2.0.6/docs/Simba+Athena+JDBC+Driver+Install+and+Configuration+Guide.pdf}{Simba Athena JDBC Driver with SQL Connector Installation and Configuration Guide} |
|||
} |
|||
\author{ |
|||
Bob Rudis (bob@rud.is) |
|||
} |
|||
\keyword{internal} |
@ -1,2 +1,2 @@ |
|||
library(testthat) |
|||
test_check("metis-lite") |
|||
test_check("metis") |
|||
|
@ -1,6 +0,0 @@ |
|||
context("basic functionality") |
|||
test_that("we can do something", { |
|||
|
|||
#expect_that(some_function(), is_a("data.frame")) |
|||
|
|||
}) |
@ -0,0 +1,28 @@ |
|||
context("Driver & queries work")

# Live integration test: it needs the local credentials file and the S3
# staging location referenced below, so it is skipped on CRAN.
test_that("the driver connects and basic metadata/query calls work", {

  skip_on_cran()

  drv <- metis::Athena()

  expect_is(drv, "AthenaDriver")

  # No trailing comma after the last argument: an empty trailing argument
  # would be passed on to dbConnect() as a missing value.
  con <- dbConnect(
    drv = drv,
    schema_name = "sampledb",
    provider = "com.simba.athena.amazonaws.auth.PropertiesFileCredentialsProvider",
    AwsCredentialsProviderArguments = path.expand("~/.aws/athenaCredentials.props"),
    s3_staging_dir = "s3://aws-athena-query-results-569593279821-us-east-1"
  )

  # Release the connection even when one of the expectations below fails.
  on.exit(dbDisconnect(con), add = TRUE)

  expect_is(con, "AthenaConnection")

  expect_equal(dbListTables(con, schema="sampledb"), "elb_logs")

  expect_true(dbExistsTable(con, "elb_logs", schema="sampledb"))

  expect_true("url" %in% dbListFields(con, "elb_logs", "sampledb"))

  expect_is(
    dbGetQuery(con, "SELECT * FROM sampledb.elb_logs LIMIT 10"),
    "data.frame"
  )

})
Loading…
Reference in new issue