From cea670e412536becf1ade4edf8d7ec9df19c0cc8 Mon Sep 17 00:00:00 2001 From: boB Rudis Date: Sat, 16 Feb 2019 06:52:27 -0500 Subject: [PATCH] update --- .Rbuildignore | 1 + DESCRIPTION | 10 +- LICENSE | 2 + LICENSE.md | 21 ++++ NAMESPACE | 10 +- R/a-utils.R | 12 +++ R/jdbc.r | 120 ++------------------- R/metis-lite-package.R | 24 +---- R/z-dbGetQuery.R | 71 ++++++++++++ man/Athena.Rd | 4 +- man/dbConnect-AthenaDriver-method.Rd | 4 +- ...dbGetQuery-AthenaConnection-character-method.Rd | 2 +- man/metis.lite.Rd | 28 ++--- 13 files changed, 140 insertions(+), 169 deletions(-) create mode 100644 LICENSE create mode 100644 LICENSE.md create mode 100644 R/a-utils.R create mode 100644 R/z-dbGetQuery.R diff --git a/.Rbuildignore b/.Rbuildignore index edf18c6..f31e09b 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,3 +1,4 @@ +^LICENSE\.md$ ^.*\.Rproj$ ^\.Rproj\.user$ ^\.travis\.yml$ diff --git a/DESCRIPTION b/DESCRIPTION index 977e571..849a9c7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -13,7 +13,7 @@ Description: Methods are provides to connect to 'Amazon' 'Athena', lookup schema perform queries and retrieve query results. A lightweight 'RJDBC' implementation is included along with an interface to the 'AWS' command-line utility. SystemRequirements: JDK 1.8+ -License: AGPL +License: MIT + file LICENSE Suggests: testthat, covr @@ -23,9 +23,7 @@ Depends: Imports: rJava, DBI, - dplyr, - readr, - aws.signature, - uuid, - jsonlite + bit64, + dbplyr, + aws.signature RoxygenNote: 6.1.1 diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..b6a10f1 --- /dev/null +++ b/LICENSE @@ -0,0 +1,2 @@ +YEAR: 2019 +COPYRIGHT HOLDER: Bob Rudis diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..c36552c --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,21 @@ +# MIT License + +Copyright (c) 2019 Bob Rudis + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/NAMESPACE b/NAMESPACE index c14e274..e85874e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,10 +1,10 @@ # Generated by roxygen2: do not edit by hand -S3method(db_data_type,AthenaConnection) -S3method(sql_translate_env,AthenaConnection) export(Athena) export(athena_connect) +export(db_data_type.AthenaConnection) export(read_credentials) +export(sql_translate_env.AthenaConnection) export(use_credentials) exportClasses(AthenaConnection) exportClasses(AthenaDriver) @@ -18,11 +18,7 @@ exportMethods(dbReadTable) exportMethods(dbSendQuery) import(DBI) import(RJDBC) -import(dplyr) +import(bit64) importFrom(aws.signature,read_credentials) importFrom(aws.signature,use_credentials) -importFrom(jsonlite,fromJSON) importFrom(rJava,.jcall) -importFrom(readr,type_convert) -importFrom(sys,exec_internal) -importFrom(uuid,UUIDgenerate) diff --git a/R/a-utils.R b/R/a-utils.R new file mode 100644 index 0000000..17268fa --- /dev/null +++ b/R/a-utils.R @@ -0,0 +1,12 @@ +set_names <- function (object = nm, nm) { + names(object) <- nm + object +} + +as_date <- function(x) { + as.Date(x, origin = "1970-01-01") +} + +as_posixct <- function(x) { + as.POSIXct(x, origin = "1970-01-01 00:00:00") +} diff --git a/R/jdbc.r b/R/jdbc.r index d99a140..3859cbc 100644 --- a/R/jdbc.r +++ b/R/jdbc.r @@ -1,31 +1,9 @@ -.jt <- list( - BIT = -7, - TINYINT = -6, - BIGINT = -5, - LONGVARBINARY = -4, - VARBINARY = -3, - BINARY = -2, - LONGVARCHAR = -1, - .NULL = 0, - CHAR = 1, - NUMERIC = 2, - DECIMAL = 3, - INTEGER = 4, - SMALLINT = 5, - FLOAT = 6, - REAL = 7, - DOUBLE = 8, - VARCHAR = 12, - DATE = 91, - TIME = 92, - TIMESTAMP = 93, - OTHER = 1111 -) - -stats::setNames( +structure( 0:6, - c("OFF", "FATAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE") -) -> .ll_trans + .Names = c( + "OFF", "FATAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE" + ) +)-> .ll_trans #' AthenaJDBC #' @@ -85,10 +63,8 @@ setMethod( max_error_retries = 10, connection_timeout = 10000, socket_timeout = 10000, - # retry_base_delay = 100, - # retry_max_backoff_time = 1000, - log_path, - log_level, + log_path = "", + log_level = 0, ...) { conn_string = sprintf( @@ -105,8 +81,6 @@ setMethod( MaxErrorRetry = max_error_retries, ConnectTimeout = connection_timeout, SocketTimeout = socket_timeout, - # retry_base_delay = retry_base_delay, - # retry_max_backoff_time = retry_max_backoff_time, LogPath = log_path, LogLevel = log_level, AwsCredentialsProviderClass = provider, @@ -149,86 +123,6 @@ setMethod( #' AthenaJDBC #' #' @param conn Athena connection -#' @param statement SQL statement -#' @param ... unused -#' @importFrom rJava .jcall -#' @export -setMethod( - - "dbGetQuery", - signature(conn="AthenaConnection", statement="character"), - - definition = function(conn, statement, type_convert=FALSE, ...) { - - r <- dbSendQuery(conn, statement, ...) - - on.exit(.jcall(r@stat, "V", "close")) - - structure( - list( - cols = list(), - default = structure(list(), class = c("collector_guess", "collector")) - ), - class = "col_spec" - ) -> l - - cols <- .jcall(r@md, "I", "getColumnCount") - nms <- c() - - cts <- rep(0L, cols) - for (i in 1:cols) { - ct <- .jcall(r@md, "I", "getColumnType", i) - if (ct == .jt$CHAR) { - l$cols[[i]] <- col_character() - } else if (ct == .jt$NUMERIC) { - l$cols[[i]] <- col_double() - } else if (ct == .jt$DECIMAL) { - l$cols[[i]] <- col_double() - } else if (ct == .jt$INTEGER) { - l$cols[[i]] <- col_integer() - } else if (ct == .jt$SMALLINT) { - l$cols[[i]] <- col_integer() - } else if (ct == .jt$TINYINT) { - l$cols[[i]] <- col_integer() - } else if (ct == .jt$BIGINT) { - l$cols[[i]] <- col_double() - } else if (ct == .jt$FLOAT) { - l$cols[[i]] <- col_double() - } else if (ct == .jt$REAL) { - l$cols[[i]] <- col_double() - } else if (ct == .jt$DOUBLE) { - l$cols[[i]] <- col_double() - } else if (ct == .jt$VARCHAR) { - l$cols[[i]] <- col_character() - } else if (ct == .jt$DATE) { - l$cols[[i]] <- col_date() - } else if (ct == .jt$TIME) { - l$cols[[i]] <- col_time() - } else if (ct == .jt$TIMESTAMP) { - l$cols[[i]] <- col_datetime() - } else if (ct == .jt$BIT) { - l$cols[[i]] <- col_logical() - } else { - l$cols[[i]] <- col_character() - } - nms[i] <- .jcall(r@md, "S", "getColumnLabel", i) - } - - l$cols <- stats::setNames(l$cols, nms) - - res <- fetch(r, -1, block=1000) - res <- readr::type_convert(res, col_types = l) - - class(res) <- c("tbl_df", "tbl", "data.frame") - - res - - } - -) -#' AthenaJDBC -#' -#' @param conn Athena connection #' @param pattern table name pattern #' @param schema Athena schema name #' @param ... unused diff --git a/R/metis-lite-package.R b/R/metis-lite-package.R index 3d72b10..11f6182 100644 --- a/R/metis-lite-package.R +++ b/R/metis-lite-package.R @@ -10,16 +10,7 @@ #' #' Since R 3.5 (I don't remember this happening in R 3.4.x) signals sent from interrupting #' Athena JDBC calls crash the R #' interpreter. You need to set the `-Xrs` option to avoid -#' signals being passed on to the JVM owner. That has to be done _#' before_ `rJava` is -#' loaded so you either need to remember to put it at the top of all scripts _or_ stick this -#' in your local #' `~/.Rprofile` and/or sitewide `Rprofile`: -#' -#' -#' @section IMPORTANT: -#' -#' Since R 3.5 (I don't remember this happening in R 3.4.x) signals sent from interrupting -#' Athena JDBC calls crash the R #' interpreter. You need to set the `-Xrs` option to avoid -#' signals being passed on to the JVM owner. That has to be done _#' before_ `rJava` is +#' signals being passed on to the JVM owner. That has to be done _before_ `rJava` is #' loaded so you either need to remember to put it at the top of all scripts _or_ stick this #' in your local #' `~/.Rprofile` and/or sitewide `Rprofile`: #' @@ -30,24 +21,19 @@ #' ) #' } #' +#' @md #' @encoding UTF-8 +#' @keywords internal #' @docType package #' @author Bob Rudis (bob@@rud.is) -#' @import RJDBC -#' @keywords internal -#' @import DBI -#' @import dplyr -#' @importFrom jsonlite fromJSON -#' @importFrom readr type_convert -#' @importFrom uuid UUIDgenerate -#' @importFrom sys exec_internal -#' @importFrom aws.signature use_credentials read_credentials +#' @import RJDBC DBI bit64 dbplyr NULL #' Use Credentials from .aws/credentials File #' #' @md +#' @importFrom aws.signature use_credentials read_credentials #' @references [aws.signature::use_credentials()] / [aws.signature::read_credentials()] #' @name use_credentials #' @rdname use_credentials diff --git a/R/z-dbGetQuery.R b/R/z-dbGetQuery.R new file mode 100644 index 0000000..5440275 --- /dev/null +++ b/R/z-dbGetQuery.R @@ -0,0 +1,71 @@ +list( + "-7" = as.logical, # BIT + "-6" = as.integer, # TINYINT + "-5" = bit64::as.integer64, # BIGINT + "-4" = as.character, # LONGVARBINARY + "-3" = as.character, # VARBINARY + "-2" = as.character, # BINARY + "-1" = as.character, # LONGVARCHAR + "0" = as.character, # NULL + "1" = as.character, # CHAR + "2" = as.double, # NUMERIC + "3" = as.double, # DECIMAL + "4" = as.integer, # INTEGER + "5" = as.integer, # SMALLINT + "6" = as.double, # FLOAT + "7" = as.double, # REAL + "8" = as.double, # DOUBLE + "12" = as.character, # VARCHAR + "16" = as.logical, # BOOLEAN + "91" = as_date, # DATE + "92" = as.character, # TIME + "93" = as_posixct, # TIMESTAMP + "1111" = as.character # OTHER +) -> .jdbc_converters + +#' AthenaJDBC +#' +#' @param conn Athena connection +#' @param statement SQL statement +#' @param ... unused +#' @importFrom rJava .jcall +#' @export +setMethod( + + "dbGetQuery", + signature(conn="AthenaConnection", statement="character"), + + definition = function(conn, statement, type_convert=FALSE, ...) { + + r <- dbSendQuery(conn, statement, ...) + + on.exit(.jcall(r@stat, "V", "close")) + + #message("dbGetQuery()") + + nms <- c() + athena_type_convert <- list() + + cols <- .jcall(r@md, "I", "getColumnCount") + + for (i in 1:cols) { + ct <- as.character(.jcall(r@md, "I", "getColumnType", i)) + athena_type_convert[[i]] <- .jdbc_converters[[ct]] + nms <- c(nms, .jcall(r@md, "S", "getColumnLabel", i)) + } + + athena_type_convert <- set_names(athena_type_convert, nms) + + res <- fetch(r, -1, block = 1000) + + for (nm in names(athena_type_convert)) { + res[[nm]] <- athena_type_convert[[nm]](res[[nm]]) + } + + class(res) <- c("tbl_df", "tbl", "data.frame") + + res + + } + +) \ No newline at end of file diff --git a/man/Athena.Rd b/man/Athena.Rd index 0f5dfa5..f919af0 100644 --- a/man/Athena.Rd +++ b/man/Athena.Rd @@ -8,9 +8,9 @@ \usage{ Athena(identifier.quote = "`") -\method{db_data_type}{AthenaConnection}(con, fields, ...) +db_data_type.AthenaConnection(con, fields, ...) -\method{sql_translate_env}{AthenaConnection}(con) +sql_translate_env.AthenaConnection(con) } \description{ AthenaJDBC diff --git a/man/dbConnect-AthenaDriver-method.Rd b/man/dbConnect-AthenaDriver-method.Rd index 0db47cd..98d65e2 100644 --- a/man/dbConnect-AthenaDriver-method.Rd +++ b/man/dbConnect-AthenaDriver-method.Rd @@ -10,8 +10,8 @@ region = "us-east-1", s3_staging_dir = Sys.getenv("AWS_S3_STAGING_DIR"), schema_name = "default", max_error_retries = 10, - connection_timeout = 10000, socket_timeout = 10000, log_path, - log_level, ...) + connection_timeout = 10000, socket_timeout = 10000, log_path = "", + log_level = 0, ...) } \arguments{ \item{provider}{JDBC auth provider (ideally leave default)} diff --git a/man/dbGetQuery-AthenaConnection-character-method.Rd b/man/dbGetQuery-AthenaConnection-character-method.Rd index aa9b744..6261007 100644 --- a/man/dbGetQuery-AthenaConnection-character-method.Rd +++ b/man/dbGetQuery-AthenaConnection-character-method.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/jdbc.r +% Please edit documentation in R/z-dbGetQuery.R \docType{methods} \name{dbGetQuery,AthenaConnection,character-method} \alias{dbGetQuery,AthenaConnection,character-method} diff --git a/man/metis.lite.Rd b/man/metis.lite.Rd index fb17bc6..511f31c 100644 --- a/man/metis.lite.Rd +++ b/man/metis.lite.Rd @@ -15,25 +15,15 @@ is included along with an interface to the 'AWS' command-line utility. Since R 3.5 (I don't remember this happening in R 3.4.x) signals sent from interrupting -Athena JDBC calls crash the R #' interpreter. You need to set the `-Xrs` option to avoid -signals being passed on to the JVM owner. That has to be done _#' before_ `rJava` is -loaded so you either need to remember to put it at the top of all scripts _or_ stick this -in your local #' `~/.Rprofile` and/or sitewide `Rprofile`: - - - -Since R 3.5 (I don't remember this happening in R 3.4.x) signals sent from interrupting -Athena JDBC calls crash the R #' interpreter. You need to set the `-Xrs` option to avoid -signals being passed on to the JVM owner. That has to be done _#' before_ `rJava` is -loaded so you either need to remember to put it at the top of all scripts _or_ stick this -in your local #' `~/.Rprofile` and/or sitewide `Rprofile`: - - - if (!grepl("-Xrs", getOption("java.parameters", ""))) { - options( - "java.parameters" = c(getOption("java.parameters", default = NULL), "-Xrs") - ) - } +Athena JDBC calls crash the R #' interpreter. You need to set the \code{-Xrs} option to avoid +signals being passed on to the JVM owner. That has to be done \emph{before} \code{rJava} is +loaded so you either need to remember to put it at the top of all scripts \emph{or} stick this +in your local #' \code{~/.Rprofile} and/or sitewide \code{Rprofile}:\preformatted{if (!grepl("-Xrs", getOption("java.parameters", ""))) { + options( + "java.parameters" = c(getOption("java.parameters", default = NULL), "-Xrs") + ) +} +} } \author{