Browse Source

workgroup

master
boB Rudis 4 years ago
parent
commit
d164f3051d
No known key found for this signature in database GPG Key ID: 1D7529BE14E2BBA9
  1. 6
      DESCRIPTION
  2. 3
      NAMESPACE
  3. 4
      NEWS.md
  4. 5
      R/collect-async.R
  5. 3
      R/download-query-ex-res.R
  6. 48
      R/get-query-results-meta.R
  7. 4
      R/s3-download-file.R
  8. 5
      R/start-query-execution.R
  9. 65
      R/type-trans.R
  10. 5
      R/utils.R
  11. 1
      README.Rmd
  12. 21
      man/athena_type_trans.Rd
  13. 1
      man/awsathena.Rd
  14. 18
      man/collect_async.Rd
  15. 15
      man/download_query_execution_results.Rd
  16. 10
      man/get_query_execution.Rd
  17. 28
      man/get_query_results_metadata.Rd
  18. 10
      man/list_query_executions.Rd
  19. 18
      man/s3_download_file.Rd
  20. 18
      man/start_query_execution.Rd
  21. 10
      man/stop_query_execution.Rd

6
DESCRIPTION

@ -1,8 +1,8 @@
Package: awsathena
Type: Package
Title: rJava Interface to AWS Athena SDK
Version: 0.1.0
Date: 2019-02-22
Version: 0.2.0
Date: 2019-06-14
Authors@R: c(
person("Bob", "Rudis", role = c("aut", "cre"), email = "bob@rud.is")
)
@ -25,6 +25,6 @@ Depends:
Imports:
uuid,
utils
RoxygenNote: 6.1.1
RoxygenNote: 7.0.2
Remotes:
hrbrmstr/awsathenajars

3
NAMESPACE

@ -1,12 +1,15 @@
# Generated by roxygen2: do not edit by hand
export(athena_type_trans)
export(collect_async)
export(download_query_execution_results)
export(get_query_execution)
export(get_query_results_metadata)
export(list_query_executions)
export(s3_download_file)
export(start_query_execution)
export(stop_query_execution)
export(to_cols)
import(awsathenajars)
import(rJava)
importFrom(utils,setTxtProgressBar)

4
NEWS.md

@ -1,3 +1,7 @@
0.3.0
* buffer for download is now a parameter
* added `get_query_results_metadata()` to enable retrieval of col types
0.2.0
* Split into two packages as per CRAN rJava-package suggested practice.

5
R/collect-async.R

@ -29,6 +29,7 @@
#' you wish to use
#' @param properties_file if not using the default credentials provider chain or
#' a named profile then provide the path to an Athena credentials property file.
#' @param workgroup name of the Athena workgroup to use; defaults to `"primary"`
#' @note `dbplyr` must be installed for this to work. It is not listed in
#' the `Imports` as it brings with it many dependencies that may not
#' be necessary for general use of this package.
@ -71,6 +72,7 @@ collect_async <- function(obj,
kms_key = NULL,
region = "us-east-1",
profile = NULL,
workgroup = "primary",
properties_file = NULL) {
if (!requireNamespace("dbplyr", quietly = TRUE)) {
@ -85,7 +87,8 @@ collect_async <- function(obj,
kms_key = kms_key,
region = region,
profile = profile,
properties_file = properties_file
properties_file = properties_file,
workgroup = workgroup
)
}

3
R/download-query-ex-res.R

@ -11,6 +11,7 @@
#' @param profile if not using the default credentials chain or a dedicated
#' properties file then provide the named profile from `~/.aws/credentials`
#' you wish to use
#' @param buffer_size S3 temp buffer size; bigger = faster d/l
#' @param properties_file if not using the default credentials provider chain or
#' a named profile then provide the path to an Athena credentials property file.
#' @export
@ -19,6 +20,7 @@ download_query_execution_results <- function(qxid,
progress = FALSE,
region = "us-east-1",
profile = NULL,
buffer_size = 16384L,
properties_file = NULL) {
if (missing(output_dir)) output_dir <- getwd()
@ -38,6 +40,7 @@ download_query_execution_results <- function(qxid,
progress = progress,
region = region,
profile = profile,
buffer_size = buffer_size,
properties_file = properties_file
) -> res

48
R/get-query-results-meta.R

@ -0,0 +1,48 @@
#' Get Query Execution Results Metadata (Schema)
#'
#' Requests the results of a query execution (capped at a single result row)
#' and returns the column information found in the result set metadata, one
#' row per column.
#'
#' @md
#' @param qxid query execution id
#' @param region AWS region string
#' @param profile if not using the default credentials chain or a dedicated
#' properties file then provide the named profile from `~/.aws/credentials`
#' you wish to use
#' @param properties_file if not using the default credentials provider chain or
#' a named profile then provide the path to an Athena credentials property file.
#' @return a data frame classed `athena_query_metadata` with the columns
#' `name`, `type`, `caseSensitive`, `catalogName`, `label`, `nullable`,
#' `precision`, `scale`, `schemaName` and `tableName`.
#' @export
get_query_results_metadata <- function(qxid,
                                       region = "us-east-1",
                                       profile = NULL,
                                       properties_file = NULL) {

  client <- aws_athena_client(
    region = region,
    profile = profile,
    properties_file = properties_file
  )
  # Shut the client down on every exit path, including when a Java call below
  # throws; previously it was only released after a fully successful run.
  on.exit(client$shutdown(), add = TRUE)

  qx_req <- .jnew("com.amazonaws.services.athena.model.GetQueryResultsRequest")
  qx_req$setQueryExecutionId(qxid)
  # Only the column metadata is read below, so request the smallest page.
  qx_req$setMaxResults(.jnew(class = "java/lang/Integer", "1"))

  res <- client$getQueryResults(qx_req)
  res_rs <- res$getResultSet()
  res_md <- res_rs$getResultSetMetadata()
  res_ci <- res_md$getColumnInfo()

  # One single-row data frame per column-info entry.
  col_rows <- lapply(res_ci, function(.x) {
    data.frame(
      name = .x$getName(),
      type = .x$getType(),
      caseSensitive = .x$getCaseSensitive(),
      catalogName = .x$getCatalogName(),
      label = .x$getLabel(),
      nullable = .x$getNullable(),
      precision = .x$getPrecision(),
      scale = .x$getScale(),
      schemaName = .x$getSchemaName(),
      tableName = .x$getTableName(),
      stringsAsFactors = FALSE
    )
  })

  # Plain do.call() rather than a magrittr pipe, so the function does not rely
  # on `%>%` being available inside the package namespace.
  out <- do.call(rbind.data.frame, col_rows)
  class(out) <- c("athena_query_metadata", "tbl_df", "tbl", "data.frame")

  out

}

4
R/s3-download-file.R

@ -8,6 +8,7 @@
#' @param profile if not using the default credentials chain or a dedicated
#' properties file then provide the named profile from `~/.aws/credentials`
#' you wish to use
#' @param buffer_size S3 temp buffer size; bigger = faster d/l
#' @param properties_file if not using the default credentials provider chain or
#' a named profile then provide the path to an Athena credentials property file.
#' @export
@ -15,6 +16,7 @@ s3_download_file <- function(bucket, key, output_dir,
progress = FALSE,
region = "us-east-1",
profile = NULL,
buffer_size = 16384L,
properties_file = NULL) {
aws_s3_client(
@ -34,7 +36,7 @@ s3_download_file <- function(bucket, key, output_dir,
s3is <- obj$getObjectContent()
buf <- raw(4096)
buf <- raw(buffer_size)
jbuf <- .jarray(buf)
read_len <- s3is$read(jbuf)

5
R/start-query-execution.R

@ -21,6 +21,7 @@
#' you wish to use
#' @param properties_file if not using the default credentials provider chain or
#' a named profile then provide the path to an Athena credentials property file.
#' @param workgroup name of the Athena workgroup to run the query in; defaults to `"primary"`
#' @export
start_query_execution <- function(query, database, output_location,
client_request_token = uuid::UUIDgenerate(),
@ -28,7 +29,8 @@ start_query_execution <- function(query, database, output_location,
kms_key = NULL,
region = "us-east-1",
profile = NULL,
properties_file = NULL) {
properties_file = NULL,
workgroup = "primary") {
client <- aws_athena_client(region = region, profile = profile, properties_file = properties_file)
@ -43,6 +45,7 @@ start_query_execution <- function(query, database, output_location,
qx_req <- qx_req$withQueryExecutionContext(ctx)
qx_req <- qx_req$withResultConfiguration(res_cfg)
qx_req <- qx_req$withClientRequestToken(client_request_token)
qx_req <- qx_req$withWorkGroup(workgroup)
res <- client$startQueryExecution(qx_req)

65
R/type-trans.R

@ -0,0 +1,65 @@
#' Translate from one type system to another
#'
#' Maps Athena column type names to R type names (`to = "r"`) or R type names
#' to Athena column type names (`to = "athena"`). Matching is exact and
#' element-wise.
#'
#' @param type type (character)
#' @param to one of `athena` or `r`
#' @return a character vector the same length as `type`, named by `type`.
#' When translating to R, an unrecognised type is returned unchanged; when
#' translating to Athena, an unrecognised type yields `NA`.
#' @export
athena_type_trans <- function(type, to = c("r", "athena")) {

  # `to[1]` keeps the default (a length-2 vector) working; tolower() makes the
  # choice case-insensitive.
  to <- match.arg(tolower(to[1]), c("athena", "r"))

  if (to == "r") {

    vapply(type, function(.x) {
      switch(
        .x,
        boolean = "logical",
        tinyint = "integer",
        smallint = "integer",
        int = "integer",
        integer = "integer",
        bigint = "integer64",
        double = "double",
        float = "double",
        decimal = "double",
        char = "character",
        varchar = "character",
        binary = "raw",
        date = "Date",
        timestamp = "POSIXct",
        array = "character",
        map = "character",
        struct = "character",
        # Fallback: pass THIS element through. The previous fallback was the
        # whole input vector, which broke vector input with unknown types.
        .x
      )
    }, character(1))

  } else {

    vapply(type, function(.x) {
      switch(
        .x,
        logical = "boolean",
        integer = "integer",
        integer64 = "bigint",
        double = "double",
        character = "varchar",
        raw = "binary",
        Date = "date",
        POSIXct = "timestamp",
        # No Athena equivalent known: stay type-stable instead of returning
        # NULL (which turned the whole result into a list).
        NA_character_
      )
    }, character(1))

  }

}
#' @rdname athena_type_trans
#' @param name character vector of column names, the same length as `type`;
#' for `to_cols()` the entries of `type` should be R type names
#' @export
to_cols <- function(name, type) {

  # readr is referenced via `readr::` only, so fail with a clear message when
  # it is not installed.
  if (!requireNamespace("readr", quietly = TRUE)) {
    stop("The 'readr' package must be installed to use to_cols().", call. = FALSE)
  }

  if (length(name) != length(type)) {
    stop("'name' and 'type' must be the same length.", call. = FALSE)
  }

  # One readr collector per requested R type.
  l <- lapply(type, function(.x) {
    switch(
      .x,
      logical = readr::col_logical(),
      integer = readr::col_integer(),
      integer64 = readr::col_number(),
      double = readr::col_double(),
      character = readr::col_character(),
      raw = readr::col_character(),
      Date = readr::col_date(),
      POSIXct = readr::col_datetime(),
      # Unknown type: let readr guess rather than handing cols() a NULL.
      readr::col_guess()
    )
  })

  # Label each collector with its column name. The previous code called the
  # naming helper without the names, which always raised a missing-argument
  # error, and `name` was never used.
  names(l) <- name

  do.call(readr::cols, l)

}

5
R/utils.R

@ -39,3 +39,8 @@
stop("No such region.", call.=FALSE)
)
}
# Return `object` with its names replaced by `nm`.
# When `object` is not supplied it defaults to `nm`, so `set_names(nm = x)`
# gives back `x` named by its own values.
set_names <- function(object = nm, nm) {
  `names<-`(object, nm)
}

1
README.Rmd

@ -20,6 +20,7 @@ The following functions are implemented:
- `collect_async`: Collect Amazon Athena 'dplyr' query results asynchronously
- `download_query_execution_results`: Use S3 to download the results of an Athena Query
- `get_query_execution`: Get Query Execution
- `get_query_results_metadata`: Get Query Execution Results Metadata (Schema)
- `list_query_executions`: List Query Executions
- `s3_download_file`: Download a key from a bucket to a local file
- `start_query_execution`: Start Query Execution

21
man/athena_type_trans.Rd

@ -0,0 +1,21 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/type-trans.R
\name{athena_type_trans}
\alias{athena_type_trans}
\alias{to_cols}
\title{Translate from one type system to another}
\usage{
athena_type_trans(type, to = c("r", "athena"))
to_cols(name, type)
}
\arguments{
\item{type}{type (character)}
\item{to}{one of `athena` or `r`}
\item{name, type}{equal length character vectors; type should be an R type}
}
\description{
Translate from one type system to another
}

1
man/awsathena.Rd

@ -3,7 +3,6 @@
\docType{package}
\name{awsathena}
\alias{awsathena}
\alias{awsathena-package}
\title{rJava Client to AWS Athena SDK}
\description{
Provides R wrapper methods to core 'aws-java-sdk-athena' Java library methods

18
man/collect_async.Rd

@ -4,10 +4,18 @@
\alias{collect_async}
\title{Collect Amazon Athena \code{dplyr} query results asynchronously}
\usage{
collect_async(obj, database, output_location,
collect_async(
obj,
database,
output_location,
client_request_token = uuid::UUIDgenerate(),
encryption_option = NULL, kms_key = NULL, region = "us-east-1",
profile = NULL, properties_file = NULL)
encryption_option = NULL,
kms_key = NULL,
region = "us-east-1",
profile = NULL,
workgroup = "primary",
properties_file = NULL
)
}
\arguments{
\item{obj}{the \code{dplyr} query}
@ -33,9 +41,11 @@ Default is \code{NULL} (no encryption)}
\item{region}{AWS region string}
\item{profile}{if not using the default credentials chain or a dedicated
properties file then provide the named profile from \code{~/.aws/credentials}
properties file then provide the named profile from \verb{~/.aws/credentials}
you wish to use}
\item{workgroup}{workgroup}
\item{properties_file}{if not using the default credentials provider chain or
a named profile then provide the path to an Athena credentials proeprty file.}
}

15
man/download_query_execution_results.Rd

@ -4,8 +4,15 @@
\alias{download_query_execution_results}
\title{Use S3 to download the results of an Athena Query}
\usage{
download_query_execution_results(qxid, output_dir, progress = FALSE,
region = "us-east-1", profile = NULL, properties_file = NULL)
download_query_execution_results(
qxid,
output_dir,
progress = FALSE,
region = "us-east-1",
profile = NULL,
buffer_size = 16384L,
properties_file = NULL
)
}
\arguments{
\item{qxid}{query execution id}
@ -18,9 +25,11 @@ the value) to where you want the results to be stored.}
\item{region}{AWS region string}
\item{profile}{if not using the default credentials chain or a dedicated
properties file then provide the named profile from \code{~/.aws/credentials}
properties file then provide the named profile from \verb{~/.aws/credentials}
you wish to use}
\item{buffer_size}{S3 temp buffer size; bigger = faster d/l}
\item{properties_file}{if not using the default credentials provider chain or
a named profile then provide the path to an Athena credentials proeprty file.}
}

10
man/get_query_execution.Rd

@ -4,8 +4,12 @@
\alias{get_query_execution}
\title{Get Query Execution}
\usage{
get_query_execution(qxid, region = "us-east-1", profile = NULL,
properties_file = NULL)
get_query_execution(
qxid,
region = "us-east-1",
profile = NULL,
properties_file = NULL
)
}
\arguments{
\item{qxid}{query execution id}
@ -13,7 +17,7 @@ get_query_execution(qxid, region = "us-east-1", profile = NULL,
\item{region}{AWS region string}
\item{profile}{if not using the default credentials chain or a dedicated
properties file then provide the named profile from \code{~/.aws/credentials}
properties file then provide the named profile from \verb{~/.aws/credentials}
you wish to use}
\item{properties_file}{if not using the default credentials provider chain or

28
man/get_query_results_metadata.Rd

@ -0,0 +1,28 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/get-query-results-meta.R
\name{get_query_results_metadata}
\alias{get_query_results_metadata}
\title{Get Query Execution Results Metadata (Schema)}
\usage{
get_query_results_metadata(
qxid,
region = "us-east-1",
profile = NULL,
properties_file = NULL
)
}
\arguments{
\item{qxid}{query execution id}
\item{region}{AWS region string}
\item{profile}{if not using the default credentials chain or a dedicated
properties file then provide the named profile from \verb{~/.aws/credentials}
you wish to use}
\item{properties_file}{if not using the default credentials provider chain or
a named profile then provide the path to an Athena credentials proeprty file.}
}
\description{
Get Query Execution Results Metadata (Schema)
}

10
man/list_query_executions.Rd

@ -4,14 +4,18 @@
\alias{list_query_executions}
\title{List Query Executions}
\usage{
list_query_executions(region = "us-east-1", profile = NULL,
properties_file = NULL, max = NULL)
list_query_executions(
region = "us-east-1",
profile = NULL,
properties_file = NULL,
max = NULL
)
}
\arguments{
\item{region}{AWS region string}
\item{profile}{if not using the default credentials chain or a dedicated
properties file then provide the named profile from \code{~/.aws/credentials}
properties file then provide the named profile from \verb{~/.aws/credentials}
you wish to use}
\item{properties_file}{if not using the default credentials provider chain or

18
man/s3_download_file.Rd

@ -4,11 +4,19 @@
\alias{s3_download_file}
\title{Download a key from a bucket to a local file}
\usage{
s3_download_file(bucket, key, output_dir, progress = FALSE,
region = "us-east-1", profile = NULL, properties_file = NULL)
s3_download_file(
bucket,
key,
output_dir,
progress = FALSE,
region = "us-east-1",
profile = NULL,
buffer_size = 16384L,
properties_file = NULL
)
}
\arguments{
\item{bucket, key}{S3 bucket and key (no \code{s3://} prefix)}
\item{bucket, key}{S3 bucket and key (no \verb{s3://} prefix)}
\item{output_dir}{where to store \code{key}}
@ -17,9 +25,11 @@ s3_download_file(bucket, key, output_dir, progress = FALSE,
\item{region}{AWS region string}
\item{profile}{if not using the default credentials chain or a dedicated
properties file then provide the named profile from \code{~/.aws/credentials}
properties file then provide the named profile from \verb{~/.aws/credentials}
you wish to use}
\item{buffer_size}{S3 temp buffer size; bigger = faster d/l}
\item{properties_file}{if not using the default credentials provider chain or
a named profile then provide the path to an Athena credentials proeprty file.}
}

18
man/start_query_execution.Rd

@ -4,10 +4,18 @@
\alias{start_query_execution}
\title{Start Query Execution}
\usage{
start_query_execution(query, database, output_location,
start_query_execution(
query,
database,
output_location,
client_request_token = uuid::UUIDgenerate(),
encryption_option = NULL, kms_key = NULL, region = "us-east-1",
profile = NULL, properties_file = NULL)
encryption_option = NULL,
kms_key = NULL,
region = "us-east-1",
profile = NULL,
properties_file = NULL,
workgroup = "primary"
)
}
\arguments{
\item{query}{SQL query statements to be executed}
@ -33,11 +41,13 @@ Default is \code{NULL} (no encryption)}
\item{region}{AWS region string}
\item{profile}{if not using the default credentials chain or a dedicated
properties file then provide the named profile from \code{~/.aws/credentials}
properties file then provide the named profile from \verb{~/.aws/credentials}
you wish to use}
\item{properties_file}{if not using the default credentials provider chain or
a named profile then provide the path to an Athena credentials proeprty file.}
\item{workgroup}{workgroup}
}
\description{
Start Query Execution

10
man/stop_query_execution.Rd

@ -4,8 +4,12 @@
\alias{stop_query_execution}
\title{Stop Query Execution}
\usage{
stop_query_execution(qxid, region = "us-east-1", profile = NULL,
properties_file = NULL)
stop_query_execution(
qxid,
region = "us-east-1",
profile = NULL,
properties_file = NULL
)
}
\arguments{
\item{qxid}{query execution id}
@ -13,7 +17,7 @@ stop_query_execution(qxid, region = "us-east-1", profile = NULL,
\item{region}{AWS region string}
\item{profile}{if not using the default credentials chain or a dedicated
properties file then provide the named profile from \code{~/.aws/credentials}
properties file then provide the named profile from \verb{~/.aws/credentials}
you wish to use}
\item{properties_file}{if not using the default credentials provider chain or

Loading…
Cancel
Save