mirror of https://git.sr.ht/~hrbrmstr/awsathena
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
97 lines
3.3 KiB
97 lines
3.3 KiB
% Generated by roxygen2: do not edit by hand
|
|
% Please edit documentation in R/collect-async.R
|
|
\name{collect_async}
|
|
\alias{collect_async}
|
|
\title{Collect Amazon Athena \code{dplyr} query results asynchronously}
|
|
\usage{
|
|
collect_async(
|
|
obj,
|
|
database,
|
|
output_location,
|
|
client_request_token = uuid::UUIDgenerate(),
|
|
encryption_option = NULL,
|
|
kms_key = NULL,
|
|
region = "us-east-1",
|
|
profile = NULL,
|
|
workgroup = "primary",
|
|
properties_file = NULL
|
|
)
|
|
}
|
|
\arguments{
|
|
\item{obj}{the \code{dplyr} query}
|
|
|
|
\item{database}{database within which the query executes.}
|
|
|
|
\item{output_location}{location in S3 where query results are stored.}
|
|
|
|
\item{client_request_token}{unique case-sensitive string used to ensure the
|
|
request to create the query is idempotent (executes only once). If another
|
|
\code{StartQueryExecution} request is received, the same response is returned
|
|
and another query is not created. If a parameter has changed, for example,
|
|
the \code{query}, an error is returned. \strong{This is auto-generated for you}.}
|
|
|
|
\item{encryption_option}{indicates whether Amazon S3 server-side encryption
|
|
with Amazon S3-managed keys (\code{SSE-S3}), server-side encryption with
|
|
KMS-managed keys (\code{SSE-KMS}), or client-side encryption with KMS-managed
|
|
keys (\code{CSE-KMS}) is used. Default is \code{NULL} (no encryption).}
|
|
|
|
\item{kms_key}{For \code{SSE-KMS} and \code{CSE-KMS}, this is the KMS key ARN or ID.
|
|
Default is \code{NULL} (no encryption)}
|
|
|
|
\item{region}{AWS region string}
|
|
|
|
\item{profile}{if not using the default credentials chain or a dedicated
|
|
properties file then provide the named profile from \verb{~/.aws/credentials}
|
|
you wish to use}
|
|
|
|
\item{workgroup}{name of the Athena workgroup in which the query is started. Default is \code{"primary"}.}
|
|
|
|
\item{properties_file}{if not using the default credentials provider chain or
|
|
a named profile then provide the path to an Athena credentials property file.}
|
|
}
|
|
\description{
|
|
Long-running Athena queries and Athena queries with large result
|
|
sets can seriously stall a \code{dplyr} processing chain due to poorly
|
|
implemented ODBC and JDBC drivers. This function converts a \code{dplyr}
|
|
chain to a raw SQL query then submits it via \code{\link[=start_query_execution]{start_query_execution()}} and
|
|
returns the query execution id. You can retrieve the result set either
|
|
via \code{\link[=download_query_execution_results]{download_query_execution_results()}} or your favorite S3 downloading
|
|
method.
|
|
}
|
|
\note{
|
|
\code{dbplyr} must be installed for this to work. It is not listed in
|
|
the \code{Imports} as it brings with it many dependencies that may not
|
|
be necessary for general use of this package.
|
|
}
|
|
\examples{
|
|
\dontrun{
|
|
library(odbc)
|
|
library(DBI)
|
|
|
|
DBI::dbConnect(
|
|
odbc::odbc(),
|
|
driver = "/Library/simba/athenaodbc/lib/libathenaodbc_sbu.dylib",
|
|
Schema = "sampledb",
|
|
AwsRegion = "us-east-1",
|
|
AwsProfile = "personal",
|
|
AuthenticationType = "IAM Profile",
|
|
S3OutputLocation = "s3://aws-athena-query-results-redacted"
|
|
) -> con
|
|
|
|
elb_logs <- tbl(con, "elb_logs")
|
|
|
|
mutate(elb_logs, tsday = substr(timestamp, 1, 10)) \%>\%
|
|
filter(tsday == "2014-09-29") \%>\%
|
|
select(requestip, requestprocessingtime) \%>\%
|
|
collect_async(
|
|
database = "sampledb",
|
|
output_location = "s3://aws-athena-query-results-redacted",
|
|
profile = "personal"
|
|
) -> id
|
|
|
|
get_query_execution(id, profile = "personal")
|
|
|
|
# do this to get the data or use your favorite way to grab files from S3
|
|
get_query_results(id, profile = "personal")
|
|
}
|
|
}
|
|
|