|
|
|
---
|
|
|
|
output: rmarkdown::github_document
|
|
|
|
editor_options:
|
|
|
|
chunk_output_type: console
|
|
|
|
---
|
|
|
|
```{r include=FALSE}
|
|
|
|
knitr::opts_chunk$set(
|
|
|
|
echo = TRUE,
|
|
|
|
message = FALSE,
|
|
|
|
warning = FALSE,
|
|
|
|
fig.retina = 2
|
|
|
|
)
|
|
|
|
|
|
|
|
Sys.setenv(
|
|
|
|
AWS_S3_STAGING_DIR = "s3://aws-athena-query-results-569593279821-us-east-1"
|
|
|
|
)
|
|
|
|
|
|
|
|
options(width=120)
|
|
|
|
```
|
|
|
|
|
|
|
|
[![Travis-CI Build Status](https://travis-ci.org/hrbrmstr/metis-tidy.svg?branch=master)](https://travis-ci.org/hrbrmstr/metis-tidy)
|
|
|
|
[![Coverage Status](https://codecov.io/gh/hrbrmstr/metis-tidy/branch/master/graph/badge.svg)](https://codecov.io/gh/hrbrmstr/metis-tidy
|
|
|
|
[![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/metis.tidy)](https://cran.r-project.org/package=metis.tidy)
|
|
|
|
|
|
|
|
# metis.tidy
|
|
|
|
|
|
|
|
Access and Query Amazon Athena via the Tidyverse
|
|
|
|
|
|
|
|
## Description
|
|
|
|
|
|
|
|
Methods are provided to use the 'metis' JDBC/DBI interface via the Tidyverse
|
|
|
|
(e.g. 'dbplyr'/'dplyr' idioms).
|
|
|
|
|
|
|
|
## What's Inside The Tin?
|
|
|
|
|
|
|
|
Lightweight helpers to make it easier to `filter` and `mutate` plus type support for Athena `BIGINT` (64-bit integers).
|
|
|
|
|
|
|
|
## Installation
|
|
|
|
|
|
|
|
```{r eval=FALSE}
|
|
|
|
devtools::install_git("https://git.sr.ht/~hrbrmstr/metis-tidy")
|
|
|
|
# OR
|
|
|
|
devtools::install_gitlab("hrbrmstr/metis-tidy")
|
|
|
|
# OR
|
|
|
|
devtools::install_github("hrbrmstr/metis-tidy")
|
|
|
|
```
|
|
|
|
|
|
|
|
## Usage
|
|
|
|
|
|
|
|
```{r}
|
|
|
|
library(metis.tidy)
|
|
|
|
|
|
|
|
# current verison
|
|
|
|
packageVersion("metis.tidy")
|
|
|
|
```
|
|
|
|
|
|
|
|
### Basic Setup (using an alternate provider)
|
|
|
|
|
|
|
|
```{r}
|
|
|
|
library(metis.tidy)
|
|
|
|
library(tidyverse)
|
|
|
|
|
|
|
|
metis::dbConnect(
|
|
|
|
metis::Athena(),
|
|
|
|
Schema = "sampledb",
|
|
|
|
AwsCredentialsProviderClass = "com.simba.athena.amazonaws.auth.PropertiesFileCredentialsProvider",
|
|
|
|
AwsCredentialsProviderArguments = path.expand("~/.aws/athenaCredentials.props")
|
|
|
|
) -> con
|
|
|
|
|
|
|
|
elb_logs <- tbl(con, "elb_logs")
|
|
|
|
|
|
|
|
glimpse(elb_logs)
|
|
|
|
```
|
|
|
|
|
|
|
|
#### Using custom Athena functions
|
|
|
|
|
|
|
|
```{r}
|
|
|
|
filter(elb_logs, elbresponsecode == "200") %>%
|
|
|
|
mutate(
|
|
|
|
tsday = as.Date(substring(timestamp, 1L, 10L)),
|
|
|
|
host = url_extract_host(url),
|
|
|
|
proto_version = regexp_extract(protocol, "([[:digit:]\\.]+)"),
|
|
|
|
) %>%
|
|
|
|
select(tsday, host, receivedbytes, requestprocessingtime, proto_version) %>%
|
|
|
|
glimpse()
|
|
|
|
```
|
|
|
|
|
|
|
|
#### All the types work. Some are useful.
|
|
|
|
|
|
|
|
```{r}
|
|
|
|
tbl(con, sql("
|
|
|
|
SELECT
|
|
|
|
CAST('chr' AS CHAR(4)) achar,
|
|
|
|
CAST('varchr' AS VARCHAR) avarchr,
|
|
|
|
CAST(SUBSTR(timestamp, 1, 10) AS DATE) AS tsday,
|
|
|
|
CAST(100.1 AS DOUBLE) AS justadbl,
|
|
|
|
CAST(127 AS TINYINT) AS asmallint,
|
|
|
|
CAST(100 AS INTEGER) AS justanint,
|
|
|
|
CAST(100000000000000000 AS BIGINT) AS abigint,
|
|
|
|
CAST(('GET' = 'GET') AS BOOLEAN) AS is_get,
|
|
|
|
ARRAY[1, 2, 3] AS arr,
|
|
|
|
ARRAY['1', '2, 3', '4'] AS arr,
|
|
|
|
MAP(ARRAY['foo', 'bar'], ARRAY[1, 2]) AS mp,
|
|
|
|
CAST(ROW(1, 2.0) AS ROW(x BIGINT, y DOUBLE)) AS rw,
|
|
|
|
CAST('{\"a\":1}' AS JSON) js
|
|
|
|
FROM elb_logs
|
|
|
|
LIMIT 1
|
|
|
|
")) %>%
|
|
|
|
glimpse()
|
|
|
|
```
|
|
|
|
|
|
|
|
```{r cloc}
|
|
|
|
cloc::cloc_pkg_md()
|
|
|
|
```
|
|
|
|
|
|
|
|
## Code of Conduct
|
|
|
|
|
|
|
|
Please note that this project is released with a
|
|
|
|
[Contributor Code of Conduct](CONDUCT.md). By participating in this project you
|
|
|
|
agree to abide by its terms.
|