From f822907ff0cd79c9b87651bcf5e8eff73899f01b Mon Sep 17 00:00:00 2001 From: boB Rudis Date: Thu, 25 Aug 2016 17:50:46 -0400 Subject: [PATCH] README --- README.Rmd | 20 +++++++++++-- README.md | 95 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 113 insertions(+), 2 deletions(-) create mode 100644 README.md diff --git a/README.Rmd b/README.Rmd index 1b32170..60b7f8e 100644 --- a/README.Rmd +++ b/README.Rmd @@ -6,6 +6,9 @@ output: rmarkdown::github_document Rcpp/C++11 wrapper for +The goal is to create a completely "flat" `data.frame`-like structure from ndjson +records. + The following functions are implemented: - `stream_in`: Stream in JSON from a character vector @@ -24,17 +27,30 @@ options(width=120) ### Usage -```{r} +```{r message=FALSE} library(ndjson) +library(microbenchmark) # current verison packageVersion("ndjson") +sample_data <- readr::read_lines("http://httpbin.org/stream/100") +length(sample_data) + +dplyr::glimpse(ndjson::stream_in(sample_data)) + +dplyr::glimpse(jsonlite::stream_in(textConnection(sample_data), flatten=TRUE, verbose=FALSE)) + +microbenchmark( + ndjson={ ndjson::stream_in(sample_data) }, + jsonlite={ jsonlite::stream_in(textConnection(sample_data), flatten=TRUE, verbose=FALSE) } +) + ``` ### Test Results -```{r} +```{r message=FALSE} library(ndjson) library(testthat) diff --git a/README.md b/README.md new file mode 100644 index 0000000..6712b3e --- /dev/null +++ b/README.md @@ -0,0 +1,95 @@ + +`ndjson` : Wicked-fast Streaming JSON ('ndjson') Reader + +Rcpp/C++11 wrapper for + +The goal is to create a completely "flat" `data.frame`-like structure from ndjson records. + +The following functions are implemented: + +- `stream_in`: Stream in JSON from a character vector +- `stream_in_file`: Stream in JSON from a file + +### Installation + +``` r +devtools::install_github("hrbrmstr/ndjson") +``` + +### Usage + +``` r +library(ndjson) +library(microbenchmark) + +# current verison +packageVersion("ndjson") +``` + + ## [1] '0.1.0' + +``` r +sample_data <- readr::read_lines("http://httpbin.org/stream/100") +length(sample_data) +``` + + ## [1] 100 + +``` r +dplyr::glimpse(ndjson::stream_in(sample_data)) +``` + + ## Observations: 100 + ## Variables: 8 + ## $ args NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,... + ## $ headers.Accept "*/*", "*/*", "*/*", "*/*", "*/*", "*/*", "*/*", "*/*", "*/*", "*/*", "*/*", "*/*",... + ## $ headers.Accept-Encoding "gzip, deflate", "gzip, deflate", "gzip, deflate", "gzip, deflate", "gzip, deflate"... + ## $ headers.Host "httpbin.org", "httpbin.org", "httpbin.org", "httpbin.org", "httpbin.org", "httpbin... + ## $ headers.User-Agent "r/curl/jeroen", "r/curl/jeroen", "r/curl/jeroen", "r/curl/jeroen", "r/curl/jeroen"... + ## $ id 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 2... + ## $ origin "50.252.233.22", "50.252.233.22", "50.252.233.22", "50.252.233.22", "50.252.233.22"... + ## $ url "http://httpbin.org/stream/100", "http://httpbin.org/stream/100", "http://httpbin.o... + +``` r +dplyr::glimpse(jsonlite::stream_in(textConnection(sample_data), flatten=TRUE, verbose=FALSE)) +``` + + ## Observations: 100 + ## Variables: 5 + ## $ url "http://httpbin.org/stream/100", "http://httpbin.org/stream/100", "http://httpbin.org/stream/100", ... + ## $ headers c("httpbin.org", "httpbin.org", "httpbin.org", "httpbin.org", "httpbin.org", "httpbin.org", ... + ## $ args + ## $ id 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 2... + ## $ origin "50.252.233.22", "50.252.233.22", "50.252.233.22", "50.252.233.22", "50.252.233.22", "50.252.233.22... + +``` r +microbenchmark( + ndjson={ ndjson::stream_in(sample_data) }, + jsonlite={ jsonlite::stream_in(textConnection(sample_data), flatten=TRUE, verbose=FALSE) } +) +``` + + ## Unit: milliseconds + ## expr min lq mean median uq max neval cld + ## ndjson 2.164029 2.260217 2.353497 2.309950 2.361279 3.637044 100 a + ## jsonlite 6.690733 6.859965 7.200450 6.977782 7.117655 9.895136 100 b + +### Test Results + +``` r +library(ndjson) +library(testthat) + +date() +``` + + ## [1] "Thu Aug 25 17:49:56 2016" + +``` r +test_dir("tests/") +``` + + ## testthat results ======================================================================================================== + ## OK: 0 SKIPPED: 0 FAILED: 0 + ## + ## DONE ===================================================================================================================