boB Rudis
8 years ago
commit
3ecd71fb45
21 changed files with 10767 additions and 0 deletions
@ -0,0 +1,7 @@ |
|||
^.*\.Rproj$ |
|||
^\.Rproj\.user$ |
|||
^\.travis\.yml$ |
|||
^README\.*md$ |
|||
^README\.*html$ |
|||
^NOTES\.*Rmd$ |
|||
^NOTES\.*html$ |
@ -0,0 +1,7 @@ |
|||
.Rproj.user |
|||
.Rhistory |
|||
.RData |
|||
.Rproj |
|||
src/*.o |
|||
src/*.so |
|||
src/*.dll |
@ -0,0 +1,17 @@ |
|||
language: r |
|||
warnings_are_errors: true |
|||
sudo: required |
|||
|
|||
apt_packages: |
|||
- binutils |
|||
- libproj-dev |
|||
- gdal-bin |
|||
|
|||
env: |
|||
global: |
|||
- CRAN: http://cran.rstudio.com |
|||
|
|||
notifications: |
|||
email: |
|||
on_success: change |
|||
on_failure: change |
@ -0,0 +1,29 @@ |
|||
Package: ndjson |
|||
Type: Package |
|||
Title: Wicked-fast Streaming JSON ('ndjson') Reader |
|||
Version: 0.1.0 |
|||
Date: 2016-08-25 |
|||
Author: Bob Rudis (@hrbrmstr) |
|||
Maintainer: Bob Rudis <bob@rudis.net> |
|||
Description: Streaming JSON ('ndjson') has one JSON record per-line and many modern |
|||
'ndjson' files contain large numbers of records. These constructs may not be |
|||
columnar in nature, but it's often useful to read in these files and "flatten" |
|||
the structure out to work in an R data.frame context. Functions are provided that |
|||
make it possible to read in 'ndjson' from character vectors or files and create |
|||
"flat" data.table structures from them. |
|||
URL: http://github.com/hrbrmstr/ndjson |
|||
BugReports: https://github.com/hrbrmstr/ndjson/issues |
|||
License: AGPL |
|||
Suggests: |
|||
testthat |
|||
Depends: |
|||
R (>= 3.0.0) |
|||
Imports: |
|||
purrr, |
|||
Rcpp, |
|||
Rcpp11, |
|||
stringi, |
|||
data.table, |
|||
dtplyr |
|||
LinkingTo: Rcpp, Rcpp11 |
|||
RoxygenNote: 5.0.1 |
@ -0,0 +1,11 @@ |
|||
# Generated by roxygen2: do not edit by hand |
|||
|
|||
export(stream_in) |
|||
export(stream_in_file) |
|||
import(Rcpp11) |
|||
import(purrr) |
|||
importFrom(Rcpp,sourceCpp) |
|||
importFrom(data.table,rbindlist) |
|||
importFrom(dtplyr,tbl_dt) |
|||
importFrom(stringi,stri_read_lines) |
|||
useDynLib(ndjson) |
@ -0,0 +1,2 @@ |
|||
0.1.0 |
|||
* Initial release |
@ -0,0 +1,7 @@ |
|||
# This file was generated by Rcpp::compileAttributes |
|||
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 |
|||
|
|||
# Thin R-side wrapper emitted by Rcpp::compileAttributes (see the file header):
# forwards `lines` unchanged to the compiled routine 'ndjson_internal_stream_in'
# registered in the 'ndjson' shared library and returns whatever that routine returns.
# NOTE(review): this file is regenerated by compileAttributes, so hand-written
# comments here will be lost on the next regeneration.
internal_stream_in <- function(lines) { |
|||
.Call('ndjson_internal_stream_in', PACKAGE = 'ndjson', lines) |
|||
} |
|||
|
@ -0,0 +1,13 @@ |
|||
#' Wicked-fast Streaming JSON ('ndjson') Reader |
|||
#' |
|||
#' @name ndjson |
|||
#' @docType package |
|||
#' @author Bob Rudis (@@hrbrmstr) |
|||
#' @import purrr |
|||
#' @useDynLib ndjson |
|||
#' @import Rcpp11 |
|||
#' @importFrom Rcpp sourceCpp |
|||
#' @importFrom stringi stri_read_lines |
|||
#' @importFrom data.table rbindlist |
|||
#' @importFrom dtplyr tbl_dt |
|||
NULL |
@ -0,0 +1,24 @@ |
|||
#' Stream in JSON from a file |
|||
#' |
|||
#' Given a file of streaming JSON (ndjson) this function uses \code{stringi::stri_read_lines()} |
|||
#' to read the data in quickly and create a flat \code{data.table} / \code{tbl_dt} |
|||
#' from it. |
|||
#' |
|||
#' @param x path |
|||
#' @export |
|||
stream_in_file <- function(x) {
  # Pull every line of the file into a character vector.
  json_lines <- stringi::stri_read_lines(x)

  # Hand the lines to the compiled parser; it returns one list entry per line.
  parsed <- .Call("ndjson_internal_stream_in", json_lines, PACKAGE = "ndjson")

  # Stack the per-line results (fill = TRUE tolerates records with differing
  # fields) and wrap the combined table as a tbl_dt.
  flat <- data.table::rbindlist(parsed, fill = TRUE)
  dtplyr::tbl_dt(flat)
}
|||
|
|||
#' Stream in JSON from a character vector |
|||
#' |
|||
#' Given a character vector of streaming JSON (ndjson) this function will |
|||
#' create a flat \code{data.table} / \code{tbl_dt} from it. |
|||
#' |
|||
#' @param x character vector |
|||
#' @export |
|||
stream_in <- function(x) {
  # Parse each element of `x` with the compiled routine; the result is a list
  # holding one entry per input element.
  parsed <- .Call("ndjson_internal_stream_in", x, PACKAGE = "ndjson")

  # Stack the entries into one table (fill = TRUE tolerates records with
  # differing fields), then wrap it as a tbl_dt.
  flat <- data.table::rbindlist(parsed, fill = TRUE)
  dtplyr::tbl_dt(flat)
}
@ -0,0 +1,45 @@ |
|||
--- |
|||
output: rmarkdown::github_document |
|||
--- |
|||
|
|||
`ndjson` : Wicked-fast Streaming JSON ('ndjson') Reader |
|||
|
|||
Rcpp/C++11 wrapper for <https://github.com/nlohmann/json> |
|||
|
|||
The following functions are implemented: |
|||
|
|||
- `stream_in`: Stream in JSON from a character vector |
|||
- `stream_in_file`: Stream in JSON from a file |
|||
|
|||
|
|||
### Installation |
|||
|
|||
```{r eval=FALSE} |
|||
devtools::install_github("hrbrmstr/ndjson") |
|||
``` |
|||
|
|||
```{r echo=FALSE, message=FALSE, warning=FALSE, error=FALSE} |
|||
options(width=120) |
|||
``` |
|||
|
|||
### Usage |
|||
|
|||
```{r} |
|||
library(ndjson) |
|||
|
|||
# current version |
|||
packageVersion("ndjson") |
|||
|
|||
``` |
|||
|
|||
### Test Results |
|||
|
|||
```{r} |
|||
library(ndjson) |
|||
library(testthat) |
|||
|
|||
date() |
|||
|
|||
test_dir("tests/") |
|||
``` |
|||
|
@ -0,0 +1,14 @@ |
|||
% Generated by roxygen2: do not edit by hand |
|||
% Please edit documentation in R/ndjson-package.R |
|||
\docType{package} |
|||
\name{ndjson} |
|||
\alias{ndjson} |
|||
\alias{ndjson-package} |
|||
\title{Wicked-fast Streaming JSON ('ndjson') Reader} |
|||
\description{ |
|||
Wicked-fast Streaming JSON ('ndjson') Reader |
|||
} |
|||
\author{ |
|||
Bob Rudis (@hrbrmstr) |
|||
} |
|||
|
@ -0,0 +1,16 @@ |
|||
% Generated by roxygen2: do not edit by hand |
|||
% Please edit documentation in R/ndjson.R |
|||
\name{stream_in} |
|||
\alias{stream_in} |
|||
\title{Stream in JSON from a character vector} |
|||
\usage{ |
|||
stream_in(x) |
|||
} |
|||
\arguments{ |
|||
\item{x}{character vector} |
|||
} |
|||
\description{ |
|||
Given a character vector of streaming JSON (ndjson) this function will |
|||
create a flat \code{data.table} / \code{tbl_dt} from it. |
|||
} |
|||
|
@ -0,0 +1,17 @@ |
|||
% Generated by roxygen2: do not edit by hand |
|||
% Please edit documentation in R/ndjson.R |
|||
\name{stream_in_file} |
|||
\alias{stream_in_file} |
|||
\title{Stream in JSON from a file} |
|||
\usage{ |
|||
stream_in_file(x) |
|||
} |
|||
\arguments{ |
|||
\item{x}{path} |
|||
} |
|||
\description{ |
|||
Given a file of streaming JSON (ndjson) this function uses \code{stringi::stri_read_lines()} |
|||
to read the data in quickly and create a flat \code{data.table} / \code{tbl_dt} |
|||
from it. |
|||
} |
|||
|
@ -0,0 +1,21 @@ |
|||
Version: 1.0 |
|||
|
|||
RestoreWorkspace: Default |
|||
SaveWorkspace: Default |
|||
AlwaysSaveHistory: Default |
|||
|
|||
EnableCodeIndexing: Yes |
|||
UseSpacesForTab: Yes |
|||
NumSpacesForTab: 2 |
|||
Encoding: UTF-8 |
|||
|
|||
RnwWeave: Sweave |
|||
LaTeX: pdfLaTeX |
|||
|
|||
StripTrailingWhitespace: Yes |
|||
|
|||
BuildType: Package |
|||
PackageUseDevtools: Yes |
|||
PackageInstallArgs: --no-multiarch --with-keep.source |
|||
PackageBuildArgs: --resave-data |
|||
PackageRoxygenize: rd,collate,namespace |
@ -0,0 +1,3 @@ |
|||
*.o |
|||
*.so |
|||
*.dll |
@ -0,0 +1 @@ |
|||
PKG_CXXFLAGS = -std=c++11 |
@ -0,0 +1,18 @@ |
|||
// This file was generated by Rcpp::compileAttributes
|
|||
// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
|
|||
|
|||
#include <Rcpp.h> |
|||
|
|||
using namespace Rcpp; |
|||
|
|||
// internal_stream_in
|
|||
List internal_stream_in(std::vector < std::string > lines); |
|||
// Generated .Call entry point for internal_stream_in().
// Converts the incoming SEXP into a std::vector<std::string>, invokes
// internal_stream_in() on it, and returns the wrapped result as a SEXP.
// NOTE(review): BEGIN_RCPP / END_RCPP are Rcpp macros bracketing the body;
// presumably they translate C++ exceptions into R errors - confirm in the Rcpp docs.
// NOTE(review): this file is regenerated by Rcpp::compileAttributes, so comments
// added here by hand will be lost on regeneration.
RcppExport SEXP ndjson_internal_stream_in(SEXP linesSEXP) { |
|||
BEGIN_RCPP |
|||
Rcpp::RObject __result; |
|||
Rcpp::RNGScope __rngScope; |
|||
Rcpp::traits::input_parameter< std::vector < std::string > >::type lines(linesSEXP); |
|||
__result = Rcpp::wrap(internal_stream_in(lines)); |
|||
return __result; |
|||
END_RCPP |
|||
} |
File diff suppressed because it is too large
@ -0,0 +1,61 @@ |
|||
#include <Rcpp.h> |
|||
using namespace Rcpp; |
|||
|
|||
#include "json.hpp" |
|||
using json = nlohmann::json; |
|||
|
|||
// [[Rcpp::plugins(cpp11)]]
|
|||
|
|||
// [[Rcpp::export]]
|
|||
List internal_stream_in(std::vector < std::string > lines) { |
|||
|
|||
List container(lines.size()); |
|||
|
|||
for (R_xlen_t j=0; j<lines.size(); j++) { |
|||
|
|||
json o = json::parse(lines[j]).flatten(); |
|||
|
|||
List lst(o.size()); |
|||
CharacterVector lst_nms(o.size()); |
|||
|
|||
double d_val; |
|||
std::string s_val; |
|||
bool b_val; |
|||
|
|||
R_xlen_t i=0; |
|||
for (json::iterator it = o.begin(); it != o.end(); ++it) { |
|||
|
|||
std::string key = it.key(); |
|||
std::replace(key.begin(), key.end(), '/', '.'); |
|||
key.erase(0, 1); |
|||
|
|||
lst_nms[i] = key; |
|||
|
|||
if (it.value().is_number()) { |
|||
d_val = it.value(); |
|||
lst[i] = d_val; |
|||
} else if (it.value().is_boolean()) { |
|||
b_val = it.value(); |
|||
lst[i] = b_val; |
|||
} else if (it.value().is_string()) { |
|||
s_val = it.value(); |
|||
lst[i] = s_val; |
|||
} else if (it.value().is_null()) { |
|||
lst[i] = NA_LOGICAL; |
|||
} |
|||
|
|||
i += 1; |
|||
|
|||
} |
|||
|
|||
lst.attr("names") = lst_nms; |
|||
lst.attr("class") = "data.frame"; |
|||
lst.attr("row.names") = 1; |
|||
|
|||
container[j] = lst; |
|||
|
|||
} |
|||
|
|||
return(container); |
|||
|
|||
} |
@ -0,0 +1,2 @@ |
|||
library(testthat) |
|||
test_check("ndjson") |
@ -0,0 +1,6 @@ |
|||
context("basic functionality") |
|||
test_that("we can do something", { |
|||
|
|||
#expect_that(some_function(), is_a("data.frame")) |
|||
|
|||
}) |
Loading…
Reference in new issue