Browse Source

initial commit

master
boB Rudis 8 years ago
commit
3ecd71fb45
No known key found for this signature in database GPG Key ID: 1D7529BE14E2BBA9
  1. 7
      .Rbuildignore
  2. 7
      .gitignore
  3. 17
      .travis.yml
  4. 29
      DESCRIPTION
  5. 11
      NAMESPACE
  6. 2
      NEWS.md
  7. 7
      R/RcppExports.R
  8. 13
      R/ndjson-package.R
  9. 24
      R/ndjson.R
  10. 45
      README.Rmd
  11. 14
      man/ndjson.Rd
  12. 16
      man/stream_in.Rd
  13. 17
      man/stream_in_file.Rd
  14. 21
      ndjson.Rproj
  15. 3
      src/.gitignore
  16. 1
      src/Makevars
  17. 18
      src/RcppExports.cpp
  18. 10446
      src/json.hpp
  19. 61
      src/ndjson.cpp
  20. 2
      tests/test-all.R
  21. 6
      tests/testthat/test-ndjson.R

7
.Rbuildignore

@ -0,0 +1,7 @@
^.*\.Rproj$
^\.Rproj\.user$
^\.travis\.yml$
^README\.*md$
^README\.*html$
^NOTES\.*Rmd$
^NOTES\.*html$

7
.gitignore

@ -0,0 +1,7 @@
.Rproj.user
.Rhistory
.RData
.Rproj
src/*.o
src/*.so
src/*.dll

17
.travis.yml

@ -0,0 +1,17 @@
language: r
warnings_are_errors: true
sudo: required
apt_packages:
- binutils
- libproj-dev
- gdal-bin
env:
global:
- CRAN: http://cran.rstudio.com
notifications:
email:
on_success: change
on_failure: change

29
DESCRIPTION

@ -0,0 +1,29 @@
Package: ndjson
Type: Package
Title: Wicked-fast Streaming JSON ('ndjson') Reader
Version: 0.1.0
Date: 2016-08-25
Author: Bob Rudis (@hrbrmstr)
Maintainer: Bob Rudis <bob@rudis.net>
Description: Streaming JSON ('ndjson') has one JSON record per-line and many modern
'ndjson' files contain large numbers of records. These constructs may not be
columnar in nature, but it's often useful to read in these files and "flatten"
the structure out to work in an R data.frame context. Functions are provided that
make it possible to read in 'ndjson' from character vectors or files and create
"flat" data.table structures from them.
URL: http://github.com/hrbrmstr/ndjson
BugReports: https://github.com/hrbrmstr/ndjson/issues
License: AGPL
Suggests:
testthat
Depends:
R (>= 3.0.0)
Imports:
purrr,
Rcpp,
Rcpp11,
stringi,
data.table,
dtplyr
LinkingTo: Rcpp, Rcpp11
RoxygenNote: 5.0.1

11
NAMESPACE

@ -0,0 +1,11 @@
# Generated by roxygen2: do not edit by hand
export(stream_in)
export(stream_in_file)
import(Rcpp11)
import(purrr)
importFrom(Rcpp,sourceCpp)
importFrom(data.table,rbindlist)
importFrom(dtplyr,tbl_dt)
importFrom(stringi,stri_read_lines)
useDynLib(ndjson)

2
NEWS.md

@ -0,0 +1,2 @@
0.1.0
* Initial release

7
R/RcppExports.R

@ -0,0 +1,7 @@
# This file was generated by Rcpp::compileAttributes
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
# Thin R wrapper around the compiled routine 'ndjson_internal_stream_in'.
# `lines` is forwarded unchanged to native code via .Call(); the matching C++
# function declares it as std::vector<std::string>, so a character vector is
# expected. Returns whatever the native routine returns.
internal_stream_in <- function(lines) {
.Call('ndjson_internal_stream_in', PACKAGE = 'ndjson', lines)
}

13
R/ndjson-package.R

@ -0,0 +1,13 @@
#' Wicked-fast Streaming JSON ('ndjson') Reader
#'
#' @name ndjson
#' @docType package
#' @author Bob Rudis (@@hrbrmstr)
#' @import purrr
#' @useDynLib ndjson
#' @import Rcpp11
#' @importFrom Rcpp sourceCpp
#' @importFrom stringi stri_read_lines
#' @importFrom data.table rbindlist
#' @importFrom dtplyr tbl_dt
NULL

24
R/ndjson.R

@ -0,0 +1,24 @@
#' Stream in JSON from a file
#'
#' Given a file of streaming JSON (ndjson) this function uses
#' \code{stringi::stri_read_lines()} to read the data in quickly and create a
#' flat \code{data.table} / \code{tbl_dt} from it. Lines that are empty or
#' contain only whitespace are dropped before parsing.
#'
#' @param x path to a single ndjson file (a leading \code{~} is expanded)
#' @return a \code{tbl_dt} with one row per JSON record in the file
#' @export
stream_in_file <- function(x) {
  # Fail early with a clear message instead of an opaque downstream error.
  if (!is.character(x) || length(x) != 1L || is.na(x)) {
    stop("`x` must be a single file path", call. = FALSE)
  }
  path <- path.expand(x)
  if (!file.exists(path)) {
    stop("file does not exist: ", x, call. = FALSE)
  }

  lines <- stringi::stri_read_lines(path)

  # Blank lines carry no record, and an empty string is not a valid JSON
  # document, so drop them here. useBytes = TRUE keeps the check from
  # tripping over strings with unusual encodings.
  lines <- lines[grepl("[^[:space:]]", lines, useBytes = TRUE)]

  # Reuse the character-vector entry point rather than duplicating the
  # native call and row-binding logic.
  stream_in(lines)
}
#' Stream in JSON from a character vector
#'
#' Given a character vector of streaming JSON (ndjson), one JSON record per
#' element, this function will create a flat \code{data.table} /
#' \code{tbl_dt} from it. Elements that are empty or contain only whitespace
#' are skipped rather than handed to the JSON parser.
#'
#' @param x character vector with one JSON record per element; must not
#'   contain \code{NA}
#' @return a \code{tbl_dt} built by row-binding the flattened records with
#'   \code{data.table::rbindlist(fill = TRUE)}
#' @export
stream_in <- function(x) {
  # Validate before crossing into native code so the caller gets a clear
  # message about what was wrong with the input.
  if (!is.character(x)) {
    stop("`x` must be a character vector of JSON records", call. = FALSE)
  }
  if (any(is.na(x))) {
    stop("`x` must not contain NA values", call. = FALSE)
  }

  # Blank elements carry no record, and an empty string is not a valid JSON
  # document, so drop them. useBytes = TRUE keeps the check from tripping
  # over strings with unusual encodings.
  records <- x[grepl("[^[:space:]]", x, useBytes = TRUE)]

  flattened <- .Call("ndjson_internal_stream_in", records, PACKAGE = "ndjson")
  dtplyr::tbl_dt(data.table::rbindlist(flattened, fill = TRUE))
}

45
README.Rmd

@ -0,0 +1,45 @@
---
output: rmarkdown::github_document
---
`ndjson` : Wicked-fast Streaming JSON ('ndjson') Reader
Rcpp/C++11 wrapper for <https://github.com/nlohmann/json>
The following functions are implemented:
- `stream_in`: Stream in JSON from a character vector
- `stream_in_file`: Stream in JSON from a file
### Installation
```{r eval=FALSE}
devtools::install_github("hrbrmstr/ndjson")
```
```{r echo=FALSE, message=FALSE, warning=FALSE, error=FALSE}
options(width=120)
```
### Usage
```{r}
library(ndjson)
# current version
packageVersion("ndjson")
```
### Test Results
```{r}
library(ndjson)
library(testthat)
date()
test_dir("tests/")
```

14
man/ndjson.Rd

@ -0,0 +1,14 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ndjson-package.R
\docType{package}
\name{ndjson}
\alias{ndjson}
\alias{ndjson-package}
\title{Wicked-fast Streaming JSON ('ndjson') Reader}
\description{
Wicked-fast Streaming JSON ('ndjson') Reader
}
\author{
Bob Rudis (@hrbrmstr)
}

16
man/stream_in.Rd

@ -0,0 +1,16 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ndjson.R
\name{stream_in}
\alias{stream_in}
\title{Stream in JSON from a character vector}
\usage{
stream_in(x)
}
\arguments{
\item{x}{character vector}
}
\description{
Given a character vector of streaming JSON (ndjson) this function will
create a flat \code{data.table} / \code{tbl_dt} from it.
}

17
man/stream_in_file.Rd

@ -0,0 +1,17 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ndjson.R
\name{stream_in_file}
\alias{stream_in_file}
\title{Stream in JSON from a file}
\usage{
stream_in_file(x)
}
\arguments{
\item{x}{path}
}
\description{
Given a file of streaming JSON (ndjson) this function uses \code{stringi::stri_read_lines()}
to read the data in quickly and create a flat \code{data.table} / \code{tbl_dt}
from it.
}

21
ndjson.Rproj

@ -0,0 +1,21 @@
Version: 1.0
RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default
EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8
RnwWeave: Sweave
LaTeX: pdfLaTeX
StripTrailingWhitespace: Yes
BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source
PackageBuildArgs: --resave-data
PackageRoxygenize: rd,collate,namespace

3
src/.gitignore

@ -0,0 +1,3 @@
*.o
*.so
*.dll

1
src/Makevars

@ -0,0 +1 @@
PKG_CXXFLAGS = -std=c++11

18
src/RcppExports.cpp

@ -0,0 +1,18 @@
// This file was generated by Rcpp::compileAttributes
// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
#include <Rcpp.h>
using namespace Rcpp;
// internal_stream_in
// Forward declaration of the C++ implementation that this glue exposes to R.
List internal_stream_in(std::vector < std::string > lines);
// .Call entry point registered under the name 'ndjson_internal_stream_in'.
// Converts the incoming SEXP into a std::vector<std::string>, invokes
// internal_stream_in() and hands the wrapped result back as a SEXP.
// NOTE(review): BEGIN_RCPP / END_RCPP are Rcpp macros defined outside this
// file; presumably they translate C++ exceptions into R errors -- confirm
// against the Rcpp documentation.
RcppExport SEXP ndjson_internal_stream_in(SEXP linesSEXP) {
BEGIN_RCPP
Rcpp::RObject __result;
Rcpp::RNGScope __rngScope;
Rcpp::traits::input_parameter< std::vector < std::string > >::type lines(linesSEXP);
__result = Rcpp::wrap(internal_stream_in(lines));
return __result;
END_RCPP
}

10446
src/json.hpp

File diff suppressed because it is too large

61
src/ndjson.cpp

@ -0,0 +1,61 @@
#include <algorithm>
#include <exception>
#include <string>
#include <vector>

#include <Rcpp.h>
using namespace Rcpp;
#include "json.hpp"
using json = nlohmann::json;
// Parse each ndjson record and flatten it into a one-row, data.frame-classed
// list.
//
// For every element of `lines` the text is parsed as JSON and flattened with
// json::flatten(). Each flattened key has every '/' replaced by '.' and its
// first character removed to form the column name. Numbers are stored as
// double, booleans as logical, strings as character and JSON nulls as
// logical NA.
//
// lines:   one JSON document per element.
// returns: a list with one element per input line; each element is a named
//          list carrying class "data.frame" and row.names 1.
// errors:  an R error naming the 1-based index of the offending element when
//          that element cannot be parsed or flattened.
// [[Rcpp::plugins(cpp11)]]
// [[Rcpp::export]]
List internal_stream_in(std::vector < std::string > lines) {

  // Convert the size once so the loop compares signed with signed.
  const R_xlen_t n_lines = static_cast<R_xlen_t>(lines.size());
  List container(n_lines);

  for (R_xlen_t j = 0; j < n_lines; j++) {

    json o;
    try {
      o = json::parse(lines[j]).flatten();
    } catch (const std::exception &e) {
      // Re-raise with the record position so a bad line can be located in
      // large inputs.
      Rcpp::stop("ndjson: could not parse line %d: %s",
                 static_cast<long>(j + 1), e.what());
    }

    const R_xlen_t n_fields = static_cast<R_xlen_t>(o.size());
    List lst(n_fields);
    CharacterVector lst_nms(n_fields);

    R_xlen_t i = 0;
    for (json::iterator it = o.begin(); it != o.end(); ++it, ++i) {

      // NOTE(review): flatten() presumably emits '/'-separated, '/'-prefixed
      // keys (e.g. "/a/b"), which is why the first character is dropped after
      // the separators are turned into dots -- confirm against json.hpp.
      std::string key = it.key();
      std::replace(key.begin(), key.end(), '/', '.');
      key.erase(0, 1);
      lst_nms[i] = key;

      const json &val = it.value();
      if (val.is_number()) {
        lst[i] = val.get<double>();
      } else if (val.is_boolean()) {
        lst[i] = val.get<bool>();
      } else if (val.is_string()) {
        lst[i] = val.get<std::string>();
      } else if (val.is_null()) {
        lst[i] = NA_LOGICAL;
      }
      // Any other value type leaves the slot as List(n) initialised it.

    }

    lst.attr("names") = lst_nms;
    lst.attr("class") = "data.frame";
    lst.attr("row.names") = 1;

    container[j] = lst;

  }

  return container;

}

2
tests/test-all.R

@ -0,0 +1,2 @@
# Entry point that runs the testthat tests for the 'ndjson' package.
library(testthat)
test_check("ndjson")

6
tests/testthat/test-ndjson.R

@ -0,0 +1,6 @@
context("basic functionality")

# The original placeholder test had no expectations, so it verified nothing.
# This checks the public contract of stream_in(): one row per record, nested
# keys flattened into dot-separated column names, data.frame-compatible result.
test_that("stream_in flattens ndjson records into one row each", {
  recs <- c('{"a":1,"b":{"c":"x"}}', '{"a":2,"b":{"c":"y"}}')
  res <- stream_in(recs)

  expect_true(inherits(res, "data.frame"))
  expect_equal(nrow(res), 2)
  expect_true(all(c("a", "b.c") %in% names(res)))
})
Loading…
Cancel
Save