Browse Source

pre-CRAN flight check

tags/v0.6.0
boB Rudis 7 years ago
parent
commit
7a588904c1
No known key found for this signature in database GPG Key ID: 2A514A4997464560
  1. 1
      DESCRIPTION
  2. 1
      NAMESPACE
  3. 1
      NEWS.md
  4. 1
      R/ndjson-package.R
  5. 24
      R/ndjson.R
  6. 3
      cran-comments.md
  7. 8
      man/flatten.Rd
  8. 8
      man/stream_in.Rd
  9. 4
      tests/testthat/test-ndjson.R

1
DESCRIPTION

@ -25,6 +25,7 @@ Depends:
Imports:
Rcpp,
data.table,
dplyr,
dtplyr
LinkingTo: Rcpp
RoxygenNote: 6.0.1

1
NAMESPACE

@ -5,5 +5,6 @@ export(stream_in)
export(validate)
importFrom(Rcpp,sourceCpp)
importFrom(data.table,rbindlist)
importFrom(dplyr,tbl_df)
importFrom(dtplyr,tbl_dt)
useDynLib(ndjson, .registration=TRUE)

1
NEWS.md

@ -1,6 +1,7 @@
0.5.0
=====================
* Updated core ndjson file to take care of some buffer overflow vulns
* Optionally returns a `tbl_df` vs a `tbl_dt` (data.table is used for speed on list rbind)
* Fixed CRAN check errors
0.4.0

1
R/ndjson-package.R

@ -7,4 +7,5 @@
#' @importFrom Rcpp sourceCpp
#' @importFrom data.table rbindlist
#' @importFrom dtplyr tbl_dt
#' @importFrom dplyr tbl_df
NULL

24
R/ndjson.R

@ -3,8 +3,12 @@
#' Given a file of streaming JSON (ndjson) this function reads in the records
#' and creates a flat \code{data.table} / \code{tbl_dt} from it.
#'
#' @md
#' @param path path to file (supports "\code{gz}" files)
#' @return \code{tbl_dt}
#' @param cls the package uses \code{data.table::rbindlist} for speed but
#' that's not always the best return type for everyone, so you have the
#' option of keeping it a `tbl_dt` via "`dt`" or converting it to a `tbl`
#' @return \code{tbl_dt} or \code{tbl} or \code{data.frame}
#' @export
#' @references \url{http://ndjson.org/}
#' @examples
@ -13,9 +17,11 @@
#'
#' gzf <- system.file("extdata", "testgz.json.gz", package="ndjson")
#' nrow(stream_in(gzf))
stream_in <- function(path) {
stream_in <- function(path, cls = c("dt", "tbl")) {
cls <- match.arg(cls, c("dt", "tbl"))
tmp <- .Call('ndjson_internal_stream_in', path.expand(path), PACKAGE='ndjson')
dtplyr::tbl_dt(data.table::rbindlist(tmp, fill=TRUE))
tmp <- dtplyr::tbl_dt(data.table::rbindlist(tmp, fill=TRUE))
if (cls == "tbl") dplyr::tbl_df(tmp) else tmp
}
#' Validate ndjson file
@ -42,12 +48,18 @@ validate <- function(path, verbose=FALSE) {
#' Flatten a character vector of individual JSON lines into a \code{tbl_dt}
#'
#' @md
#' @param x character vector of individual JSON lines to flatten
#' @return \code{tbl_dt}
#' @param cls the package uses \code{data.table::rbindlist} for speed but
#' that's not always the best return type for everyone, so you have the
#' option of keeping it a `tbl_dt` via "`dt`" or converting it to a `tbl`
#' @return \code{tbl_dt} or \code{tbl} or \code{data.frame}
#' @export
#' @examples
#' flatten('{"top":{"next":{"final":1,"end":true},"another":"yes"},"more":"no"}')
flatten <- function(x) {
flatten <- function(x, cls = c("dt", "tbl")) {
cls <- match.arg(cls, c("dt", "tbl"))
tmp <- .Call('ndjson_internal_flatten', x, PACKAGE='ndjson')
dtplyr::tbl_dt(data.table::rbindlist(tmp, fill=TRUE))
tmp <- dtplyr::tbl_dt(data.table::rbindlist(tmp, fill=TRUE))
if (cls == "tbl") dplyr::tbl_df(tmp) else tmp
}

3
cran-comments.md

@ -1,7 +1,7 @@
## Test environments
* local OS X install, R 3.4.0 (clang)
* local ubuntu 16.04, R 3.4.0 (g++-7)
* win-builder (devel and release)
* win-builder (devel and release) https://win-builder.r-project.org/P5u60xh8LPrA/
## R CMD check results
@ -16,3 +16,4 @@ None
## General notes
- Fixed CRAN gcc 7 toolchain issues (linux)
- Update core C++ library the pkg uses

8
man/flatten.Rd

@ -4,13 +4,17 @@
\alias{flatten}
\title{Flatten a character vector of individual JSON lines into a \code{tbl_dt}}
\usage{
flatten(x)
flatten(x, cls = c("dt", "tbl"))
}
\arguments{
\item{x}{character vector of individual JSON lines to flatten}
\item{cls}{the package uses \code{data.table::rbindlist} for speed but
that's not always the best return type for everyone, so you have the
option of keeping it a \code{tbl_dt} via "\code{dt}" or converting it to a \code{tbl}}
}
\value{
\code{tbl_dt}
\code{tbl_dt} or \code{tbl} or \code{data.frame}
}
\description{
Flatten a character vector of individual JSON lines into a \code{tbl_dt}

8
man/stream_in.Rd

@ -4,13 +4,17 @@
\alias{stream_in}
\title{Stream in & flatten an ndjson file into a \code{tbl_dt}}
\usage{
stream_in(path)
stream_in(path, cls = c("dt", "tbl"))
}
\arguments{
\item{path}{path to file (supports "\code{gz}" files)}
\item{cls}{the package uses \code{data.table::rbindlist} for speed but
that's not always the best return type for everyone, so you have the
option of keeping it a \code{tbl_dt} via "\code{dt}" or converting it to a \code{tbl}}
}
\value{
\code{tbl_dt}
\code{tbl_dt} or \code{tbl} or \code{data.frame}
}
\description{
Given a file of streaming JSON (ndjson) this function reads in the records

4
tests/testthat/test-ndjson.R

@ -4,9 +4,13 @@ test_that("we can do something", {
f <- system.file("extdata", "test.json", package="ndjson")
expect_that(validate(f), equals(TRUE))
expect_that(nrow(stream_in(f)), equals(100))
expect_that(stream_in(f), is_a("tbl_dt"))
expect_that(stream_in(f, "tbl"), is_a("tbl_df"))
gzf <- system.file("extdata", "testgz.json.gz", package="ndjson")
expect_that(validate(gzf), equals(TRUE))
expect_that(nrow(stream_in(gzf)), equals(100))
expect_that(stream_in(gzf), is_a("tbl_dt"))
expect_that(stream_in(gzf, "tbl"), is_a("tbl_df"))
})

Loading…
Cancel
Save