|
|
@ -3,8 +3,12 @@ |
|
|
|
#' Given a file of streaming JSON (ndjson) this function reads in the records |
|
|
|
#' and creates a flat \code{data.table} / \code{tbl_dt} from it. |
|
|
|
#' |
|
|
|
#' @md |
|
|
|
#' @param path path to file (supports "\code{gz}" files) |
|
|
|
#' @return \code{tbl_dt} |
|
|
|
#' @param cls the package uses \code{data.table::rbindlist} for speed but |
|
|
|
#' that's not always the best return type for everyone, so you have the |
|
|
|
#' option of keeping it a `tbl_dt` via "`dt`" or converting it to a `tbl` via "`tbl`". |
|
|
|
#' @return \code{tbl_dt} or \code{tbl} or \code{data.frame} |
|
|
|
#' @export |
|
|
|
#' @references \url{http://ndjson.org/} |
|
|
|
#' @examples |
|
|
@ -13,9 +17,11 @@ |
|
|
|
#' |
|
|
|
#' gzf <- system.file("extdata", "testgz.json.gz", package="ndjson") |
|
|
|
#' nrow(stream_in(gzf)) |
|
|
|
stream_in <- function(path) { |
|
|
|
stream_in <- function(path, cls = c("dt", "tbl")) { |
|
|
|
cls <- match.arg(cls, c("dt", "tbl")) |
|
|
|
tmp <- .Call('ndjson_internal_stream_in', path.expand(path), PACKAGE='ndjson') |
|
|
|
dtplyr::tbl_dt(data.table::rbindlist(tmp, fill=TRUE)) |
|
|
|
tmp <- dtplyr::tbl_dt(data.table::rbindlist(tmp, fill=TRUE)) |
|
|
|
if (cls == "tbl") dplyr::tbl_df(tmp) else tmp |
|
|
|
} |
|
|
|
|
|
|
|
#' Validate ndjson file |
|
|
@ -42,12 +48,18 @@ validate <- function(path, verbose=FALSE) { |
|
|
|
|
|
|
|
#' Flatten a character vector of individual JSON lines into a \code{tbl_dt} |
|
|
|
#' |
|
|
|
#' @md |
|
|
|
#' @param x character vector of individual JSON lines to flatten |
|
|
|
#' @return \code{tbl_dt} |
|
|
|
#' @param cls the package uses \code{data.table::rbindlist} for speed but |
|
|
|
#' that's not always the best return type for everyone, so you have the |
|
|
|
#' option of keeping it a `tbl_dt` via "`dt`" or converting it to a `tbl` via "`tbl`". |
|
|
|
#' @return \code{tbl_dt} or \code{tbl} or \code{data.frame} |
|
|
|
#' @export |
|
|
|
#' @examples |
|
|
|
#' flatten('{"top":{"next":{"final":1,"end":true},"another":"yes"},"more":"no"}') |
|
|
|
flatten <- function(x) { |
|
|
|
flatten <- function(x, cls = c("dt", "tbl")) { |
|
|
|
cls <- match.arg(cls, c("dt", "tbl")) |
|
|
|
tmp <- .Call('ndjson_internal_flatten', x, PACKAGE='ndjson') |
|
|
|
dtplyr::tbl_dt(data.table::rbindlist(tmp, fill=TRUE)) |
|
|
|
tmp <- dtplyr::tbl_dt(data.table::rbindlist(tmp, fill=TRUE)) |
|
|
|
if (cls == "tbl") dplyr::tbl_df(tmp) else tmp |
|
|
|
} |
|
|
|