diff --git a/DESCRIPTION b/DESCRIPTION index 137b1ee..23169a7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -5,8 +5,8 @@ Version: 0.1.0 Date: 2016-08-12 Author: Bob Rudis (@hrbrmstr), Christos Zoulas [libmagic] Maintainer: Bob Rudis -Description: The 'libmagic' library provides functions to determine - mime type and other metadata from files through their "magic" +Description: The 'libmagic' library provides functions to determine + mime type and other metadata from files through their "magic" attributes. URL: http://github.com/hrbrmstr/wand BugReports: https://github.com/hrbrmstr/wand/issues @@ -16,7 +16,8 @@ Suggests: testthat, tibble, magrittr, - dplyr + dplyr, + rappdirs Depends: R (>= 3.0.0) Imports: diff --git a/NAMESPACE b/NAMESPACE index 85692cf..37aa86d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,8 @@ # Generated by roxygen2: do not edit by hand export(incant) +export(magic_wand_file) import(purrr) +import(rappdirs) importFrom(Rcpp,sourceCpp) useDynLib(wand) diff --git a/R/RcppExports.R b/R/RcppExports.R index d7908c1..c07abf3 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -4,10 +4,15 @@ #' Retrieve 'magic' attributes from files and directories #' #' @param path character vector of files to use magic on +#' @param magic_db either "\code{system}" (the default) to use the system +#' \code{magic} database or an atomic character vector with a +#' colon-separated list of full paths to custom \code{magic} database(s). #' @return a \code{tibble} / \code{data.frame} of file magic attributes. #' Specifically, mime type, encoding, possible file extensions and #' type description are returned as colums in the data frame along #' with \code{path}. 
+#' @references See \url{http://openpreservation.org/blog/2012/08/09/magic-editing-and-creation-primer/} +#' for information on how to create your own \code{magic} database #' @export #' @examples #' library(magrittr) @@ -17,7 +22,7 @@ #' list.files(full.names=TRUE) %>% #' incant() %>% #' glimpse() -incant <- function(path) { - .Call('wand_incant', PACKAGE = 'wand', path) +incant <- function(path, magic_db = "system") { + .Call('wand_incant', PACKAGE = 'wand', path, magic_db) } diff --git a/R/wand-package.R b/R/wand-package.R index ac6ef33..55c463e 100644 --- a/R/wand-package.R +++ b/R/wand-package.R @@ -4,6 +4,7 @@ #' @docType package #' @author Bob Rudis (@@hrbrmstr) #' @import purrr +#' @import rappdirs #' @useDynLib wand #' @importFrom Rcpp sourceCpp NULL diff --git a/R/zzz.r b/R/zzz.r new file mode 100644 index 0000000..92ac626 --- /dev/null +++ b/R/zzz.r @@ -0,0 +1,34 @@ +#' Use the "magic" file that comes with the package +#' +#' The \code{magic_load()} function from \code{libmagic} can't take ZIP files +#' and the \code{magic.mgc} file that ships with the package is too large to +#' be shipped uncompressed. Using this function as the \code{magic_db} +#' parameter will copy and uncompress the database to a cache directory and +#' return the full path to the magic file. Subsequent calls will not have to +#' perform the decompression unless \code{force} is \code{TRUE} or the +#' cache directory has been cleared. +#' +#' @param force ensure the latest copy of the package "magic" +#' database is used. 
+#' @export +#' @examples +#' library(magrittr) +#' library(dplyr) +#' +#' system.file("img", package="filemagic") %>% +#' list.files(full.names=TRUE) %>% +#' incant(magic_wand_file()) %>% +#' glimpse() +magic_wand_file <- function(force=FALSE) { + + cache <- rappdirs::user_cache_dir("wandr") + + if (!dir.exists(cache)) dir.create(cache, showWarnings=FALSE) + if (!dir.exists(cache)) return("system") + + suppressWarnings(unzip(system.file("magic.mgc.zip", package="wand"), + exdir=cache, overwrite=force)) + + file.path(rappdirs::user_cache_dir("wandr"), "magic.mgc") + +} \ No newline at end of file diff --git a/README.Rmd b/README.Rmd index a7643cc..cb3e170 100644 --- a/README.Rmd +++ b/README.Rmd @@ -9,6 +9,7 @@ The `libmagic` library must be installed and available to use this. The package The following functions are implemented: - `incant` : returns the "magic" metadata of the files in the input vector (as a data frame) +- `magic_wand_file` : provides a full path to the package-provided `magic` file ### Installation @@ -32,6 +33,12 @@ system.file("img", package="wand") %>% incant() %>% glimpse() +system.file("img", package="wand") %>% + list.files(full.names=TRUE) %>% + incant(magic_wand_file()) %>% + select(description) %>% + unlist(use.names=FALSE) + # current verison packageVersion("wand") diff --git a/README.md b/README.md index b492554..049f92b 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,7 @@ The `libmagic` library must be installed and available to use this. The package The following functions are implemented: - `incant` : returns the "magic" metadata of the files in the input vector (as a data frame) +- `magic_wand_file` : provides a full path to the package-provided `magic` file ### Installation @@ -35,6 +36,25 @@ system.file("img", package="wand") %>% ## $ description "directory", "C source, ASCII text", "HTML document, ASCII text, with CRLF line terminators", "... 
``` r +system.file("img", package="wand") %>% + list.files(full.names=TRUE) %>% + incant(magic_wand_file()) %>% + select(description) %>% + unlist(use.names=FALSE) +``` + + ## [1] "directory" + ## [2] "C source, ASCII text" + ## [3] "HTML document, ASCII text, with CRLF line terminators" + ## [4] "ASCII text, with no line terminators" + ## [5] "Rich Text Format data, version 1, ANSI" + ## [6] "JPEG image data, JFIF standard 1.01, aspect ratio, density 72x72, segment length 16, Exif Standard: [TIFF image data, big-endian, direntries=2, orientation=upper-left], baseline, precision 8, 800x700, frames 3" + ## [7] "PDF document, version 1.3" + ## [8] "PNG image data, 800 x 700, 8-bit/color RGBA, non-interlaced" + ## [9] "ASCII text, with very long lines, with CRLF line terminators" + ## [10] "TIFF image data, big-endian" + +``` r # current verison packageVersion("wand") ``` @@ -50,7 +70,7 @@ library(testthat) date() ``` - ## [1] "Fri Aug 12 22:23:39 2016" + ## [1] "Fri Aug 12 23:38:14 2016" ``` r test_dir("tests/") diff --git a/inst/magic.mgc.zip b/inst/magic.mgc.zip new file mode 100644 index 0000000..968a793 Binary files /dev/null and b/inst/magic.mgc.zip differ diff --git a/man/incant.Rd b/man/incant.Rd index 6ee2386..f7a4a83 100644 --- a/man/incant.Rd +++ b/man/incant.Rd @@ -4,14 +4,14 @@ \alias{incant} \title{Retrieve 'magic' attributes from files and directories} \usage{ -incant(path, magic_db = NA_character_) +incant(path, magic_db = "system") } \arguments{ \item{path}{character vector of files to use magic on} -\item{magic_db}{either \code{NULL} (the default) to use the built-in -\code{magic.mgc} database or an atomic character vector with a -colon-separated list of full paths to custom \code{magic.mgc} database(s).} +\item{magic_db}{either "\code{system}" (the default) to use the system +\code{magic} database or an atomic character vector with a +colon-separated list of full paths to custom \code{magic} database(s).} } \value{ a \code{tibble} / 
\code{data.frame} of file magic attributes. @@ -33,6 +33,6 @@ system.file("img", package="filemagic") \%>\% } \references{ See \url{http://openpreservation.org/blog/2012/08/09/magic-editing-and-creation-primer/} - for information on how to create your own \code{magic.mgc} databases. + for information on how to create your own \code{magic} database } diff --git a/man/magic_wand_file.Rd b/man/magic_wand_file.Rd new file mode 100644 index 0000000..19798ec --- /dev/null +++ b/man/magic_wand_file.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/zzz.r +\name{magic_wand_file} +\alias{magic_wand_file} +\title{Use the "magic" file that comes with the package} +\usage{ +magic_wand_file(force = FALSE) +} +\arguments{ +\item{force}{ensure the latest copy of the package "magic" +database is used.} +} +\description{ +The \code{magic_load()} function from \code{libmagic} can't take ZIP files +and the \code{magic.mgc} file that ships with the package is too large to +be shipped uncompressed. Using this function as the \code{magic_db} +parameter will copy and uncompress the database to a cache directory and +return the full path to the magic file. Subsequent calls will not have to +perform the decompression unless \code{force} is \code{TRUE} or the +cache directory has been cleared. 
+} +\examples{ +library(magrittr) +library(dplyr) + +system.file("img", package="filemagic") \%>\% + list.files(full.names=TRUE) \%>\% + incant(magic_wand_file()) \%>\% + glimpse() +} + diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 3e00e48..3e952c1 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -6,13 +6,14 @@ using namespace Rcpp; // incant -DataFrame incant(CharacterVector path); -RcppExport SEXP wand_incant(SEXP pathSEXP) { +DataFrame incant(CharacterVector path, std::string magic_db); +RcppExport SEXP wand_incant(SEXP pathSEXP, SEXP magic_dbSEXP) { BEGIN_RCPP Rcpp::RObject __result; Rcpp::RNGScope __rngScope; Rcpp::traits::input_parameter< CharacterVector >::type path(pathSEXP); - __result = Rcpp::wrap(incant(path)); + Rcpp::traits::input_parameter< std::string >::type magic_db(magic_dbSEXP); + __result = Rcpp::wrap(incant(path, magic_db)); return __result; END_RCPP } diff --git a/src/wand.cpp b/src/wand.cpp index 3b5a5c6..f19baef 100644 --- a/src/wand.cpp +++ b/src/wand.cpp @@ -7,10 +7,15 @@ using namespace Rcpp; //' Retrieve 'magic' attributes from files and directories //' //' @param path character vector of files to use magic on +//' @param magic_db either "\code{system}" (the default) to use the system +//' \code{magic} database or an atomic character vector with a +//' colon-separated list of full paths to custom \code{magic} database(s). //' @return a \code{tibble} / \code{data.frame} of file magic attributes. //' Specifically, mime type, encoding, possible file extensions and //' type description are returned as colums in the data frame along //' with \code{path}. 
+//' @references See \url{http://openpreservation.org/blog/2012/08/09/magic-editing-and-creation-primer/} +//' for information on how to create your own \code{magic} database //' @export //' @examples //' library(magrittr) @@ -21,7 +26,7 @@ using namespace Rcpp; //' incant() %>% //' glimpse() // [[Rcpp::export]] -DataFrame incant(CharacterVector path) { +DataFrame incant(CharacterVector path, std::string magic_db="system") { unsigned int input_size = path.size(); @@ -30,7 +35,15 @@ DataFrame incant(CharacterVector path) { StringVector extensions(input_size); StringVector description(input_size); - const char *mtype = NULL; + const char *mdb; + std::string mdbcpp; + + if (magic_db == "system") { + mdb = NULL; + } else { + mdbcpp = magic_db; + mdb = mdbcpp.c_str(); + } for (unsigned int i=0; i