diff --git a/DESCRIPTION b/DESCRIPTION index fbfec74..3ad694f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,28 +1,29 @@ Package: wand Type: Package Title: Retrieve 'Magic' Attributes from Files and Directories -Version: 0.1.0 -Date: 2016-08-12 +Version: 0.2.0 +Date: 2016-08-13 Author: Bob Rudis (@hrbrmstr), Christos Zoulas [libmagic] -Maintainer: Bob Rudis +Maintainer: Bob Rudis Description: The 'libmagic' library provides functions to determine mime type and other metadata from files through their "magic" attributes. URL: http://github.com/hrbrmstr/wand BugReports: https://github.com/hrbrmstr/wand/issues -SystemRequirements: libmagic +SystemRequirements: libmagic (>= 5.14) License: AGPL Suggests: - testthat, - tibble, - magrittr, - dplyr + testthat Depends: R (>= 3.0.0) Imports: purrr, Rcpp, rappdirs, - utils + tibble, + utils, + tidyr, + stringi, + dplyr LinkingTo: Rcpp RoxygenNote: 5.0.1 diff --git a/NAMESPACE b/NAMESPACE index c0e7d1e..03c01ab 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,7 +3,12 @@ export(incant) export(magic_wand_file) import(purrr) +import(stringi) +import(tibble) +import(tidyr) importFrom(Rcpp,sourceCpp) +importFrom(dplyr,left_join) +importFrom(dplyr,mutate) importFrom(rappdirs,user_cache_dir) importFrom(utils,unzip) useDynLib(wand) diff --git a/NEWS.md b/NEWS.md index 9b4679b..c648f22 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,2 +1,5 @@ -0.1.0 +# 0.2.0 +* Works on Windows + +# 0.1.0 * Initial release diff --git a/R/RcppExports.R b/R/RcppExports.R index 0f0fa1a..9676da9 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -1,31 +1,8 @@ # This file was generated by Rcpp::compileAttributes # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 -#' Retrieve 'magic' attributes from files and directories -#' -#' @param path character vector of files to use magic on -#' @param magic_db either "\code{system}" (the default) to use the system -#' \code{magic} database or an atomic character vector with a -#' colon-separated list of full paths to 
custom \code{magic} database(s). -#' @return a \code{tibble} / \code{data.frame} of file magic attributes. -#' Specifically, mime type, encoding, possible file extensions and -#' type description are returned as colums in the data frame along -#' with \code{path}. -#' @note Various fields might not be available depending on the version -#' of \code{libmagic} you have installed. -#' @references See \url{http://openpreservation.org/blog/2012/08/09/magic-editing-and-creation-primer/} -#' for information on how to create your own \code{magic} database -#' @export -#' @examples -#' library(magrittr) -#' library(dplyr) -#' -#' system.file("img", package="filemagic") %>% -#' list.files(full.names=TRUE) %>% -#' incant() %>% -#' glimpse() -incant <- function(path, magic_db = "system") { - .Call('wand_incant', PACKAGE = 'wand', path, magic_db) +incant_ <- function(path, magic_db = "system") { + .Call('wand_incant_', PACKAGE = 'wand', path, magic_db) } lib_version <- function() { diff --git a/R/wand-package.R b/R/wand-package.R index d373cf6..99a7322 100644 --- a/R/wand-package.R +++ b/R/wand-package.R @@ -4,8 +4,12 @@ #' @docType package #' @author Bob Rudis (@@hrbrmstr) #' @import purrr +#' @import tibble +#' @import tidyr +#' @import stringi #' @importFrom rappdirs user_cache_dir #' @useDynLib wand #' @importFrom Rcpp sourceCpp #' @importFrom utils unzip +#' @importFrom dplyr mutate left_join NULL diff --git a/R/wand.r b/R/wand.r new file mode 100644 index 0000000..3e53579 --- /dev/null +++ b/R/wand.r @@ -0,0 +1,76 @@ +#' Retrieve 'magic' attributes from files and directories +#' +#' @param path character vector of files to use magic on +#' @param magic_db either "\code{system}" (the default) to use the system +#' \code{magic} database or an atomic character vector with a +#' colon-separated list of full paths to custom \code{magic} database(s). +#' @return a \code{tibble} / \code{data.frame} of file magic attributes. 
+#' Specifically, mime type, encoding, possible file extensions and
+#' type description are returned as columns in the data frame along
+#' with \code{path}.
+#' @note Various fields might not be available depending on the version
+#' of \code{libmagic} you have installed.
+#' @references See \url{http://openpreservation.org/blog/2012/08/09/magic-editing-and-creation-primer/}
+#' for information on how to create your own \code{magic} database
+#' @export
+#' @examples
+#' library(magrittr)
+#' library(dplyr)
+#'
+#' system.file("img", package="wand") %>%
+#' list.files(full.names=TRUE) %>%
+#' incant() %>%
+#' glimpse()
+incant <- function(path, magic_db="system") {
+
+  if (get_os() != "win") return(incant_(path, magic_db))
+
+  # No libmagic binding on Windows, so shell out to the bundled file.exe.
+  file_exe <- system.file("exec/file.exe", package="wand")
+  if (!nzchar(file_exe)) stop("Bundled 'file.exe' not found", call.=FALSE)
+
+  # Honour a caller-supplied database; "system" means the packaged one.
+  if (identical(magic_db, "system")) magic_db <- magic_wand_file()
+  if (is.null(magic_db)) stop("No 'magic' database available", call.=FALSE)
+  magic_db <- normalizePath(magic_db)
+
+  # file.exe reads the paths from a list file; remove it even on error.
+  tf <- tempfile()
+  on.exit(unlink(tf), add=TRUE)
+  writeLines(path, tf)
+
+  # Run file.exe once; split each "<file>||<value>" line into two columns.
+  run_file <- function(flags, value_col) {
+    out <- system2(file_exe,
+                   c(flags, "--no-buffer", "--preserve-date",
+                     '--separator "||"',
+                     sprintf('--magic-file "%s"', magic_db),
+                     sprintf('--files-from "%s"', tf)),
+                   stdout=TRUE)
+    parts <- stri_split_fixed(out, "||", n=2, simplify=TRUE)
+    parts[, 2] <- stri_trim_both(parts[, 2]) # file pads after the separator
+    colnames(parts) <- c("file", value_col)
+    as_data_frame(parts)
+  }
+
+  mime <- run_file(c("--mime-type", "--mime-encoding"), "response") %>%
+    separate(response, c("mime_type", "encoding"), sep=";",
+             extra="drop", fill="right") %>%
+    mutate(encoding=stri_replace_first_regex(encoding, "^\\s*charset=", ""))
+
+  # unique() keeps duplicated input paths from fanning out in the join.
+  left_join(mime, unique(run_file(character(0), "description")), by="file")
+}
+
+# Classify the platform as "win", "mac" or "unix"; stops on anything else.
+get_os <- function () {
+  if (.Platform$OS.type == "windows") {
+    "win"
+  } else if (identical(unname(Sys.info()["sysname"]), "Darwin")) {
+    "mac"
+  } else if (.Platform$OS.type == "unix") {
+    "unix"
+  } else {
+    stop("Unknown OS")
+  }
+}
\ No newline at end of file
diff --git a/R/zzz.r b/R/zzz.r
index b3583ef..7fbbb2b 100644
--- a/R/zzz.r
+++ b/R/zzz.r
@@ -8,7 +8,7 @@
-#' perform the decompression unless \code{force} is \code{TRUE} or the
+#' perform the decompression unless \code{refresh} is \code{TRUE} or the
 #' cache directory has been cleared.
 #'
-#' @param force ensure the lastest copy of the pacakge "magic"
+#' @param refresh ensure the latest copy of the package "magic"
 #' database is used.
 #' @export
 #' @examples
@@ -19,18 +19,19 @@
 #' list.files(full.names=TRUE) %>%
 #' incant(magic_wand_file()) %>%
 #' glimpse()
-magic_wand_file <- function(force=FALSE) {
+magic_wand_file <- function(refresh=FALSE) {
 
   cache <- rappdirs::user_cache_dir("wandr")
 
-  if (!dir.exists(cache)) dir.create(cache, showWarnings=FALSE)
-  if (!dir.exists(cache)) return("system")
+  if (!dir.exists(cache)) dir.create(cache, recursive=TRUE, showWarnings=FALSE)
+  if (!dir.exists(cache)) return(NULL)
 
   if (lib_version() >= 528) vers <- "new" else vers <- "old"
+  if (get_os() == "win") vers <- "win"
 
-  if (!file.exists(file.path(rappdirs::user_cache_dir("wandr"), "magic.mgc"))) {
-    suppressWarnings(unzip(system.file("db", vers, "magic.mgc.zip", package="wand"),
-                           exdir=cache, overwrite=force))
+  if (refresh || (!file.exists(file.path(rappdirs::user_cache_dir("wandr"), "magic.mgc")))) {
+    unzip(system.file("db", vers, "magic.mgc.zip", package="wand"),
+          exdir=cache, overwrite=TRUE)
   }
 
   file.path(rappdirs::user_cache_dir("wandr"), "magic.mgc")
diff --git a/README.Rmd b/README.Rmd
index 8f0060a..4044888 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -6,14 +6,14 @@ output: rmarkdown::github_document
 
 `wand` : Retrieve 'Magic' Attributes from Files and Directories
 
-The `libmagic` library must be installed and available to use this.
+The `libmagic` library must be installed on *nix/macOS and available to use this.
- `apt-get install libmagic-dev` on Debian-ish systems - `brew install libmagic` on macOS While the package was developed using the 5.28 version of `libmagic` it has been configured to work with older versions. Note that some fields in the resultant data frame might not be available with older library versions. When using the function `magic_wand_file()` it checks for which version of `libmagic` is installed on your system and provides a suitable `magic.mgc` file for it. -The package should also be pretty straightforward to get working on Windows. Assistance to do that is welcome. Think of all the fame and glory you'll receive! +The package also works on Windows but it's a bit of a hack because, well, _Windows_. Seriously, folks, use a real operating system. The Windows version makes two `system2()` calls, so it's sub-optimal at best. Help to get it working in C would be greatly appreciated. The following functions are implemented: diff --git a/README.md b/README.md index aefbf48..002f9b6 100644 --- a/README.md +++ b/README.md @@ -3,14 +3,14 @@ `wand` : Retrieve 'Magic' Attributes from Files and Directories -The `libmagic` library must be installed and available to use this. +The `libmagic` library must be installed on \*nix/macOS and available to use this. - `apt-get install libmagic-dev` on Debian-ish systems - `brew install libmagic` on macOS While the package was developed using the 5.28 version of `libmagic` it has been configured to work with older versions. Note that some fields in the resultant data frame might not be available with older library versions. When using the function `magic_wand_file()` it checks for which version of `libmagic` is installed on your system and provides a suitable `magic.mgc` file for it. -The package should also be pretty straightforward to get working on Windows. Assistance to do that is welcome. Think of all the fame and glory you'll receive! +The package also works on Windows but it's a bit of a hack because, well, *Windows*. 
Seriously, folks, use a real operating system. The Windows version makes two `system2()` calls, so it's sub-optimal at best. Help to get it working in C would be greatly appreciated. The following functions are implemented: @@ -68,7 +68,7 @@ system.file("img", package="wand") %>% packageVersion("wand") ``` - ## [1] '0.1.0' + ## [1] '0.2.0' ### Test Results @@ -79,7 +79,7 @@ library(testthat) date() ``` - ## [1] "Sat Aug 13 12:12:14 2016" + ## [1] "Sun Aug 14 18:36:44 2016" ``` r test_dir("tests/") diff --git a/configure.win b/configure.win new file mode 100755 index 0000000..e69de29 diff --git a/inst/db/win/magic.mgc.zip b/inst/db/win/magic.mgc.zip new file mode 100644 index 0000000..fac0865 Binary files /dev/null and b/inst/db/win/magic.mgc.zip differ diff --git a/inst/exec/file.exe b/inst/exec/file.exe new file mode 100755 index 0000000..44eab60 Binary files /dev/null and b/inst/exec/file.exe differ diff --git a/man/incant.Rd b/man/incant.Rd index 1868be8..2063a0a 100644 --- a/man/incant.Rd +++ b/man/incant.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RcppExports.R +% Please edit documentation in R/wand.r \name{incant} \alias{incant} \title{Retrieve 'magic' attributes from files and directories} diff --git a/man/magic_wand_file.Rd b/man/magic_wand_file.Rd index 19798ec..2e19b27 100644 --- a/man/magic_wand_file.Rd +++ b/man/magic_wand_file.Rd @@ -4,10 +4,10 @@ \alias{magic_wand_file} \title{Use the "magic" file that comes with the package} \usage{ -magic_wand_file(force = FALSE) +magic_wand_file(refresh = FALSE) } \arguments{ -\item{force}{ensure the lastest copy of the pacakge "magic" +\item{refresh}{ensure the lastest copy of the pacakge "magic" database is used.} } \description{ diff --git a/src/Makevars b/src/Makevars index 6fecd53..9e2918a 100644 --- a/src/Makevars +++ b/src/Makevars @@ -1 +1,8 @@ -PKG_LIBS = -lmagic \ No newline at end of file +PKG_LIBS = -lmagic + +all: clean + +clean: + rm -f $(OBJECTS) 
wand.dll + +.PHONY: all clean \ No newline at end of file diff --git a/src/Makevars.win b/src/Makevars.win new file mode 100755 index 0000000..27a2011 --- /dev/null +++ b/src/Makevars.win @@ -0,0 +1,9 @@ +all: winlibs + +clean: + rm -f $(OBJECTS) wand.dll + +winlibs: + "${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe" "../tools/getfile.R" + +.PHONY: all winlibs clean \ No newline at end of file diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp old mode 100644 new mode 100755 index 3327aa4..5d456d0 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -5,15 +5,15 @@ using namespace Rcpp; -// incant -DataFrame incant(CharacterVector path, std::string magic_db); -RcppExport SEXP wand_incant(SEXP pathSEXP, SEXP magic_dbSEXP) { +// incant_ +DataFrame incant_(CharacterVector path, std::string magic_db); +RcppExport SEXP wand_incant_(SEXP pathSEXP, SEXP magic_dbSEXP) { BEGIN_RCPP Rcpp::RObject __result; Rcpp::RNGScope __rngScope; Rcpp::traits::input_parameter< CharacterVector >::type path(pathSEXP); Rcpp::traits::input_parameter< std::string >::type magic_db(magic_dbSEXP); - __result = Rcpp::wrap(incant(path, magic_db)); + __result = Rcpp::wrap(incant_(path, magic_db)); return __result; END_RCPP } diff --git a/src/wand.cpp b/src/wand.cpp index b688af4..13e7483 100644 --- a/src/wand.cpp +++ b/src/wand.cpp @@ -1,34 +1,23 @@ #include + using namespace Rcpp; +#ifdef _WIN32 +#define WINDOWS +#endif + +#ifdef _WIN64 +#define WINDOWS +#endif + +#ifndef WINDOWS #include "magic.h" #include "limits.h" +#endif -//' Retrieve 'magic' attributes from files and directories -//' -//' @param path character vector of files to use magic on -//' @param magic_db either "\code{system}" (the default) to use the system -//' \code{magic} database or an atomic character vector with a -//' colon-separated list of full paths to custom \code{magic} database(s). -//' @return a \code{tibble} / \code{data.frame} of file magic attributes. 
-//' Specifically, mime type, encoding, possible file extensions and
-//' type description are returned as colums in the data frame along
-//' with \code{path}.
-//' @note Various fields might not be available depending on the version
-//' of \code{libmagic} you have installed.
-//' @references See \url{http://openpreservation.org/blog/2012/08/09/magic-editing-and-creation-primer/}
-//' for information on how to create your own \code{magic} database
-//' @export
-//' @examples
-//' library(magrittr)
-//' library(dplyr)
-//'
-//' system.file("img", package="filemagic") %>%
-//' list.files(full.names=TRUE) %>%
-//' incant() %>%
-//' glimpse()
+#ifndef WINDOWS
 // [[Rcpp::export]]
-DataFrame incant(CharacterVector path, std::string magic_db="system") {
+DataFrame incant_(CharacterVector path, std::string magic_db="system") {
 
   unsigned int input_size = path.size();
 
@@ -149,6 +138,12 @@ DataFrame incant(CharacterVector path, std::string magic_db="system") {
   return(df);
 
 }
+#else
+// Windows build has no libmagic; this stub only satisfies the wand_incant_ wrapper.
+DataFrame incant_(CharacterVector path, std::string magic_db="system") {
+  return(DataFrame::create());
+}
+#endif
 
 // [[Rcpp::export]]
 int lib_version() {
diff --git a/tools/getfile.R b/tools/getfile.R
new file mode 100755
index 0000000..8095a11
--- /dev/null
+++ b/tools/getfile.R
@@ -0,0 +1,11 @@
+# Fetch the Windows build of file(1) that incant() shells out to.
+# Invoked from src/Makevars.win as ../tools/getfile.R, so paths are relative to src/.
+if (getRversion() < "3.3.0") setInternet2()
+zip_file <- tempfile(fileext = ".zip")
+# mode = "wb": a text-mode transfer corrupts binary downloads on Windows.
+download.file("https://www.stats.ox.ac.uk/pub/Rtools/goodies/file-5.03.zip",
+              zip_file, quiet = TRUE, mode = "wb")
+dir.create("../inst/exec", recursive = TRUE, showWarnings = FALSE)
+unzip(zip_file, "file.exe", exdir = "../inst/exec")
+unlink(zip_file)
+if (!file.exists("../inst/exec/file.exe")) stop("file.exe was not extracted")