diff --git a/NEWS.md b/NEWS.md index 28e0098..9d4a8fb 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,8 @@ +# 0.5.1 +- new `guess` logical parameter to `get_content_type()` to control + whether `guess_content_type()` is used as a last-resort fallback (addresses #4) +- updated external guess database + # 0.5.0 - {tidytest} - CRAN release diff --git a/R/get-content-type.R b/R/get-content-type.R index 262e56f..8b91754 100644 --- a/R/get-content-type.R +++ b/R/get-content-type.R @@ -10,12 +10,14 @@ #' #' @md #' @param path path to a file -#' @param ... passed on to [guess_content_type()] +#' @param guess if `TRUE` (the default), calls [guess_content_type()] if +#' no internal rules match the magic header +#' @param ... passed on to [guess_content_type()] if `guess` is `TRUE` #' @return character vector #' @export #' @examples -#' get_content_type(system.file("extdat", "test.pdf", package="wand")) -get_content_type <- function(path, ...) { +#' get_content_type(system.file("extdat", "pass-through", "test.pdf", package="wand")) +get_content_type <- function(path, guess = TRUE, ...) { path <- path.expand(path[1]) if (!file.exists(path)) stop("File not found.", call.=FALSE) @@ -28,9 +30,14 @@ get_content_type <- function(path, ...) { if (all(c(0xCA,0xFE,0xBA,0xBE) == hdr[1:4])) return("application/java-vm") if (all(c(0xD0,0xCF,0x11,0xE0,0xA1,0xB1,0x1A,0xE1) == hdr[1:8])) { - guessed_name <- guess_content_type(path) - if ((length(guessed_name) == 1) && (guessed_name != "???")) return(guessed_name) + + if (guess) { + guessed_name <- guess_content_type(path) + if ((length(guessed_name) == 1) && (guessed_name != "???")) return(guessed_name) + } + return("application/msword") + } if (all(c(0x25,0x50,0x44,0x46,0x2d,0x31,0x2e) == hdr[1:7])) return("application/pdf") @@ -107,8 +114,10 @@ get_content_type <- function(path, ...) 
{ office_type <- check_office(hdr, path) if (length(office_type) > 0) return(office_type) - guessed_name <- guess_content_type(path) - if ((length(guessed_name) == 1) && (guessed_name != "???")) return(guessed_name) + if (guess) { + guessed_name <- guess_content_type(path) + if ((length(guessed_name) == 1) && (guessed_name != "???")) return(guessed_name) + } return("application/zip") @@ -131,6 +140,7 @@ get_content_type <- function(path, ...) { if (all(c(0x00,0x00,0x01,0xBA) == hdr[1:4])) return("video/mpeg") if (all(c(0x00,0x00,0x01,0xB3) == hdr[1:4])) return("video/mpeg") + if (!guess) return("???") return(guess_content_type(path, ...)) diff --git a/R/guess-content-type.R b/R/guess-content-type.R index 4381cd3..2397f25 100644 --- a/R/guess-content-type.R +++ b/R/guess-content-type.R @@ -16,7 +16,7 @@ #' @return character vector #' @export #' @examples -#' guess_content_type(system.file("extdat", "test.pdf", package="wand")) +#' guess_content_type(system.file("extdat", "pass-through", "test.pdf", package="wand")) guess_content_type <- function(path, not_found = "???", custom_db = NULL) { path <- path.expand(path[1]) diff --git a/R/sysdata.rda b/R/sysdata.rda index c564561..f5afff7 100644 Binary files a/R/sysdata.rda and b/R/sysdata.rda differ diff --git a/inst/extdat/no-guess/csv.docx b/inst/extdat/no-guess/csv.docx new file mode 100644 index 0000000..ccffa3b --- /dev/null +++ b/inst/extdat/no-guess/csv.docx @@ -0,0 +1,3 @@ +Kid Name,Weight,Age +Nakshatra,12,1.5 +Titas,16,6 diff --git a/inst/extdat/actions.csv b/inst/extdat/pass-through/actions.csv similarity index 100% rename from inst/extdat/actions.csv rename to inst/extdat/pass-through/actions.csv diff --git a/inst/extdat/actions.txt b/inst/extdat/pass-through/actions.txt similarity index 100% rename from inst/extdat/actions.txt rename to inst/extdat/pass-through/actions.txt diff --git a/inst/extdat/actions.xlsx b/inst/extdat/pass-through/actions.xlsx similarity index 100% rename from inst/extdat/actions.xlsx 
rename to inst/extdat/pass-through/actions.xlsx diff --git a/inst/extdat/test.au b/inst/extdat/pass-through/test.au similarity index 100% rename from inst/extdat/test.au rename to inst/extdat/pass-through/test.au diff --git a/inst/extdat/test.bin b/inst/extdat/pass-through/test.bin similarity index 100% rename from inst/extdat/test.bin rename to inst/extdat/pass-through/test.bin diff --git a/inst/extdat/test.bmp b/inst/extdat/pass-through/test.bmp similarity index 100% rename from inst/extdat/test.bmp rename to inst/extdat/pass-through/test.bmp diff --git a/inst/extdat/test.dtd b/inst/extdat/pass-through/test.dtd similarity index 100% rename from inst/extdat/test.dtd rename to inst/extdat/pass-through/test.dtd diff --git a/inst/extdat/test.emf b/inst/extdat/pass-through/test.emf similarity index 100% rename from inst/extdat/test.emf rename to inst/extdat/pass-through/test.emf diff --git a/inst/extdat/test.eps b/inst/extdat/pass-through/test.eps similarity index 100% rename from inst/extdat/test.eps rename to inst/extdat/pass-through/test.eps diff --git a/inst/extdat/test.fli b/inst/extdat/pass-through/test.fli similarity index 100% rename from inst/extdat/test.fli rename to inst/extdat/pass-through/test.fli diff --git a/inst/extdat/test.gif b/inst/extdat/pass-through/test.gif similarity index 100% rename from inst/extdat/test.gif rename to inst/extdat/pass-through/test.gif diff --git a/inst/extdat/test.ico b/inst/extdat/pass-through/test.ico similarity index 100% rename from inst/extdat/test.ico rename to inst/extdat/pass-through/test.ico diff --git a/inst/extdat/test.jpg b/inst/extdat/pass-through/test.jpg similarity index 100% rename from inst/extdat/test.jpg rename to inst/extdat/pass-through/test.jpg diff --git a/inst/extdat/test.mp3 b/inst/extdat/pass-through/test.mp3 similarity index 100% rename from inst/extdat/test.mp3 rename to inst/extdat/pass-through/test.mp3 diff --git a/inst/extdat/test.odt b/inst/extdat/pass-through/test.odt similarity index 100% 
rename from inst/extdat/test.odt rename to inst/extdat/pass-through/test.odt diff --git a/inst/extdat/test.ogg b/inst/extdat/pass-through/test.ogg similarity index 100% rename from inst/extdat/test.ogg rename to inst/extdat/pass-through/test.ogg diff --git a/inst/extdat/test.pcx b/inst/extdat/pass-through/test.pcx similarity index 100% rename from inst/extdat/test.pcx rename to inst/extdat/pass-through/test.pcx diff --git a/inst/extdat/test.pdf b/inst/extdat/pass-through/test.pdf similarity index 100% rename from inst/extdat/test.pdf rename to inst/extdat/pass-through/test.pdf diff --git a/inst/extdat/test.pl b/inst/extdat/pass-through/test.pl similarity index 100% rename from inst/extdat/test.pl rename to inst/extdat/pass-through/test.pl diff --git a/inst/extdat/test.png b/inst/extdat/pass-through/test.png similarity index 100% rename from inst/extdat/test.png rename to inst/extdat/pass-through/test.png diff --git a/inst/extdat/test.pnm b/inst/extdat/pass-through/test.pnm similarity index 100% rename from inst/extdat/test.pnm rename to inst/extdat/pass-through/test.pnm diff --git a/inst/extdat/test.ppm b/inst/extdat/pass-through/test.ppm similarity index 100% rename from inst/extdat/test.ppm rename to inst/extdat/pass-through/test.ppm diff --git a/inst/extdat/test.ppt b/inst/extdat/pass-through/test.ppt similarity index 100% rename from inst/extdat/test.ppt rename to inst/extdat/pass-through/test.ppt diff --git a/inst/extdat/test.ps b/inst/extdat/pass-through/test.ps similarity index 100% rename from inst/extdat/test.ps rename to inst/extdat/pass-through/test.ps diff --git a/inst/extdat/test.psd b/inst/extdat/pass-through/test.psd similarity index 100% rename from inst/extdat/test.psd rename to inst/extdat/pass-through/test.psd diff --git a/inst/extdat/test.py b/inst/extdat/pass-through/test.py similarity index 100% rename from inst/extdat/test.py rename to inst/extdat/pass-through/test.py diff --git a/inst/extdat/test.rtf b/inst/extdat/pass-through/test.rtf 
similarity index 100% rename from inst/extdat/test.rtf rename to inst/extdat/pass-through/test.rtf diff --git a/inst/extdat/test.sh b/inst/extdat/pass-through/test.sh similarity index 100% rename from inst/extdat/test.sh rename to inst/extdat/pass-through/test.sh diff --git a/inst/extdat/test.tar b/inst/extdat/pass-through/test.tar similarity index 100% rename from inst/extdat/test.tar rename to inst/extdat/pass-through/test.tar diff --git a/inst/extdat/test.tar.gz b/inst/extdat/pass-through/test.tar.gz similarity index 100% rename from inst/extdat/test.tar.gz rename to inst/extdat/pass-through/test.tar.gz diff --git a/inst/extdat/test.tga b/inst/extdat/pass-through/test.tga similarity index 100% rename from inst/extdat/test.tga rename to inst/extdat/pass-through/test.tga diff --git a/inst/extdat/test.txt b/inst/extdat/pass-through/test.txt similarity index 100% rename from inst/extdat/test.txt rename to inst/extdat/pass-through/test.txt diff --git a/inst/extdat/test.txt.gz b/inst/extdat/pass-through/test.txt.gz similarity index 100% rename from inst/extdat/test.txt.gz rename to inst/extdat/pass-through/test.txt.gz diff --git a/inst/extdat/test.wav b/inst/extdat/pass-through/test.wav similarity index 100% rename from inst/extdat/test.wav rename to inst/extdat/pass-through/test.wav diff --git a/inst/extdat/test.wmf b/inst/extdat/pass-through/test.wmf similarity index 100% rename from inst/extdat/test.wmf rename to inst/extdat/pass-through/test.wmf diff --git a/inst/extdat/test.xcf b/inst/extdat/pass-through/test.xcf similarity index 100% rename from inst/extdat/test.xcf rename to inst/extdat/pass-through/test.xcf diff --git a/inst/extdat/test.xml b/inst/extdat/pass-through/test.xml similarity index 100% rename from inst/extdat/test.xml rename to inst/extdat/pass-through/test.xml diff --git a/inst/extdat/test.xpm b/inst/extdat/pass-through/test.xpm similarity index 100% rename from inst/extdat/test.xpm rename to inst/extdat/pass-through/test.xpm diff --git 
a/inst/extdat/test.zip b/inst/extdat/pass-through/test.zip similarity index 100% rename from inst/extdat/test.zip rename to inst/extdat/pass-through/test.zip diff --git a/inst/extdat/test_128_44_jstereo.mp3 b/inst/extdat/pass-through/test_128_44_jstereo.mp3 similarity index 100% rename from inst/extdat/test_128_44_jstereo.mp3 rename to inst/extdat/pass-through/test_128_44_jstereo.mp3 diff --git a/inst/extdat/test_excel.xlsm b/inst/extdat/pass-through/test_excel.xlsm similarity index 100% rename from inst/extdat/test_excel.xlsm rename to inst/extdat/pass-through/test_excel.xlsm diff --git a/inst/extdat/test_excel.xlsx b/inst/extdat/pass-through/test_excel.xlsx similarity index 100% rename from inst/extdat/test_excel.xlsx rename to inst/extdat/pass-through/test_excel.xlsx diff --git a/inst/extdat/test_excel_2000.xls b/inst/extdat/pass-through/test_excel_2000.xls similarity index 100% rename from inst/extdat/test_excel_2000.xls rename to inst/extdat/pass-through/test_excel_2000.xls diff --git a/inst/extdat/test_excel_spreadsheet.xml b/inst/extdat/pass-through/test_excel_spreadsheet.xml similarity index 100% rename from inst/extdat/test_excel_spreadsheet.xml rename to inst/extdat/pass-through/test_excel_spreadsheet.xml diff --git a/inst/extdat/test_excel_web_archive.mht b/inst/extdat/pass-through/test_excel_web_archive.mht similarity index 100% rename from inst/extdat/test_excel_web_archive.mht rename to inst/extdat/pass-through/test_excel_web_archive.mht diff --git a/inst/extdat/test_nocompress.tif b/inst/extdat/pass-through/test_nocompress.tif similarity index 100% rename from inst/extdat/test_nocompress.tif rename to inst/extdat/pass-through/test_nocompress.tif diff --git a/inst/extdat/test_powerpoint.pptm b/inst/extdat/pass-through/test_powerpoint.pptm similarity index 100% rename from inst/extdat/test_powerpoint.pptm rename to inst/extdat/pass-through/test_powerpoint.pptm diff --git a/inst/extdat/test_powerpoint.pptx b/inst/extdat/pass-through/test_powerpoint.pptx 
similarity index 100% rename from inst/extdat/test_powerpoint.pptx rename to inst/extdat/pass-through/test_powerpoint.pptx diff --git a/inst/extdat/test_word.docm b/inst/extdat/pass-through/test_word.docm similarity index 100% rename from inst/extdat/test_word.docm rename to inst/extdat/pass-through/test_word.docm diff --git a/inst/extdat/test_word.docx b/inst/extdat/pass-through/test_word.docx similarity index 100% rename from inst/extdat/test_word.docx rename to inst/extdat/pass-through/test_word.docx diff --git a/inst/extdat/test_word_2000.doc b/inst/extdat/pass-through/test_word_2000.doc similarity index 100% rename from inst/extdat/test_word_2000.doc rename to inst/extdat/pass-through/test_word_2000.doc diff --git a/inst/extdat/test_word_6.0_95.doc b/inst/extdat/pass-through/test_word_6.0_95.doc similarity index 100% rename from inst/extdat/test_word_6.0_95.doc rename to inst/extdat/pass-through/test_word_6.0_95.doc diff --git a/inst/tinytest/test_wand.R b/inst/tinytest/test_wand.R index d27a91e..7b8cda8 100644 --- a/inst/tinytest/test_wand.R +++ b/inst/tinytest/test_wand.R @@ -58,8 +58,12 @@ list( ), test.zip = "application/zip" ) -> results -fils <- list.files(system.file("extdat", package="wand"), full.names=TRUE) +fils <- list.files(system.file("extdat", "pass-through", package="wand"), full.names=TRUE) tst <- lapply(fils, get_content_type) names(tst) <- basename(fils) for(n in names(tst)) expect_identical(results[[n]], tst[[n]]) + +no_guess <- system.file("extdat", "no-guess", "csv.docx", package = "wand") +expect_equal(get_content_type(no_guess, guess = FALSE), "???") + diff --git a/man/get_content_type.Rd b/man/get_content_type.Rd index 771e971..674ee1d 100644 --- a/man/get_content_type.Rd +++ b/man/get_content_type.Rd @@ -4,12 +4,15 @@ \alias{get_content_type} \title{Discover MIME type of a file based on contents} \usage{ -get_content_type(path, ...) +get_content_type(path, guess = TRUE, ...) 
} \arguments{ \item{path}{path to a file} -\item{...}{passed on to \code{\link[=guess_content_type]{guess_content_type()}}} +\item{guess}{if \code{TRUE} (the default), calls \code{\link[=guess_content_type]{guess_content_type()}} if +no internal rules match the magic header} + +\item{...}{passed on to \code{\link[=guess_content_type]{guess_content_type()}} if \code{guess} is \code{TRUE}} } \value{ character vector @@ -25,5 +28,5 @@ comparisons are required/desired. If no match is found, \code{???} is returned (see \code{\link[=guess_content_type]{guess_content_type()}} for how to override this behaviour). } \examples{ -get_content_type(system.file("extdat", "test.pdf", package="wand")) +get_content_type(system.file("extdat", "pass-through", "test.pdf", package="wand")) } diff --git a/man/guess_content_type.Rd b/man/guess_content_type.Rd index f96d2d7..20db087 100644 --- a/man/guess_content_type.Rd +++ b/man/guess_content_type.Rd @@ -27,5 +27,5 @@ return one or more associated types for a given input path. If no match is found, \code{???} is returned. } \examples{ -guess_content_type(system.file("extdat", "test.pdf", package="wand")) +guess_content_type(system.file("extdat", "pass-through", "test.pdf", package="wand")) }