Browse Source

Addresses #4

master
boB Rudis 2 years ago
parent
commit
47c773be44
No known key found for this signature in database GPG Key ID: 1D7529BE14E2BBA9
  1. 5
      NEWS.md
  2. 24
      R/get-content-type.R
  3. 2
      R/guess-content-type.R
  4. BIN
      R/sysdata.rda
  5. 3
      inst/extdat/no-guess/csv.docx
  6. 0
      inst/extdat/pass-through/actions.csv
  7. 0
      inst/extdat/pass-through/actions.txt
  8. 0
      inst/extdat/pass-through/actions.xlsx
  9. 0
      inst/extdat/pass-through/test.au
  10. 0
      inst/extdat/pass-through/test.bin
  11. 0
      inst/extdat/pass-through/test.bmp
  12. 0
      inst/extdat/pass-through/test.dtd
  13. 0
      inst/extdat/pass-through/test.emf
  14. 0
      inst/extdat/pass-through/test.eps
  15. 0
      inst/extdat/pass-through/test.fli
  16. 0
      inst/extdat/pass-through/test.gif
  17. 0
      inst/extdat/pass-through/test.ico
  18. 0
      inst/extdat/pass-through/test.jpg
  19. 0
      inst/extdat/pass-through/test.mp3
  20. 0
      inst/extdat/pass-through/test.odt
  21. 0
      inst/extdat/pass-through/test.ogg
  22. 0
      inst/extdat/pass-through/test.pcx
  23. 0
      inst/extdat/pass-through/test.pdf
  24. 0
      inst/extdat/pass-through/test.pl
  25. 0
      inst/extdat/pass-through/test.png
  26. 0
      inst/extdat/pass-through/test.pnm
  27. 0
      inst/extdat/pass-through/test.ppm
  28. 0
      inst/extdat/pass-through/test.ppt
  29. 0
      inst/extdat/pass-through/test.ps
  30. 0
      inst/extdat/pass-through/test.psd
  31. 0
      inst/extdat/pass-through/test.py
  32. 0
      inst/extdat/pass-through/test.rtf
  33. 0
      inst/extdat/pass-through/test.sh
  34. 0
      inst/extdat/pass-through/test.tar
  35. 0
      inst/extdat/pass-through/test.tar.gz
  36. 0
      inst/extdat/pass-through/test.tga
  37. 0
      inst/extdat/pass-through/test.txt
  38. 0
      inst/extdat/pass-through/test.txt.gz
  39. 0
      inst/extdat/pass-through/test.wav
  40. 0
      inst/extdat/pass-through/test.wmf
  41. 0
      inst/extdat/pass-through/test.xcf
  42. 0
      inst/extdat/pass-through/test.xml
  43. 0
      inst/extdat/pass-through/test.xpm
  44. 0
      inst/extdat/pass-through/test.zip
  45. 0
      inst/extdat/pass-through/test_128_44_jstereo.mp3
  46. 0
      inst/extdat/pass-through/test_excel.xlsm
  47. 0
      inst/extdat/pass-through/test_excel.xlsx
  48. 0
      inst/extdat/pass-through/test_excel_2000.xls
  49. 0
      inst/extdat/pass-through/test_excel_spreadsheet.xml
  50. 0
      inst/extdat/pass-through/test_excel_web_archive.mht
  51. 0
      inst/extdat/pass-through/test_nocompress.tif
  52. 0
      inst/extdat/pass-through/test_powerpoint.pptm
  53. 0
      inst/extdat/pass-through/test_powerpoint.pptx
  54. 0
      inst/extdat/pass-through/test_word.docm
  55. 0
      inst/extdat/pass-through/test_word.docx
  56. 0
      inst/extdat/pass-through/test_word_2000.doc
  57. 0
      inst/extdat/pass-through/test_word_6.0_95.doc
  58. 6
      inst/tinytest/test_wand.R
  59. 9
      man/get_content_type.Rd
  60. 2
      man/guess_content_type.Rd

5
NEWS.md

@ -1,3 +1,8 @@
# 0.5.1
- new `guess` logical parameter to `get_content_type()` to control
whether `guess_content_type()` is used as a last-resort pass-through (addresses #4)
- updated external guess database
# 0.5.0
- {tinytest}
- CRAN release

24
R/get-content-type.R

@ -10,12 +10,14 @@
#'
#' @md
#' @param path path to a file
#' @param ... passed on to [guess_content_type()]
#' @param guess if `TRUE` (the default), calls [guess_content_type()] if
#' no internal rules match the magic header
#' @param ... passed on to [guess_content_type()] if `guess` is `TRUE`
#' @return character vector
#' @export
#' @examples
#' get_content_type(system.file("extdat", "test.pdf", package="wand"))
get_content_type <- function(path, ...) {
#' get_content_type(system.file("extdat", "pass-through", "test.pdf", package="wand"))
get_content_type <- function(path, guess = TRUE, ...) {
path <- path.expand(path[1])
if (!file.exists(path)) stop("File not found.", call.=FALSE)
@ -28,9 +30,14 @@ get_content_type <- function(path, ...) {
if (all(c(0xCA,0xFE,0xBA,0xBE) == hdr[1:4])) return("application/java-vm")
if (all(c(0xD0,0xCF,0x11,0xE0,0xA1,0xB1,0x1A,0xE1) == hdr[1:8])) {
guessed_name <- guess_content_type(path)
if ((length(guessed_name) == 1) && (guessed_name != "???")) return(guessed_name)
if (guess) {
guessed_name <- guess_content_type(path)
if ((length(guessed_name) == 1) && (guessed_name != "???")) return(guessed_name)
}
return("application/msword")
}
if (all(c(0x25,0x50,0x44,0x46,0x2d,0x31,0x2e) == hdr[1:7])) return("application/pdf")
@ -107,8 +114,10 @@ get_content_type <- function(path, ...) {
office_type <- check_office(hdr, path)
if (length(office_type) > 0) return(office_type)
guessed_name <- guess_content_type(path)
if ((length(guessed_name) == 1) && (guessed_name != "???")) return(guessed_name)
if (guess) {
guessed_name <- guess_content_type(path)
if ((length(guessed_name) == 1) && (guessed_name != "???")) return(guessed_name)
}
return("application/zip")
@ -131,6 +140,7 @@ get_content_type <- function(path, ...) {
if (all(c(0x00,0x00,0x01,0xBA) == hdr[1:4])) return("video/mpeg")
if (all(c(0x00,0x00,0x01,0xB3) == hdr[1:4])) return("video/mpeg")
if (!guess) return("???")
return(guess_content_type(path, ...))

2
R/guess-content-type.R

@ -16,7 +16,7 @@
#' @return character vector
#' @export
#' @examples
#' guess_content_type(system.file("extdat", "test.pdf", package="wand"))
#' guess_content_type(system.file("extdat", "pass-through", "test.pdf", package="wand"))
guess_content_type <- function(path, not_found = "???", custom_db = NULL) {
path <- path.expand(path[1])

BIN
R/sysdata.rda

Binary file not shown.

3
inst/extdat/no-guess/csv.docx

@ -0,0 +1,3 @@
Kid Name,Weight,Age
Nakshatra,12,1.5
Titas,16,6

0
inst/extdat/actions.csv → inst/extdat/pass-through/actions.csv

Can't render this file because it contains an unexpected character in line 2 and column 148.

0
inst/extdat/actions.txt → inst/extdat/pass-through/actions.txt

0
inst/extdat/actions.xlsx → inst/extdat/pass-through/actions.xlsx

0
inst/extdat/test.au → inst/extdat/pass-through/test.au

0
inst/extdat/test.bin → inst/extdat/pass-through/test.bin

0
inst/extdat/test.bmp → inst/extdat/pass-through/test.bmp

Before

Width:  |  Height:  |  Size: 29 KiB

After

Width:  |  Height:  |  Size: 29 KiB

0
inst/extdat/test.dtd → inst/extdat/pass-through/test.dtd

0
inst/extdat/test.emf → inst/extdat/pass-through/test.emf

0
inst/extdat/test.eps → inst/extdat/pass-through/test.eps

0
inst/extdat/test.fli → inst/extdat/pass-through/test.fli

0
inst/extdat/test.gif → inst/extdat/pass-through/test.gif

Before

Width:  |  Height:  |  Size: 1.5 KiB

After

Width:  |  Height:  |  Size: 1.5 KiB

0
inst/extdat/test.ico → inst/extdat/pass-through/test.ico

Before

Width:  |  Height:  |  Size: 318 B

After

Width:  |  Height:  |  Size: 318 B

0
inst/extdat/test.jpg → inst/extdat/pass-through/test.jpg

Before

Width:  |  Height:  |  Size: 1.7 KiB

After

Width:  |  Height:  |  Size: 1.7 KiB

0
inst/extdat/test.mp3 → inst/extdat/pass-through/test.mp3

0
inst/extdat/test.odt → inst/extdat/pass-through/test.odt

0
inst/extdat/test.ogg → inst/extdat/pass-through/test.ogg

0
inst/extdat/test.pcx → inst/extdat/pass-through/test.pcx

0
inst/extdat/test.pdf → inst/extdat/pass-through/test.pdf

0
inst/extdat/test.pl → inst/extdat/pass-through/test.pl

0
inst/extdat/test.png → inst/extdat/pass-through/test.png

Before

Width:  |  Height:  |  Size: 1.3 KiB

After

Width:  |  Height:  |  Size: 1.3 KiB

0
inst/extdat/test.pnm → inst/extdat/pass-through/test.pnm

0
inst/extdat/test.ppm → inst/extdat/pass-through/test.ppm

0
inst/extdat/test.ppt → inst/extdat/pass-through/test.ppt

0
inst/extdat/test.ps → inst/extdat/pass-through/test.ps

0
inst/extdat/test.psd → inst/extdat/pass-through/test.psd

0
inst/extdat/test.py → inst/extdat/pass-through/test.py

0
inst/extdat/test.rtf → inst/extdat/pass-through/test.rtf

0
inst/extdat/test.sh → inst/extdat/pass-through/test.sh

0
inst/extdat/test.tar → inst/extdat/pass-through/test.tar

0
inst/extdat/test.tar.gz → inst/extdat/pass-through/test.tar.gz

0
inst/extdat/test.tga → inst/extdat/pass-through/test.tga

0
inst/extdat/test.txt → inst/extdat/pass-through/test.txt

0
inst/extdat/test.txt.gz → inst/extdat/pass-through/test.txt.gz

0
inst/extdat/test.wav → inst/extdat/pass-through/test.wav

0
inst/extdat/test.wmf → inst/extdat/pass-through/test.wmf

0
inst/extdat/test.xcf → inst/extdat/pass-through/test.xcf

0
inst/extdat/test.xml → inst/extdat/pass-through/test.xml

0
inst/extdat/test.xpm → inst/extdat/pass-through/test.xpm

0
inst/extdat/test.zip → inst/extdat/pass-through/test.zip

0
inst/extdat/test_128_44_jstereo.mp3 → inst/extdat/pass-through/test_128_44_jstereo.mp3

0
inst/extdat/test_excel.xlsm → inst/extdat/pass-through/test_excel.xlsm

0
inst/extdat/test_excel.xlsx → inst/extdat/pass-through/test_excel.xlsx

0
inst/extdat/test_excel_2000.xls → inst/extdat/pass-through/test_excel_2000.xls

0
inst/extdat/test_excel_spreadsheet.xml → inst/extdat/pass-through/test_excel_spreadsheet.xml

0
inst/extdat/test_excel_web_archive.mht → inst/extdat/pass-through/test_excel_web_archive.mht

0
inst/extdat/test_nocompress.tif → inst/extdat/pass-through/test_nocompress.tif

0
inst/extdat/test_powerpoint.pptm → inst/extdat/pass-through/test_powerpoint.pptm

0
inst/extdat/test_powerpoint.pptx → inst/extdat/pass-through/test_powerpoint.pptx

0
inst/extdat/test_word.docm → inst/extdat/pass-through/test_word.docm

0
inst/extdat/test_word.docx → inst/extdat/pass-through/test_word.docx

0
inst/extdat/test_word_2000.doc → inst/extdat/pass-through/test_word_2000.doc

0
inst/extdat/test_word_6.0_95.doc → inst/extdat/pass-through/test_word_6.0_95.doc

6
inst/tinytest/test_wand.R

@ -58,8 +58,12 @@ list(
), test.zip = "application/zip"
) -> results
fils <- list.files(system.file("extdat", package="wand"), full.names=TRUE)
fils <- list.files(system.file("extdat", "pass-through", package="wand"), full.names=TRUE)
tst <- lapply(fils, get_content_type)
names(tst) <- basename(fils)
for(n in names(tst)) expect_identical(results[[n]], tst[[n]])
no_guess <- system.file("extdat", "no-guess", "csv.docx", package = "wand")
expect_equal(get_content_type(no_guess, guess = FALSE), "???")

9
man/get_content_type.Rd

@ -4,12 +4,15 @@
\alias{get_content_type}
\title{Discover MIME type of a file based on contents}
\usage{
get_content_type(path, ...)
get_content_type(path, guess = TRUE, ...)
}
\arguments{
\item{path}{path to a file}
\item{...}{passed on to \code{\link[=guess_content_type]{guess_content_type()}}}
\item{guess}{if \code{TRUE} (the default), calls \code{\link[=guess_content_type]{guess_content_type()}} if
no internal rules match the magic header}
\item{...}{passed on to \code{\link[=guess_content_type]{guess_content_type()}} if \code{guess} is \code{TRUE}}
}
\value{
character vector
@ -25,5 +28,5 @@ comparisons are required/desired. If no match is found, \code{???} is returned
(see \code{\link[=guess_content_type]{guess_content_type()}} for how to override this behaviour).
}
\examples{
get_content_type(system.file("extdat", "test.pdf", package="wand"))
get_content_type(system.file("extdat", "pass-through", "test.pdf", package="wand"))
}

2
man/guess_content_type.Rd

@ -27,5 +27,5 @@ return one or more associated types for a given input path. If no match is
found, \code{???} is returned.
}
\examples{
guess_content_type(system.file("extdat", "test.pdf", package="wand"))
guess_content_type(system.file("extdat", "pass-through", "test.pdf", package="wand"))
}

Loading…
Cancel
Save