|
|
|
#' Discover MIME type of a file based on contents
|
|
|
|
#'
|
|
|
|
#' There are a limited number of header "magic" bytes checked directly by
|
|
|
|
#' this function but cover quite a bit of ground. After that, [guess_content_type()] is called which uses
|
|
|
|
#' file extension-to-MIME mappings. File an issue or PR if more magic-byte-level
|
|
|
|
#' comparisons are required/desired. If no match is found, `???` is returned.
|
|
|
|
#'
|
|
|
|
#' @details
|
|
|
|
#' Initial in-R header mapping logic borrowed from `MimeTypes.java` from
|
|
|
|
#' [`servoy-client`](https://github.com/Servoy/servoy-client)
|
|
|
|
#'
|
|
|
|
#' @md
|
|
|
|
#' @param path path to a file
|
|
|
|
#' @return character vector
|
|
|
|
#' @export
|
|
|
|
#' @examples
|
|
|
|
#' get_content_type(system.file("extdat", "test.pdf", package="wand"))
|
|
|
|
get_content_type <- function(path) {
|
|
|
|
|
|
|
|
path <- path.expand(path)
|
|
|
|
if (!file.exists(path)) stop("File not found.", call.=FALSE)
|
|
|
|
|
|
|
|
hdr <- readBin(path, "raw", n=1024)
|
|
|
|
|
|
|
|
if (all(c(0xCA,0xFE,0xBA,0xBE) == hdr[1:4])) return("application/java-vm")
|
|
|
|
|
|
|
|
if (all(c(0xD0,0xCF,0x11,0xE0,0xA1,0xB1,0x1A,0xE1) == hdr[1:8])) {
|
|
|
|
guessed_name <- guess_content_type(path)
|
|
|
|
if ((length(guessed_name) == 1) && (guessed_name != "???")) return(guessed_name)
|
|
|
|
return("application/msword")
|
|
|
|
}
|
|
|
|
|
|
|
|
if (all(c(0x25,0x50,0x44,0x46,0x2d,0x31,0x2e) == hdr[1:7])) return("application/pdf")
|
|
|
|
if (all(c(0x25,0x50,0x44,0x46) == hdr[1:4])) return("application/x-pdf")
|
|
|
|
|
|
|
|
if (all(c(0x38,0x42,0x50,0x53,0x00,0x01) == hdr[1:6])) return("image/photoshop")
|
|
|
|
|
|
|
|
if (all(c(0x25,0x21,0x50,0x53) == hdr[1:4])) return("application/postscript")
|
|
|
|
|
|
|
|
if (all(c(0xff,0xfb,0x30) == hdr[1:3])) return("audio/mp3")
|
|
|
|
if (all(c(0xff,0xfb,0xd0) == hdr[1:3])) return("audio/mp3")
|
|
|
|
if (all(c(0xff,0xfb,0x90) == hdr[1:3])) return("audio/mp3")
|
|
|
|
if (all(c(0x49,0x44,0x33) == hdr[1:3])) return("audio/mp3")
|
|
|
|
if (all(c(0xAC,0xED) == hdr[1:2])) return("application/x-java-serialized-object")
|
|
|
|
|
|
|
|
if (hdr[1] == 0x3c) { # "<"
|
|
|
|
if (all(c(0x68,0x74,0x6d,0x6c) == hdr[2:5])) return("text/html") # "html"
|
|
|
|
if (all(c(0x48,0x54,0x4d,0x4c) == hdr[2:5])) return("text/html") # "HTML"
|
|
|
|
if (all(c(0x48,0x45,0x41,0x44) == hdr[2:5])) return("text/html") # "HEAD"
|
|
|
|
if (all(c(0x68,0x65,0x61,0x64) == hdr[2:5])) return("text/html") # "head"
|
|
|
|
if (all(c(0x3f,0x78,0x6d,0x6c,0x20) == hdr[2:6])) return("application/xml")
|
|
|
|
}
|
|
|
|
|
|
|
|
if (all(c(0xfe,0xff) == hdr[1:2])) {
|
|
|
|
if (all(c(0x00,0x3c,0x00,0x3f,0x00,0x78) == hdr[3:8])) return("application/xml")
|
|
|
|
}
|
|
|
|
|
|
|
|
if (all(c(0x42,0x4d) == hdr[1:2])) return("image/bmp")
|
|
|
|
if (all(c(0x49,0x49,0x2a,0x00) == hdr[1:4])) return("image/tiff")
|
|
|
|
if (all(c(0x4D,0x4D,0x00,0x2a) == hdr[1:4])) return("image/tiff")
|
|
|
|
if (all(c(0x47,0x49,0x46,0x38) == hdr[1:4])) return("image/gif")
|
|
|
|
if (all(c(0x23,0x64,0x65,0x66) == hdr[1:4])) return("image/x-bitmap")
|
|
|
|
if (all(c(0x21,0x20,0x58,0x50,0x4d,0x32) == hdr[1:6])) return("image/x-pixmap")
|
|
|
|
if (all(c(137,80,78,71,13,10,26,10) == hdr[1:8])) return("image/png")
|
|
|
|
|
|
|
|
if (all(c(0x23,0x21,0x2f,0x62,0x69,0x6e,0x2f,0x6e,0x6f,0x64,0x65) == hdr[1:11]))
|
|
|
|
return("application/javascript")
|
|
|
|
if (all(c(0x23,0x21,0x2f,0x62,0x69,0x6e,0x2f,0x6e,0x6f,0x64,0x65,0x6a,0x73) == hdr[1:13]))
|
|
|
|
return("application/javascript")
|
|
|
|
if (all(c(0x23,0x21,0x2f,0x75,0x73,0x72,0x2f,0x62,0x69,0x6e,0x2f,0x6e,0x6f,0x64,0x65) == hdr[1:15]))
|
|
|
|
return("application/javascript")
|
|
|
|
if (all(c(0x23,0x21,0x2f,0x75,0x73,0x72,0x2f,0x62,0x69,0x6e,0x2f,0x6e,0x6f,0x64,0x65,0x6a,0x73) == hdr[1:17]))
|
|
|
|
return("application/javascript")
|
|
|
|
if (all(c(0x23,0x21,0x2f,0x75,0x73,0x72,0x2f,0x62,0x69,0x6e,0x2f,0x65,0x6e,0x76,0x20,0x6e,0x6f,0x64,0x65) == hdr[1:19]))
|
|
|
|
return("application/javascript")
|
|
|
|
if (all(c(0x23,0x21,0x2f,0x75,0x73,0x72,0x2f,0x62,0x69,0x6e,0x2f,0x65,0x6e,0x76,0x20,0x6e,0x6f,0x64,0x65,0x6a,0x73) == hdr[1:21]))
|
|
|
|
return("application/javascript")
|
|
|
|
|
|
|
|
if (all(c(0xFF,0xD8,0xFF) == hdr[1:3])) {
|
|
|
|
if (0xE0 == hdr[4]) return("image/jpeg")
|
|
|
|
if (0xE1 == hdr[4]) {
|
|
|
|
if (all(c(0x45,0x78,0x69,0x66,0x00) == hdr[7:11])) return("image/jpeg") # Exif
|
|
|
|
}
|
|
|
|
if (0xEE == hdr[4]) return("image/jpg")
|
|
|
|
}
|
|
|
|
|
|
|
|
if (all(c(0x41,0x43) == hdr[1:2]) && all(c(0x00,0x00,0x00,0x00,0x00) == hdr[7:11]))
|
|
|
|
return("application/acad")
|
|
|
|
|
|
|
|
if (all(c(0x2E,0x73,0x6E,0x64) == hdr[1:4])) return("audio/basic")
|
|
|
|
if (all(c(0x64,0x6E,0x73,0x2E) == hdr[1:4])) return("audio/basic")
|
|
|
|
if (all(c(0x52,0x49,0x46,0x46) == hdr[1:4])) return("audio/x-wav") # "RIFF"
|
|
|
|
|
|
|
|
if (all(c(0x50, 0x4b) == hdr[1:2])) { # "PK"
|
|
|
|
|
|
|
|
office_type <- check_office(hdr, path)
|
|
|
|
if (length(office_type) > 0) return(office_type)
|
|
|
|
|
|
|
|
guessed_name <- guess_content_type(path)
|
|
|
|
if ((length(guessed_name) == 1) && (guessed_name != "???")) return(guessed_name)
|
|
|
|
|
|
|
|
return("application/zip")
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
if (all(c(0x5a,0x4d) == hdr[1:2])) return("x-system/exe")
|
|
|
|
|
|
|
|
if (all(c(0x75,0x73,0x74,0x61,0x72) == hdr[258:262])) return("application/pax")
|
|
|
|
|
|
|
|
if (all(c(0x00,0x00,0x01,0xBA) == hdr[1:4])) return("video/mpeg")
|
|
|
|
if (all(c(0x00,0x00,0x01,0xB3) == hdr[1:4])) return("video/mpeg")
|
|
|
|
|
|
|
|
|
|
|
|
return(guess_content_type(path))
|
|
|
|
|
|
|
|
}
|