#' Discover MIME type of a file based on contents #' #' There are a limited number of header "magic" bytes checked directly by #' this function but cover quite a bit of ground. After that, [guess_content_type()] #' is called which uses file extension-to-MIME mappings. #' #' File an issue or PR if more magic-byte-level #' comparisons are required/desired. If no match is found, `???` is returned #' (see [guess_content_type()] for how to override this behaviour). #' #' @md #' @param path path to a file #' @param guess if `TRUE` (the default), calls [guess_content_type()] if #' no internal rules match the magic header #' @param ... passed on to [guess_content_type()] if `guess` is `TRUE` #' @return character vector #' @export #' @examples #' get_content_type(system.file("extdat", "pass-through", "test.pdf", package="wand")) get_content_type <- function(path, guess = TRUE, ...) { path <- path.expand(path[1]) if (!file.exists(path)) stop("File not found.", call.=FALSE) hdr <- readBin(path, "raw", n=1024) if (all(c(0x4F,0x62,0x6A,0x01) == hdr[1:4])) return("application/vnd.apache.avro+binary") if (all(c(0x50,0x41,0x52,0x31) == hdr[1:4])) return("application/x-parquet") if (all(c(0xCA,0xFE,0xBA,0xBE) == hdr[1:4])) return("application/java-vm") if (all(c(0xD0,0xCF,0x11,0xE0,0xA1,0xB1,0x1A,0xE1) == hdr[1:8])) { if (guess) { guessed_name <- guess_content_type(path) if ((length(guessed_name) == 1) && (guessed_name != "???")) return(guessed_name) } return("application/msword") } if (all(c(0x25,0x50,0x44,0x46,0x2d,0x31,0x2e) == hdr[1:7])) return("application/pdf") if (all(c(0x25,0x50,0x44,0x46) == hdr[1:4])) return("application/x-pdf") if (all(c(0x38,0x42,0x50,0x53,0x00,0x01) == hdr[1:6])) return("image/photoshop") if (all(c(0x25,0x21,0x50,0x53) == hdr[1:4])) return("application/postscript") if (all(c(0xff,0xfb,0x30) == hdr[1:3])) return("audio/mp3") if (all(c(0xff,0xfb,0xd0) == hdr[1:3])) return("audio/mp3") if (all(c(0xff,0xfb,0x90) == hdr[1:3])) return("audio/mp3") if (all(c(0x49,0x44,0x33) == hdr[1:3])) return("audio/mp3") if (all(c(0xAC,0xED) == hdr[1:2])) return("application/x-java-serialized-object") if (all(c(0x4c,0x5a,0x49,0x50) == hdr[1:4])) return("application/x-lzip") if (hdr[1] == 0x3c) { # "<" if (all(c(0x68,0x74,0x6d,0x6c) == hdr[2:5])) return("text/html") # "html" if (all(c(0x48,0x54,0x4d,0x4c) == hdr[2:5])) return("text/html") # "HTML" if (all(c(0x48,0x45,0x41,0x44) == hdr[2:5])) return("text/html") # "HEAD" if (all(c(0x68,0x65,0x61,0x64) == hdr[2:5])) return("text/html") # "head" if (all(c(0x3f,0x78,0x6d,0x6c,0x20) == hdr[2:6])) return("application/xml") } if (all(c(0x0a,0x0d,0x0d,0x0a) == hdr[1:4])) "application/x-pcapng" if (all(c(0xa1,0xb2,0xc3,0xd4) == hdr[1:4]) || all(c(0xd4,0xc3,0xb2,0xa1) == hdr[1:4])) return("application/x-cap") if (all(c(0xfe,0xff) == hdr[1:2])) { if (all(c(0x00,0x3c,0x00,0x3f,0x00,0x78) == hdr[3:8])) return("application/xml") } if (all(c(0x42,0x4d) == hdr[1:2])) return("image/bmp") if (all(c(0x49,0x49,0x2a,0x00) == hdr[1:4])) return("image/tiff") if (all(c(0x4D,0x4D,0x00,0x2a) == hdr[1:4])) return("image/tiff") if (all(c(0x47,0x49,0x46,0x38) == hdr[1:4])) return("image/gif") if (all(c(0x23,0x64,0x65,0x66) == hdr[1:4])) return("image/x-bitmap") if (all(c(0x21,0x20,0x58,0x50,0x4d,0x32) == hdr[1:6])) return("image/x-pixmap") if (all(c(137,80,78,71,13,10,26,10) == hdr[1:8])) return("image/png") if (all(c(0x23,0x21,0x2f,0x62,0x69,0x6e,0x2f,0x6e,0x6f,0x64,0x65) == hdr[1:11])) return("application/javascript") if (all(c(0x23,0x21,0x2f,0x62,0x69,0x6e,0x2f,0x6e,0x6f,0x64,0x65,0x6a,0x73) == hdr[1:13])) return("application/javascript") if (all(c(0x23,0x21,0x2f,0x75,0x73,0x72,0x2f,0x62,0x69,0x6e,0x2f,0x6e,0x6f,0x64,0x65) == hdr[1:15])) return("application/javascript") if (all(c(0x23,0x21,0x2f,0x75,0x73,0x72,0x2f,0x62,0x69,0x6e,0x2f,0x6e,0x6f,0x64,0x65,0x6a,0x73) == hdr[1:17])) return("application/javascript") if (all(c(0x23,0x21,0x2f,0x75,0x73,0x72,0x2f,0x62,0x69,0x6e,0x2f,0x65,0x6e,0x76,0x20,0x6e,0x6f,0x64,0x65) == hdr[1:19])) return("application/javascript") if (all(c(0x23,0x21,0x2f,0x75,0x73,0x72,0x2f,0x62,0x69,0x6e,0x2f,0x65,0x6e,0x76,0x20,0x6e,0x6f,0x64,0x65,0x6a,0x73) == hdr[1:21])) return("application/javascript") if (all(c(0xFF,0xD8,0xFF) == hdr[1:3])) { if (0xDB == hdr[4]) return("image/jpeg") if (0xE0 == hdr[4]) return("image/jpeg") if (0xE1 == hdr[4]) { if (all(c(0x45,0x78,0x69,0x66,0x00) == hdr[7:11])) return("image/jpeg") # Exif } if (0xEE == hdr[4]) return("image/jpg") } if (all(c(0x41,0x43) == hdr[1:2]) && all(c(0x00,0x00,0x00,0x00,0x00) == hdr[7:11])) return("application/acad") if (all(c(0x2E,0x73,0x6E,0x64) == hdr[1:4])) return("audio/basic") if (all(c(0x64,0x6E,0x73,0x2E) == hdr[1:4])) return("audio/basic") if (all(c(0x52,0x49,0x46,0x46) == hdr[1:4])) return("audio/x-wav") # "RIFF" if (all(c(0x50, 0x4b) == hdr[1:2])) { # "PK" office_type <- check_office(hdr, path) if (length(office_type) > 0) return(office_type) if (guess) { guessed_name <- guess_content_type(path) if ((length(guessed_name) == 1) && (guessed_name != "???")) return(guessed_name) } return("application/zip") } if (all(c(0x00,0x61,0x73,0x6d) == hdr[1:4])) return("application/wasm") if (all(c(0x37,0x7A,0xBC,0xAF,0x27,0x1C) == hdr[1:6])) return("application/x-7z-compressed") if (all(c(0x5a,0x4d) == hdr[1:2])) return("x-system/exe") if (all(c(0x75,0x73,0x74,0x61,0x72) == hdr[258:262])) { if (all(c(0x00,0x30,0x30) == hdr[263:265]) || all(c(0x20,0x20,0x00) == hdr[263:265])) { return("application/tar") } else { return("application/pax") } } if (all(c(0x00,0x00,0x01,0xBA) == hdr[1:4])) return("video/mpeg") if (all(c(0x00,0x00,0x01,0xB3) == hdr[1:4])) return("video/mpeg") if (!guess) return("???") return(guess_content_type(path, ...)) }