Browse Source

Added better jpeg + new lzip, 7-zip, wasm, pcap, avro, parquet detections

master
boB Rudis 6 years ago
parent
commit
95e723b824
No known key found for this signature in database GPG Key ID: 1D7529BE14E2BBA9
  1. 80
      .vscode/c_cpp_properties.json
  2. 2
      DESCRIPTION
  3. 13
      NEWS.md
  4. 23
      R/get-content-type.R
  5. 2
      man/simplemagic_mime_db.Rd
  6. 71
      tests/testthat/test-wand.R

80
.vscode/c_cpp_properties.json

@ -0,0 +1,80 @@
{
"configurations": [
{
"name": "Mac",
"includePath": [
"${workspaceFolder}",
"/Library/Developer/CommandLineTools/usr/include/c++/v1",
"/usr/local/include",
"/Library/Developer/CommandLineTools/usr/lib/clang/9.0.0/include",
"/Library/Developer/CommandLineTools/usr/include",
"/usr/include",
"/Library/Frameworks/R.framework/Versions/3.5/Resources/library/Rcpp/include",
"/Library/Frameworks/R.framework/Versions/3.5/PrivateHeaders",
"/Library/Frameworks/R.framework/Versions/3.5/Resources/include"
],
"defines": [],
"intelliSenseMode": "clang-x64",
"browse": {
"path": [
"${workspaceFolder}",
"/Library/Developer/CommandLineTools/usr/include/c++/v1",
"/usr/local/include",
"/Library/Developer/CommandLineTools/usr/lib/clang/9.0.0/include",
"/Library/Developer/CommandLineTools/usr/include",
"/usr/include"
],
"limitSymbolsToIncludedHeaders": true,
"databaseFilename": ""
},
"macFrameworkPath": [
"/System/Library/Frameworks",
"/Library/Frameworks"
],
"compilerPath": "/usr/bin/clang",
"cStandard": "c11",
"cppStandard": "c++17"
},
{
"name": "Linux",
"includePath": [
"/usr/include",
"/usr/local/include",
"${workspaceFolder}"
],
"defines": [],
"intelliSenseMode": "clang-x64",
"browse": {
"path": [
"/usr/include",
"/usr/local/include",
"${workspaceFolder}"
],
"limitSymbolsToIncludedHeaders": true,
"databaseFilename": ""
}
},
{
"name": "Win32",
"includePath": [
"C:/Program Files (x86)/Microsoft Visual Studio 14.0/VC/include",
"${workspaceFolder}"
],
"defines": [
"_DEBUG",
"UNICODE",
"_UNICODE"
],
"intelliSenseMode": "msvc-x64",
"browse": {
"path": [
"C:/Program Files (x86)/Microsoft Visual Studio 14.0/VC/include/*",
"${workspaceFolder}"
],
"limitSymbolsToIncludedHeaders": true,
"databaseFilename": ""
}
}
],
"version": 3
}

2
DESCRIPTION

@ -1,7 +1,7 @@
Package: wand Package: wand
Type: Package Type: Package
Title: Retrieve 'Magic' Attributes from Files and Directories Title: Retrieve 'Magic' Attributes from Files and Directories
Version: 0.3.0 Version: 0.4.0
Date: 2018-09-16 Date: 2018-09-16
Authors@R: c( Authors@R: c(
person("Bob", "Rudis", email = "bob@rud.is", role = c("aut", "cre"), person("Bob", "Rudis", email = "bob@rud.is", role = c("aut", "cre"),

13
NEWS.md

@ -1,3 +1,16 @@
# 0.4.0
- Enhanced jpeg detection
- Added distinction between pax and tar
- Added lzip detection
- Added 7-zip detection
- Added wasm file detection
- Added pcap and pcapng detection
- Added avro detection
- Added parquet detection
# 0.3.0
* Dropped libmagic
# 0.2.0 # 0.2.0
* Works on Windows * Works on Windows

23
R/get-content-type.R

@ -22,6 +22,9 @@ get_content_type <- function(path) {
hdr <- readBin(path, "raw", n=1024) hdr <- readBin(path, "raw", n=1024)
if (all(c(0x4F,0x62,0x6A,0x01) == hdr[1:4])) return("application/vnd.apache.avro+binary")
if (all(c(0x50,0x41,0x52,0x31) == hdr[1:4])) return("application/x-parquet")
if (all(c(0xCA,0xFE,0xBA,0xBE) == hdr[1:4])) return("application/java-vm") if (all(c(0xCA,0xFE,0xBA,0xBE) == hdr[1:4])) return("application/java-vm")
if (all(c(0xD0,0xCF,0x11,0xE0,0xA1,0xB1,0x1A,0xE1) == hdr[1:8])) { if (all(c(0xD0,0xCF,0x11,0xE0,0xA1,0xB1,0x1A,0xE1) == hdr[1:8])) {
@ -43,6 +46,8 @@ get_content_type <- function(path) {
if (all(c(0x49,0x44,0x33) == hdr[1:3])) return("audio/mp3") if (all(c(0x49,0x44,0x33) == hdr[1:3])) return("audio/mp3")
if (all(c(0xAC,0xED) == hdr[1:2])) return("application/x-java-serialized-object") if (all(c(0xAC,0xED) == hdr[1:2])) return("application/x-java-serialized-object")
if (all(c(0x4c,0x5a,0x49,0x50) == hdr[1:4])) return("application/x-lzip")
if (hdr[1] == 0x3c) { # "<" if (hdr[1] == 0x3c) { # "<"
if (all(c(0x68,0x74,0x6d,0x6c) == hdr[2:5])) return("text/html") # "html" if (all(c(0x68,0x74,0x6d,0x6c) == hdr[2:5])) return("text/html") # "html"
if (all(c(0x48,0x54,0x4d,0x4c) == hdr[2:5])) return("text/html") # "HTML" if (all(c(0x48,0x54,0x4d,0x4c) == hdr[2:5])) return("text/html") # "HTML"
@ -51,6 +56,11 @@ get_content_type <- function(path) {
if (all(c(0x3f,0x78,0x6d,0x6c,0x20) == hdr[2:6])) return("application/xml") if (all(c(0x3f,0x78,0x6d,0x6c,0x20) == hdr[2:6])) return("application/xml")
} }
# pcapng magic bytes. An explicit return() is required here: a bare string
# inside `if` is evaluated and discarded, so the match would fall through
# to the checks that follow instead of reporting the type.
if (all(c(0x0a,0x0d,0x0d,0x0a) == hdr[1:4])) return("application/x-pcapng")
if (all(c(0xa1,0xb2,0xc3,0xd4) == hdr[1:4]) ||
all(c(0xd4,0xc3,0xb2,0xa1) == hdr[1:4])) return("application/x-cap")
if (all(c(0xfe,0xff) == hdr[1:2])) { if (all(c(0xfe,0xff) == hdr[1:2])) {
if (all(c(0x00,0x3c,0x00,0x3f,0x00,0x78) == hdr[3:8])) return("application/xml") if (all(c(0x00,0x3c,0x00,0x3f,0x00,0x78) == hdr[3:8])) return("application/xml")
} }
@ -77,6 +87,7 @@ get_content_type <- function(path) {
return("application/javascript") return("application/javascript")
if (all(c(0xFF,0xD8,0xFF) == hdr[1:3])) { if (all(c(0xFF,0xD8,0xFF) == hdr[1:3])) {
if (0xDB == hdr[4]) return("image/jpeg")
if (0xE0 == hdr[4]) return("image/jpeg") if (0xE0 == hdr[4]) return("image/jpeg")
if (0xE1 == hdr[4]) { if (0xE1 == hdr[4]) {
if (all(c(0x45,0x78,0x69,0x66,0x00) == hdr[7:11])) return("image/jpeg") # Exif if (all(c(0x45,0x78,0x69,0x66,0x00) == hdr[7:11])) return("image/jpeg") # Exif
@ -103,9 +114,19 @@ get_content_type <- function(path) {
} }
if (all(c(0x00,0x61,0x73,0x6d) == hdr[1:4])) return("application/wasm")
if (all(c(0x37,0x7A,0xBC,0xAF,0x27,0x1C) == hdr[1:6])) return("application/x-7z-compressed")
if (all(c(0x5a,0x4d) == hdr[1:2])) return("x-system/exe") if (all(c(0x5a,0x4d) == hdr[1:2])) return("x-system/exe")
if (all(c(0x75,0x73,0x74,0x61,0x72) == hdr[258:262])) return("application/pax") if (all(c(0x75,0x73,0x74,0x61,0x72) == hdr[258:262])) {
if (all(c(0x00,0x30,0x30) == hdr[263:265]) || all(c(0x20,0x20,0x00) == hdr[263:265])) {
return("application/tar")
} else {
return("application/pax")
}
}
if (all(c(0x00,0x00,0x01,0xBA) == hdr[1:4])) return("video/mpeg") if (all(c(0x00,0x00,0x01,0xBA) == hdr[1:4])) return("video/mpeg")
if (all(c(0x00,0x00,0x01,0xB3) == hdr[1:4])) return("video/mpeg") if (all(c(0x00,0x00,0x01,0xB3) == hdr[1:4])) return("video/mpeg")

2
man/simplemagic_mime_db.Rd

@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/aaa.R % Please edit documentation in R/aaa.r
\docType{data} \docType{data}
\name{simplemagic_mime_db} \name{simplemagic_mime_db}
\alias{simplemagic_mime_db} \alias{simplemagic_mime_db}

71
tests/testthat/test-wand.R

@ -5,36 +5,56 @@ test_that("Basic file tests work", {
actions.csv = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", actions.csv = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
actions.txt = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", actions.txt = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
actions.xlsx = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", actions.xlsx = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
test_1.2.class = "application/java-vm", test_1.3.class = "application/java-vm", test_1.2.class = "application/java-vm",
test_1.4.class = "application/java-vm", test_1.5.class = "application/java-vm", test_1.3.class = "application/java-vm",
test_128_44_jstereo.mp3 = "audio/mp3", test_excel_2000.xls = "application/msword", test_1.4.class = "application/java-vm",
test_excel_spreadsheet.xml = "application/xml", test_excel_web_archive.mht = "message/rfc822", test_1.5.class = "application/java-vm",
test_excel.xlsm = "application/zip", test_excel.xlsx = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", test_128_44_jstereo.mp3 = "audio/mp3",
test_nocompress.tif = "image/tiff", test_powerpoint.pptm = "application/zip", test_excel_2000.xls = "application/msword",
test_excel_spreadsheet.xml = "application/xml",
test_excel_web_archive.mht = "message/rfc822",
test_excel.xlsm = "application/zip",
test_excel.xlsx = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
test_nocompress.tif = "image/tiff",
test_powerpoint.pptm = "application/zip",
test_powerpoint.pptx = "application/vnd.openxmlformats-officedocument.presentationml.presentation", test_powerpoint.pptx = "application/vnd.openxmlformats-officedocument.presentationml.presentation",
test_word_2000.doc = "application/msword", test_word_6.0_95.doc = "application/msword", test_word_2000.doc = "application/msword",
test_word.docm = "application/zip", test_word.docx = "application/vnd.openxmlformats-officedocument.wordprocessingml.document", test_word_6.0_95.doc = "application/msword",
test.au = "audio/basic", test.bin = c( test_word.docm = "application/zip",
test_word.docx = "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
test.au = "audio/basic",
test.bin = c(
"application/mac-binary", "application/mac-binary",
"application/macbinary", "application/octet-stream", "application/x-binary", "application/macbinary", "application/octet-stream", "application/x-binary",
"application/x-macbinary" "application/x-macbinary"
), test.bmp = "image/bmp", test.dtd = "application/xml-dtd", ), test.bmp = "image/bmp",
test.emf = "application/x-msmetafile", test.eps = "application/postscript", test.dtd = "application/xml-dtd",
test.fli = c("video/flc", "video/fli", "video/x-fli"), test.gif = "image/gif", test.emf = "application/x-msmetafile",
test.ico = "image/x-icon", test.java = c( test.eps = "application/postscript",
test.fli = c("video/flc", "video/fli", "video/x-fli"),
test.gif = "image/gif",
test.ico = "image/x-icon",
test.java = c(
"text/plain", "text/x-java", "text/plain", "text/x-java",
"text/x-java-source" "text/x-java-source"
), test.jpg = "image/jpeg", test.mp3 = "audio/mp3", ), test.jpg = "image/jpeg",
test.odt = "application/vnd.oasis.opendocument.text", test.ogg = c( test.mp3 = "audio/mp3",
test.odt = "application/vnd.oasis.opendocument.text",
test.ogg = c(
"application/ogg", "application/ogg",
"audio/ogg" "audio/ogg"
), test.pcx = c("image/pcx", "image/x-pcx"), test.pdf = "application/pdf", ), test.pcx = c("image/pcx", "image/x-pcx"),
test.pl = c("text/plain", "text/x-perl", "text/x-script.perl"), test.png = "image/png", test.pnm = c( test.pdf = "application/pdf",
test.pl = c("text/plain", "text/x-perl", "text/x-script.perl"),
test.png = "image/png",
test.pnm = c(
"application/x-portable-anymap", "application/x-portable-anymap",
"image/x-portable-anymap" "image/x-portable-anymap"
), test.ppm = "image/x-portable-pixmap", ), test.ppm = "image/x-portable-pixmap",
test.ppt = "application/msword", test.ps = "application/postscript", test.ppt = "application/msword",
test.psd = "image/photoshop", test.py = c( test.ps = "application/postscript",
test.psd = "image/photoshop",
test.py = c(
"text/x-python", "text/x-python",
"text/x-script.phyton" "text/x-script.phyton"
), test.rtf = c( ), test.rtf = c(
@ -44,16 +64,21 @@ test_that("Basic file tests work", {
"application/x-bsh", "application/x-bsh",
"application/x-sh", "application/x-shar", "text/x-script.sh", "application/x-sh", "application/x-shar", "text/x-script.sh",
"text/x-sh" "text/x-sh"
), test.tar = "application/pax", test.tar.gz = c( ), test.tar = "application/tar",
test.tar.gz = c(
"application/octet-stream", "application/octet-stream",
"application/x-compressed", "application/x-gzip" "application/x-compressed", "application/x-gzip"
), test.tga = "image/x-tga", ), test.tga = "image/x-tga",
test.txt = "text/plain", test.txt.gz = c( test.txt = "text/plain",
test.txt.gz = c(
"application/octet-stream", "application/octet-stream",
"application/x-compressed", "application/x-gzip" "application/x-compressed", "application/x-gzip"
), test.wav = "audio/x-wav", ), test.wav = "audio/x-wav",
test.wmf = c("application/x-msmetafile", "windows/metafile"), test.xcf = "application/x-xcf", test.xml = "application/xml", test.wmf = c("application/x-msmetafile", "windows/metafile"),
test.xpm = c("image/x-xbitmap", "image/x-xpixmap", "image/xpm"), test.zip = "application/zip" test.xcf = "application/x-xcf",
test.xml = "application/xml",
test.xpm = c("image/x-xbitmap", "image/x-xpixmap", "image/xpm"),
test.zip = "application/zip"
) -> results ) -> results
fils <- list.files(system.file("extdat", package="wand"), full.names=TRUE) fils <- list.files(system.file("extdat", package="wand"), full.names=TRUE)

Loading…
Cancel
Save