
initial commit

Branch: master · boB Rudis committed 6 years ago · commit 5fcca76d2d
No known key found for this signature in database (GPG Key ID: 1D7529BE14E2BBA9)
  1. .Rbuildignore (+11)
  2. .codecov.yml (+1)
  3. .gitignore (+8)
  4. .travis.yml (+6)
  5. DESCRIPTION (+26)
  6. LICENSE (+2)
  7. NAMESPACE (+4)
  8. NEWS.md (+2)
  9. R/mactheknife-package.R (+11)
  10. R/read-dsstore.R (+29)
  11. R/zzz.R (+11)
  12. README.Rmd (+64)
  13. README.md (+84)
  14. inst/extdat/DS_Store.ctf (BIN)
  15. inst/modules/__pycache__/dsstore.cpython-36.pyc (BIN)
  16. inst/modules/dsstore.py (+307)
  17. mactheknife.Rproj (+21)
  18. man/mactheknife.Rd (+14)
  19. man/read_dsstore.Rd (+21)
  20. tests/test-all.R (+2)
  21. tests/testthat/test-mactheknife.R (+6)

11
.Rbuildignore

@@ -0,0 +1,11 @@
^.*\.Rproj$
^\.Rproj\.user$
^\.travis\.yml$
^README\.*Rmd$
^README\.*html$
^NOTES\.*Rmd$
^NOTES\.*html$
^\.codecov\.yml$
^README_files$
^doc$
^tmp$

1
.codecov.yml

@@ -0,0 +1 @@
comment: false

8
.gitignore

@@ -0,0 +1,8 @@
.DS_Store
.Rproj.user
.Rhistory
.RData
.Rproj
src/*.o
src/*.so
src/*.dll

6
.travis.yml

@@ -0,0 +1,6 @@
language: R
sudo: false
cache: packages
after_success:
- Rscript -e 'covr::codecov()'

26
DESCRIPTION

@@ -0,0 +1,26 @@
Package: mactheknife
Type: Package
Title: Read 'macOS' '.DS_Store' Files
Version: 0.1.0
Date: 2018-04-29
Authors@R: c(
    person("Bob", "Rudis", email = "bob@rud.is", role = c("aut", "cre"),
           comment = c(ORCID = "0000-0001-5670-2640")),
    person("Sebastian", "Neef", email = "github@gehaxelt.in", role = c("aut", "cph"),
           comment = "Python dsstore module <https://github.com/gehaxelt/Python-dsstore>")
  )
Maintainer: Bob Rudis <bob@rud.is>
Description: A thin wrapper around the 'Python' 'dsstore' module
    <https://github.com/gehaxelt/Python-dsstore> by 'Sebastian Neef'.
URL: https://github.com/hrbrmstr/mactheknife
BugReports: https://github.com/hrbrmstr/mactheknife/issues
SystemRequirements: Python
Encoding: UTF-8
License: MIT + file LICENSE
Suggests:
    testthat,
    covr
Depends:
    R (>= 3.2.0),
    reticulate
RoxygenNote: 6.0.1.9000
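
Since the DESCRIPTION declares `SystemRequirements: Python` and depends on `reticulate`, a quick pre-flight check can save confusion on machines without a usable Python. A minimal sketch (not part of the package) using reticulate's own discovery helpers:

``` r
# Sketch: confirm reticulate can find a Python interpreter before calling
# read_dsstore(); py_available()/py_config() are standard reticulate helpers.
if (reticulate::py_available(initialize = TRUE)) {
  reticulate::py_config()   # show which Python will be used
} else {
  message("No usable Python found; mactheknife needs one (SystemRequirements: Python)")
}
```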

2
LICENSE

@@ -0,0 +1,2 @@
YEAR: 2018
COPYRIGHT HOLDER: Bob Rudis

4
NAMESPACE

@@ -0,0 +1,4 @@
# Generated by roxygen2: do not edit by hand
export(read_dsstore)
import(reticulate)

2
NEWS.md

@@ -0,0 +1,2 @@
0.1.0
* Initial release

11
R/mactheknife-package.R

@@ -0,0 +1,11 @@
#' Read 'macOS' '.DS_Store' Files
#'
#' A thin wrapper around the 'Python' 'dsstore' module
#' <https://github.com/gehaxelt/Python-dsstore> by 'Sebastian Neef'.
#'
#' @md
#' @name mactheknife
#' @docType package
#' @author Bob Rudis (bob@@rud.is)
#' @import reticulate
NULL

29
R/read-dsstore.R

@@ -0,0 +1,29 @@
#' Read a `.DS_Store` file
#'
#' @md
#' @param path a path to a valid `.DS_Store` file ([path.expand()] will be called)
#' @return a character vector of filenames in the `.DS_Store` file or
#' a length 0 character vector if no parseable data was found
#' @export
#' @examples
#' read_dsstore(system.file("extdat", "DS_Store.ctf", package = "mactheknife"))
read_dsstore <- function(path) {

  stor_path <- path.expand(path)
  stor_path <- normalizePath(stor_path)

  fil <- os$open(stor_path, os$O_RDONLY)
  contents <- os$read(fil, as.integer(file.size(stor_path)))
  os$close(fil)

  d <- dsstore$DS_Store(contents)
  ds_fils <- d$traverse_root()

  out <- unique(ds_fils)
  if (length(out) == 0) out <- character()

  out

}
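
`read_dsstore()` hands the raw file contents straight to the Python class, which raises a `ParsingError` for anything shorter than the 36-byte header (see `inst/modules/dsstore.py` below). A hedged sketch of a defensive wrapper, where `read_dsstore_safely()` is a hypothetical helper and not part of this commit:

``` r
# Hypothetical convenience wrapper (not in the package): skip files that are
# missing or too small to contain the 36-byte .DS_Store header, and return
# character(0) instead of erroring on unparseable input.
read_dsstore_safely <- function(path) {
  path <- path.expand(path)
  if (!file.exists(path) || file.size(path) < 36) return(character())
  tryCatch(read_dsstore(path), error = function(e) character())
}
```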

11
R/zzz.R

@@ -0,0 +1,11 @@
dsstore <- NULL
os <- NULL
.onLoad <- function(libname, pkgname) {

  dsstore <<- reticulate::import_from_path(
    module = "dsstore",
    path = system.file("modules", package = "mactheknife"),
    delay_load = TRUE
  )

  os <<- reticulate::import("os", delay_load = TRUE)

}
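
For readers unfamiliar with the pattern: `.onLoad()` stashes delayed reticulate imports in package-level variables so Python is only initialized on first use. A rough interactive equivalent (a sketch, assuming the package is installed so `system.file()` can resolve the bundled module):

``` r
# Sketch of what the delayed imports resolve to, done eagerly in a session.
library(reticulate)

dsstore <- import_from_path(
  module = "dsstore",
  path = system.file("modules", package = "mactheknife")
)
os <- import("os")
```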

64
README.Rmd

@@ -0,0 +1,64 @@
---
output: rmarkdown::github_document
---

# mactheknife

Read 'macOS' '.DS_Store' Files

## Description

A thin wrapper around the 'Python' 'dsstore' module <https://github.com/gehaxelt/Python-dsstore> by 'Sebastian Neef'.

## NOTE

- This may turn into a broader "macOS hacking" package
- Uses `reticulate`, so a working Python installation is needed

## What's Inside The Tin

The following functions are implemented:

- `read_dsstore`: Read a '.DS_Store' file

## Installation

```{r eval=FALSE}
devtools::install_github("hrbrmstr/mactheknife")
```

```{r message=FALSE, warning=FALSE, error=FALSE, include=FALSE}
options(width=120)
```

## Usage

```{r message=FALSE, warning=FALSE, error=FALSE}
library(mactheknife)

# current version
packageVersion("mactheknife")
```

## Built-in data

```{r}
read_dsstore(
  path = system.file("extdat", "DS_Store.ctf", package = "mactheknife")
)
```

## My "~/projects" folder (use your own dir as an example)

```{r}
library(magrittr)

list.files(
  path = "~/projects", pattern = "\\.DS_Store",
  all.files = TRUE, recursive = TRUE, full.names = TRUE
) %>%
  lapply(read_dsstore) -> x

str(x)
```

84
README.md

@@ -0,0 +1,84 @@
# mactheknife

Read ‘macOS’ ‘.DS\_Store’ Files

## Description

A thin wrapper around the ‘Python’ ‘dsstore’ module
<https://github.com/gehaxelt/Python-dsstore> by ‘Sebastian Neef’.

## NOTE

- This may turn into a broader “macOS hacking” package
- Uses `reticulate`, so a working Python installation is needed

## What’s Inside The Tin

The following functions are implemented:

- `read_dsstore`: Read a ‘.DS\_Store’ file

## Installation

``` r
devtools::install_github("hrbrmstr/mactheknife")
```

## Usage

``` r
library(mactheknife)

# current version
packageVersion("mactheknife")
```

    ## [1] '0.1.0'

## Built-in data

``` r
read_dsstore(
  path = system.file("extdat", "DS_Store.ctf", package = "mactheknife")
)
```

    ## [1] "favicon.ico" "flag" "static" "templates" "vulnerable.py" "vulnerable.wsgi"

## My “~/projects” folder (use your own dir as an example)

``` r
library(magrittr)

list.files(
  path = "~/projects", pattern = "\\.DS_Store",
  all.files = TRUE, recursive = TRUE, full.names = TRUE
) %>%
  lapply(read_dsstore) -> x

str(x)
```

    ## List of 21
    ##  $ : chr [1:20] "2017-dashboard" "2017-tlapd" "cataps" "congress-privacy" ...
    ##  $ : chr "greenery-palettes"
    ##  $ : chr "data"
    ##  $ : chr "data"
    ##  $ : chr(0)
    ##  $ : chr(0)
    ##  $ : chr(0)
    ##  $ : chr "packrat"
    ##  $ : chr "lib"
    ##  $ : chr "x86_64-apple-darwin15.6.0"
    ##  $ : chr "3.4.0"
    ##  $ : chr(0)
    ##  $ : chr "data"
    ##  $ : chr "lyme"
    ##  $ : chr "packrat"
    ##  $ : chr "lib"
    ##  $ : chr "x86_64-apple-darwin15.6.0"
    ##  $ : chr "3.4.1"
    ##  $ : chr "plots"
    ##  $ : chr [1:2] "top-1m.csv" "top-1m.csv.zip"
    ##  $ : chr(0)

BIN
inst/extdat/DS_Store.ctf

Binary file not shown.

BIN
inst/modules/__pycache__/dsstore.cpython-36.pyc

Binary file not shown.

307
inst/modules/dsstore.py

@@ -0,0 +1,307 @@
import struct


class ParsingError(Exception): pass


class DataBlock(object):
    """
    Class for a basic DataBlock inside of the DS_Store format.
    """

    def __init__(self, data, debug=False):
        super(DataBlock, self).__init__()
        self.data = data
        self.pos = 0
        self.debug = debug

    def offset_read(self, length, offset=None):
        """
        Returns a byte array of length from data at the given offset or pos.
        If no offset is given, pos will be increased by length.
        Throws ParsingError if offset+length > len(self.data)
        """
        if not offset:
            offset_position = self.pos
        else:
            offset_position = offset
        if len(self.data) < offset_position + length:
            raise ParsingError("Offset+Length > len(self.data)")
        if not offset:
            self.pos += length
        value = self.data[offset_position:offset_position + length]
        self._log("Reading: {}-{} => {}".format(hex(offset_position), hex(offset_position + length), value))
        return value

    def skip(self, length):
        """
        Increases pos by length without reading data!
        """
        self.pos += length

    def read_filename(self):
        """
        Extracts a file name from the current position.
        """
        # The length of the file name in bytes.
        length, = struct.unpack_from(">I", self.offset_read(4))
        # The file name in UTF-16, which is two bytes per character.
        filename = self.offset_read(2 * length).decode("utf-16be")
        # A structure ID that I haven't found any use for.
        structure_id, = struct.unpack_from(">I", self.offset_read(4))
        # Now read the structure type as a string of four characters and decode it to ascii.
        structure_type, = struct.unpack_from(">4s", self.offset_read(4))
        structure_type = structure_type.decode()
        self._log("Structure type ", structure_type)
        # If we don't find a match, skip stays < 0 and we will do some magic to find the right skip
        # due to occasionally broken .DS_Store files.
        skip = -1
        # Source: http://search.cpan.org/~wiml/Mac-Finder-DSStore/DSStoreFormat.pod
        while skip < 0:
            if structure_type == "bool":
                skip = 1
            elif structure_type == "type" or structure_type == "long" or structure_type == "shor" or structure_type == "fwsw" or structure_type == "fwvh" or structure_type == "icvt" or structure_type == "lsvt" or structure_type == "vSrn" or structure_type == "vstl":
                skip = 4
            elif structure_type == "comp" or structure_type == "dutc" or structure_type == "icgo" or structure_type == "icsp" or structure_type == "logS" or structure_type == "lg1S" or structure_type == "lssp" or structure_type == "modD" or structure_type == "moDD" or structure_type == "phyS" or structure_type == "ph1S":
                skip = 8
            elif structure_type == "blob":
                blen, = struct.unpack_from(">I", self.offset_read(4))
                skip = blen
            elif structure_type == "ustr" or structure_type == "cmmt" or structure_type == "extn" or structure_type == "GRP0":
                blen, = struct.unpack_from(">I", self.offset_read(4))
                skip = 2 * blen
            elif structure_type == "BKGD":
                skip = 12
            elif structure_type == "ICVO" or structure_type == "LSVO" or structure_type == "dscl":
                skip = 1
            elif structure_type == "Iloc" or structure_type == "fwi0":
                skip = 16
            elif structure_type == "dilc":
                skip = 32
            elif structure_type == "lsvo":
                skip = 76
            elif structure_type == "icvo":
                pass
            elif structure_type == "info":
                pass
            else:
                pass
            if skip <= 0:
                # We somehow didn't find a matching type. Maybe this file name's length value is broken. Try to fix it!
                # This is a bit voodoo and probably not the nicest way. Beware, there be dragons!
                self._log("Re-reading!")
                # Rewind 8 bytes, so that we can re-read structure_id and structure_type
                self.skip(-1 * 2 * 0x4)
                filename += self.offset_read(0x2).decode("utf-16be")
                # re-read structure_id and structure_type
                structure_id, = struct.unpack_from(">I", self.offset_read(4))
                structure_type, = struct.unpack_from(">4s", self.offset_read(4))
                structure_type = structure_type.decode()
                # Look-ahead and check if we have structure_type==Iloc followed by blob.
                # If so, we're interested in blob, not Iloc. Otherwise continue!
                future_structure_type = struct.unpack_from(">4s", self.offset_read(4, offset=self.pos))
                self._log("Re-read structure_id {} / structure_type {}".format(structure_id, structure_type))
                if structure_type != "blob" and future_structure_type != "blob":
                    structure_type = ""
                    self._log("Forcing another round!")
        # Skip bytes until the next (file name) block
        self.skip(skip)
        self._log("Filename {}".format(filename))
        return filename

    def _log(self, *args):
        if self.debug:
            print("[DEBUG] ", *args)


class DS_Store(DataBlock, object):
    """
    Represents the .DS_Store file from the given binary data.
    """

    def __init__(self, data, debug=False):
        super(DS_Store, self).__init__(data, debug)
        self.data = data
        self.root = self.__read_header()
        self.offsets = self.__read_offsets()
        self.toc = self.__read_TOC()
        self.freeList = self.__read_freelist()
        self.debug = debug

    def __read_header(self):
        """
        Checks if self.data is actually a .DS_Store file by checking the magic bytes.
        It returns the file's root block.
        """
        # We read at least 32+4 bytes for the header!
        if len(self.data) < 36:
            raise ParsingError("Length of data is too short!")
        # Check the magic bytes for .DS_Store
        magic1, magic2 = struct.unpack_from(">II", self.offset_read(2 * 4))
        if not magic1 == 0x1 and not magic2 == 0x42756431:
            raise ParsingError("Magic byte 1 does not match!")
        # After the magic bytes, the offset follows two times with the block's size in between.
        # Both offsets have to match and are the starting point of the root block
        offset, size, offset2 = struct.unpack_from(">III", self.offset_read(3 * 4))
        self._log("Offset 1: {}".format(offset))
        self._log("Size: {}".format(size))
        self._log("Offset 2: {}".format(offset2))
        if not offset == offset2:
            raise ParsingError("Offsets do not match!")
        # Skip 16 bytes of unknown data...
        self.skip(4 * 4)
        return DataBlock(self.offset_read(size, offset + 4), debug=self.debug)

    def __read_offsets(self):
        """
        Reads the offsets which follow the header.
        """
        start_pos = self.root.pos
        # First get the number of offsets in this file.
        count, = struct.unpack_from(">I", self.root.offset_read(4))
        self._log("Offset count: {}".format(count))
        # Always appears to be zero!
        self.root.skip(4)
        # Iterate over the offsets and get the offset addresses.
        offsets = []
        for i in range(count):
            # Address of the offset.
            address, = struct.unpack_from(">I", self.root.offset_read(4))
            self._log("Offset {} is {}".format(i, address))
            if address == 0:
                # We're only interested in non-zero values
                continue
            offsets.append(address)
        # Calculate the end of the address space (filled with zeroes) instead of dumbly reading zero values...
        section_end = start_pos + (count // 256 + 1) * 256 * 4 - count * 4
        # Skip to the end of the section
        self.root.skip(section_end)
        self._log("Skipped {} to {}".format(hex(self.root.pos + section_end), hex(self.root.pos)))
        self._log("Offsets: {}".format(offsets))
        return offsets

    def __read_TOC(self):
        """
        Reads the table of contents (TOCs) from the file.
        """
        self._log("POS {}".format(hex(self.root.pos)))
        # First get the number of ToC entries.
        count, = struct.unpack_from(">I", self.root.offset_read(4))
        self._log("Toc count: {}".format(count))
        toc = {}
        # Iterate over all ToCs
        for i in range(count):
            # Get the length of a ToC's name
            toc_len, = struct.unpack_from(">b", self.root.offset_read(1))
            # Read the ToC's name
            toc_name, = struct.unpack_from(">{}s".format(toc_len), self.root.offset_read(toc_len))
            # Read the address (block id) in the data section
            block_id, = struct.unpack_from(">I", self.root.offset_read(4))
            # Add all values to the dictionary
            toc[toc_name.decode()] = block_id
        self._log("Toc {}".format(toc))
        return toc

    def __read_freelist(self):
        """
        Read the free list from the header.
        The free list has n=0..31 buckets with the index 2^n
        """
        freelist = {}
        for i in range(32):
            freelist[2**i] = []
            # Read the amount of blocks in the specific free list.
            blkcount, = struct.unpack_from(">I", self.root.offset_read(4))
            for j in range(blkcount):
                # Read blkcount block offsets.
                free_offset, = struct.unpack_from(">I", self.root.offset_read(4))
                freelist[2**i].append(free_offset)
        self._log("Freelist: {}".format(freelist))
        return freelist

    def __block_by_id(self, block_id):
        """
        Create a DataBlock from a given block ID (e.g. from the ToC)
        """
        # First check if the block_id is within the offsets range
        if len(self.offsets) < block_id:
            raise ParsingError("BlockID out of range!")
        # Get the address of the block
        addr = self.offsets[block_id]
        # Do some necessary bit operations to extract the offset and the size of the block.
        # The address without the last 5 bits is the offset in the file
        offset = (int(addr) >> 0x5 << 0x5)
        # The address' last five bits are the block's size.
        size = 1 << (int(addr) & 0x1f)
        self._log("New block: addr {} offset {} size {}".format(addr, offset + 0x4, size))
        # Return the new block
        return DataBlock(self.offset_read(size, offset + 0x4), debug=self.debug)

    def traverse_root(self):
        """
        Traverse from the root block and extract all file names.
        """
        # Get the root block from the ToC 'DSDB'
        root = self.__block_by_id(self.toc['DSDB'])
        # Read the following root block's ID, so that we can traverse it.
        root_id, = struct.unpack(">I", root.offset_read(4))
        self._log("Root-ID ", root_id)
        # Read other values that might be useful, but that we're not interested in... (at least right now)
        internal_block_count, = struct.unpack(">I", root.offset_read(4))
        record_count, = struct.unpack(">I", root.offset_read(4))
        block_count, = struct.unpack(">I", root.offset_read(4))
        unknown, = struct.unpack(">I", root.offset_read(4))
        # traverse from the extracted root block id.
        return self.traverse(root_id)

    def traverse(self, block_id):
        """
        Traverses a block identified by the given block_id and extracts the file names.
        """
        # Get the responsible block by its ID
        node = self.__block_by_id(block_id)
        # Extract the pointer to the next block
        next_pointer, = struct.unpack(">I", node.offset_read(4))
        # Get the number of next blocks or records
        count, = struct.unpack(">I", node.offset_read(4))
        self._log("Next Ptr {} with {} ".format(hex(next_pointer), hex(count)))
        filenames = []
        # If a next_pointer exists (>0), iterate through the next blocks recursively
        # If not, we extract all file names from the current block
        if next_pointer > 0:
            for i in range(0, count, 1):
                # Get the block_id for the next block
                next_id, = struct.unpack(">I", node.offset_read(4))
                self._log("Child: {}".format(next_id))
                # Traverse it recursively
                files = self.traverse(next_id)
                filenames += files
                # Also get the filename for the current block.
                filename = node.read_filename()
                self._log("Filename: ", filename)
                filenames.append(filename)
            # Now that we traversed all children of the next_pointer, traverse the pointer itself.
            # TODO: Check if that is really necessary as the last child should be the current node... (or so?)
            files = self.traverse(next_pointer)
            filenames += files
        else:
            # We're probably in a leaf node, so extract the file names.
            for i in range(0, count, 1):
                f = node.read_filename()
                filenames.append(f)
        return filenames
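
The bit twiddling in `__block_by_id()` packs a block's file offset and size into one 32-bit address: clearing the low five bits gives the offset, and the low five bits are the log2 of the size. A worked example in R with a hypothetical address value, just to make the arithmetic concrete:

``` r
# Hypothetical block address, for illustration only.
addr   <- 0x200bL
offset <- bitwShiftL(bitwShiftR(addr, 5L), 5L)  # 0x2000: address with low 5 bits cleared
size   <- bitwShiftL(1L, bitwAnd(addr, 0x1fL))  # 2^0xb = 2048 bytes
c(offset = offset, size = size)
```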

21
mactheknife.Rproj

@@ -0,0 +1,21 @@
Version: 1.0
RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default
EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8
RnwWeave: Sweave
LaTeX: pdfLaTeX
StripTrailingWhitespace: Yes
BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source
PackageBuildArgs: --resave-data
PackageRoxygenize: rd,collate,namespace

14
man/mactheknife.Rd

@@ -0,0 +1,14 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/mactheknife-package.R
\docType{package}
\name{mactheknife}
\alias{mactheknife}
\alias{mactheknife-package}
\title{Read 'macOS' '.DS_Store' Files}
\description{
A thin wrapper around the 'Python' 'dsstore' module
\url{https://github.com/gehaxelt/Python-dsstore} by 'Sebastian Neef'.
}
\author{
Bob Rudis (bob@rud.is)
}

21
man/read_dsstore.Rd

@@ -0,0 +1,21 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/read-dsstore.R
\name{read_dsstore}
\alias{read_dsstore}
\title{Read a \code{.DS_Store} file}
\usage{
read_dsstore(path)
}
\arguments{
\item{path}{a path to a valid \code{.DS_Store} file (\code{\link[=path.expand]{path.expand()}} will be called)}
}
\value{
a character vector of filenames in the \code{.DS_Store} file or
a length 0 character vector if no parseable data was found
}
\description{
Read a \code{.DS_Store} file
}
\examples{
read_dsstore(system.file("extdat", "DS_Store.ctf", package = "mactheknife"))
}

2
tests/test-all.R

@@ -0,0 +1,2 @@
library(testthat)
test_check("mactheknife")

6
tests/testthat/test-mactheknife.R

@@ -0,0 +1,6 @@
context("minimal package functionality")

test_that("we can do something", {
  #expect_that(some_function(), is_a("data.frame"))
})
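
The test file is only a placeholder in this commit. A sketch of a more substantive test (my suggestion, not the author's; it assumes the test machine has a working Python for reticulate) could exercise the bundled sample file:

``` r
# Sketch: parse the bundled sample .DS_Store and check the result shape.
test_that("read_dsstore() parses the bundled sample file", {
  res <- read_dsstore(system.file("extdat", "DS_Store.ctf", package = "mactheknife"))
  expect_is(res, "character")
  expect_true(length(res) > 0)
})
```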