From 5fcca76d2d6b34c8fe345d3b254fec14ed7964a9 Mon Sep 17 00:00:00 2001 From: boB Rudis Date: Sun, 29 Apr 2018 08:12:22 -0400 Subject: [PATCH] initial commit --- .Rbuildignore | 11 + .codecov.yml | 1 + .gitignore | 8 + .travis.yml | 6 + DESCRIPTION | 26 ++ LICENSE | 2 + NAMESPACE | 4 + NEWS.md | 2 + R/mactheknife-package.R | 11 + R/read-dsstore.R | 29 +++ R/zzz.R | 11 + README.Rmd | 64 +++++ README.md | 84 +++++++ inst/extdat/DS_Store.ctf | Bin 0 -> 6148 bytes inst/modules/__pycache__/dsstore.cpython-36.pyc | Bin 0 -> 8037 bytes inst/modules/dsstore.py | 307 ++++++++++++++++++++++++ mactheknife.Rproj | 21 ++ man/mactheknife.Rd | 14 ++ man/read_dsstore.Rd | 21 ++ tests/test-all.R | 2 + tests/testthat/test-mactheknife.R | 6 + 21 files changed, 630 insertions(+) create mode 100644 .Rbuildignore create mode 100644 .codecov.yml create mode 100644 .gitignore create mode 100644 .travis.yml create mode 100644 DESCRIPTION create mode 100644 LICENSE create mode 100644 NAMESPACE create mode 100644 NEWS.md create mode 100644 R/mactheknife-package.R create mode 100644 R/read-dsstore.R create mode 100644 R/zzz.R create mode 100644 README.Rmd create mode 100644 README.md create mode 100644 inst/extdat/DS_Store.ctf create mode 100644 inst/modules/__pycache__/dsstore.cpython-36.pyc create mode 100644 inst/modules/dsstore.py create mode 100644 mactheknife.Rproj create mode 100644 man/mactheknife.Rd create mode 100644 man/read_dsstore.Rd create mode 100644 tests/test-all.R create mode 100644 tests/testthat/test-mactheknife.R diff --git a/.Rbuildignore b/.Rbuildignore new file mode 100644 index 0000000..70baf05 --- /dev/null +++ b/.Rbuildignore @@ -0,0 +1,11 @@ +^.*\.Rproj$ +^\.Rproj\.user$ +^\.travis\.yml$ +^README\.*Rmd$ +^README\.*html$ +^NOTES\.*Rmd$ +^NOTES\.*html$ +^\.codecov\.yml$ +^README_files$ +^doc$ +^tmp$ diff --git a/.codecov.yml b/.codecov.yml new file mode 100644 index 0000000..69cb760 --- /dev/null +++ b/.codecov.yml @@ -0,0 +1 @@ +comment: false diff --git a/.gitignore 
b/.gitignore new file mode 100644 index 0000000..cce1f17 --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +.DS_Store +.Rproj.user +.Rhistory +.RData +.Rproj +src/*.o +src/*.so +src/*.dll diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..f93993f --- /dev/null +++ b/.travis.yml @@ -0,0 +1,6 @@ +language: R +sudo: false +cache: packages + +after_success: +- Rscript -e 'covr::codecov()' diff --git a/DESCRIPTION b/DESCRIPTION new file mode 100644 index 0000000..24955e2 --- /dev/null +++ b/DESCRIPTION @@ -0,0 +1,26 @@ +Package: mactheknife +Type: Package +Title: Read 'macOS' .DS_Store' Files +Version: 0.1.0 +Date: 2018-04-29 +Authors@R: c( + person("Bob", "Rudis", email = "bob@rud.is", role = c("aut", "cre"), + comment = c(ORCID = "0000-0001-5670-2640")), + person("Sebastian", "Neef", email = "github@gehaxelt.in", role = c("aut", "cph"), + comment = "Python dsstore module ") + ) +Maintainer: Bob Rudis +Description: A thin wrapper around the 'Python' 'dsstore' module + by 'Sebastian Neef'. 
+URL: https://github.com/hrbrmstr/mactheknife +BugReports: https://github.com/hrbrmstr/mactheknife/issues +SystemRequirements: Python +Encoding: UTF-8 +License: MIT + file LICENSE +Suggests: + testthat, + covr +Depends: + R (>= 3.2.0), + reticulate +RoxygenNote: 6.0.1.9000 diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..e7949b3 --- /dev/null +++ b/LICENSE @@ -0,0 +1,2 @@ +YEAR: 2018 +COPYRIGHT HOLDER: Bob Rudis diff --git a/NAMESPACE b/NAMESPACE new file mode 100644 index 0000000..bdb1d58 --- /dev/null +++ b/NAMESPACE @@ -0,0 +1,4 @@ +# Generated by roxygen2: do not edit by hand + +export(read_dsstore) +import(reticulate) diff --git a/NEWS.md b/NEWS.md new file mode 100644 index 0000000..9b4679b --- /dev/null +++ b/NEWS.md @@ -0,0 +1,2 @@ +0.1.0 +* Initial release diff --git a/R/mactheknife-package.R b/R/mactheknife-package.R new file mode 100644 index 0000000..24c8153 --- /dev/null +++ b/R/mactheknife-package.R @@ -0,0 +1,11 @@ +#' Read 'macOS' .DS_Store' Files +#' +#' A thin wrapper around the 'Python' 'dsstore' module +#' by 'Sebastian Neef'. 
+#'
+#' @md
+#' @name mactheknife
+#' @docType package
+#' @author Bob Rudis (bob@@rud.is)
+#' @import reticulate
+NULL
diff --git a/R/read-dsstore.R b/R/read-dsstore.R
new file mode 100644
index 0000000..fc1da84
--- /dev/null
+++ b/R/read-dsstore.R
@@ -0,0 +1,29 @@
+#' Read a `.DS_Store` file
+#'
+#' @md
+#' @param path a path to a valid `.DS_Store` file ([path.expand()] will be called)
+#' @return a character vector of filenames in the `.DS_Store` file or
+#' a length 0 character vector if no parseable data was found
+#' @export
+#' @examples
+#' read_dsstore(system.file("extdat", "DS_Store.ctf", package = "mactheknife"))
+read_dsstore <- function(path) {
+
+  # mustWork = TRUE: stop with a clear R error if the path does not exist
+  stor_path <- normalizePath(path.expand(path), mustWork = TRUE)
+
+  fil <- os$open(stor_path, os$O_RDONLY)
+  on.exit(os$close(fil), add = TRUE) # release the descriptor even on error
+  contents <- os$read(fil, as.integer(file.size(stor_path)))
+
+  # Parse the raw bytes with the bundled Python 'dsstore' module
+  d <- dsstore$DS_Store(contents)
+  ds_fils <- d$traverse_root()
+
+  out <- unique(ds_fils)
+
+  # Normalise an empty result to a zero-length character vector
+  if (length(out) == 0) out <- character()
+
+  out
+}
diff --git a/R/zzz.R b/R/zzz.R
new file mode 100644
index 0000000..d784981
--- /dev/null
+++ b/R/zzz.R
@@ -0,0 +1,11 @@
+dsstore <- NULL
+os <- NULL
+
+.onLoad <- function(libname, pkgname) {
+  dsstore <<- reticulate::import_from_path(
+    module = "dsstore",
+    path = system.file("modules", package = "mactheknife"),
+    delay_load = TRUE
+  )
+  os <<- reticulate::import("os", delay_load = TRUE)
+}
\ No newline at end of file
diff --git a/README.Rmd b/README.Rmd
new file mode 100644
index 0000000..2eb23c9
--- /dev/null
+++ b/README.Rmd
@@ -0,0 +1,64 @@
+---
+output: rmarkdown::github_document
+---
+
+# mactheknife
+
+Read 'macOS' .DS_Store' Files
+
+## Description
+
+A thin wrapper around the 'Python' 'dsstore' module by 'Sebastian Neef'.
+ +## NOTE + +- This may turn into a broader "macOS hacking" package +- Uses `reticulate` so a working Python implementation is needed + +## What's Inside The Tin + +- `read_dsstore`: Read a '.DS_Store' file + +The following functions are implemented: + +## Installation + +```{r eval=FALSE} +devtools::install_github("hrbrmstr/mactheknife") +``` + +```{r message=FALSE, warning=FALSE, error=FALSE, include=FALSE} +options(width=120) +``` + +## Usage + +```{r message=FALSE, warning=FALSE, error=FALSE} +library(mactheknife) + +# current version +packageVersion("mactheknife") + +``` + +## Built-in data + +```{r} +read_dsstore( + path = system.file("extdat", "DS_Store.ctf", package = "mactheknife") +) +``` + +## My "~/projects" folder (use your own dir as an example) + +```{r} +library(magrittr) + +list.files( + path = "~/projects", pattern = "\\.DS_Store", + all.files=TRUE, recursive = TRUE, full.names = TRUE +) %>% + lapply(read_dsstore) -> x + +str(x) +``` diff --git a/README.md b/README.md new file mode 100644 index 0000000..44718e9 --- /dev/null +++ b/README.md @@ -0,0 +1,84 @@ + +# mactheknife + +Read ‘macOS’ .DS\_Store’ Files + +## Description + +A thin wrapper around the ‘Python’ ‘dsstore’ module + by ‘Sebastian Neef’. 
+ +## NOTE + + - This may turn into a broader “macOS hacking” package + - Uses `reticulate` so a working Python implementation is needed + +## What’s Inside The Tin + + - `read_dsstore`: Read a ‘.DS\_Store’ file + +The following functions are implemented: + +## Installation + +``` r +devtools::install_github("hrbrmstr/mactheknife") +``` + +## Usage + +``` r +library(mactheknife) + +# current verison +packageVersion("mactheknife") +``` + + ## [1] '0.1.0' + +## Built-in data + +``` r +read_dsstore( + path = system.file("extdat", "DS_Store.ctf", package = "mactheknife") +) +``` + + ## [1] "favicon.ico" "flag" "static" "templates" "vulnerable.py" "vulnerable.wsgi" + +## My “~/projects” folder (use your own dir as an example) + +``` r +library(magrittr) + +list.files( + path = "~/projects", pattern = "\\.DS_Store", + all.files=TRUE, recursive = TRUE, full.names = TRUE +) %>% + lapply(read_dsstore) -> x + +str(x) +``` + + ## List of 21 + ## $ : chr [1:20] "2017-dashboard" "2017-tlapd" "cataps" "congress-privacy" ... + ## $ : chr "greenery-palettes" + ## $ : chr "data" + ## $ : chr "data" + ## $ : chr(0) + ## $ : chr(0) + ## $ : chr(0) + ## $ : chr "packrat" + ## $ : chr "lib" + ## $ : chr "x86_64-apple-darwin15.6.0" + ## $ : chr "3.4.0" + ## $ : chr(0) + ## $ : chr "data" + ## $ : chr "lyme" + ## $ : chr "packrat" + ## $ : chr "lib" + ## $ : chr "x86_64-apple-darwin15.6.0" + ## $ : chr "3.4.1" + ## $ : chr "plots" + ## $ : chr [1:2] "top-1m.csv" "top-1m.csv.zip" + ## $ : chr(0) diff --git a/inst/extdat/DS_Store.ctf b/inst/extdat/DS_Store.ctf new file mode 100644 index 0000000000000000000000000000000000000000..0c2165b78a242c92b6c4bef0926c1d8a2e6d66b3 GIT binary patch literal 6148 zcmeHK(MrQG6g{bBYz%RaKIYLU!9OTt3cl*QXzLtG~zV!qB06)P0&~uY8NxHp? 
z%Dr%NlJuOUJt<8Cz>GKdXTS(RpDCCPSnLS7F4>7J^z?|vjZvY&0;iax>aexrH##6| zH^c+(@a$IYd;OZrx~a;#Danx64G&Nr>drI7W1-?gxA9gYx34Ie$2&N!X1z` z#Uqw(WT~%Wg}F);hrI6%*$Z-?I7`X;lyBe?uZi+KQ6BA4zVLo$n8)huCQ-Agp$@16 z>cF3MfP1z?f9O$5bwC|Z2euB#_rWp+i-4_1_vv8ajsV1*-7Z*{pGDEwfJMO8Bd;)w zmlD0ygd>LWa`xN6D+0D2y&NVSK1`U|gcFK!vvYjw=rDyxE!6>aAa>xuH%GGnpYK2a z$0BX11M0xPa=_%1>&cjp6!zB6!^vK2nYT;{;k6#O5?0(+Os;Ij$4nROw^AV%0b7sk QVd#&5vq1}W;71+!0?evl1^@s6 literal 0 HcmV?d00001 diff --git a/inst/modules/__pycache__/dsstore.cpython-36.pyc b/inst/modules/__pycache__/dsstore.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6965c991955508671f36f23411fa94ea2ff67721 GIT binary patch literal 8037 zcmb7JPjDO8dEft7fFMOt6h+Ify>eVzuq@J)9m`Q@dP@#_Y|}ryb$ZGqnY6#}dy55#FqtVZc>BHo zZ{PRt_kDP0db;w_Km5D@AMa|~ziDG18}uFg;#Cwv6M9$kwfmYdg!#E9%s@XjeFJq% z*r?lqiMoZlBTA^30t+pcZwq}(tGOpwL)&apIsE7#?%)?!Q3RT=3(Yrpw2@UmYsk1A z?tCm|BsXhj=GN3h=(qq^lt0&eM^wZVo+VKg(|Ec+&@9a_<8o%R@_Hj_ z-0eoqy>z|e;j`9l#Ie_kq}TAa8*#ho6(c;Hye$GRYI%4WZ+)x2MQ5kM`;BCYr{8PR zE6|6Y!(aRwDv6fpeeKxj7{{8f^+DH7L8a9b7VkiUNSs~6FP13!MqKS{9c^HSIQ77!H^TeqeY3A$()Kj@W`cJ**1xA? 
zbqmdZLBfoFFpbadvSiO;CNF8jW`h4FW+P||M9Mj4r6azVZ3%LU+`Lfq*a=B zl{mCS2dfqDlD_n3B&E$vtg2W}+LBrI zZnTp*N)j`3H#n-wb&kwYm*AgM&pO_+owTDcv!6D)yS~hW(TDV-ka`O` zoM;`LG__+0$ojGIXZk?ZAg}X~*A6Vtu|6=)7-w>ueQK`Nw>swWQ2rfAyP<31{ek_Z zag_F9wf3|x4f%_{CCYn-{6dt_qlZ61y|QP-|AOaK{;cND>HIm9KhNdQ+5CAvf6igO zp9_Q66IkcMp7Es~uY;N|sEY-)P*9f&YO$bRD5%Q?^Ah@Jg~$6#!jzr zJv`&CoW6rG|BNvn&;DCbR|@LYg8B-t@RPGv5N6+**RaYLnEM)!`>FV2)SsgMI3?_-bx{-=p3q-EWh9w)=K|MoZkpY?5~L_6^wK+v!Jzx_x|r}3zJlK7P_JiG(trIc`zO~(r zwtcG?C)l>RzmH9VqXe7WfArbgzIFE-_t#IzH2c=Z+Mhi1tp{6Fin!TD3jhY4viB*=UiYw{VJ+O>9_Iz;izxc>Eu=>ewLA8I0qUL6f+98|j zg#_X1ThD-cpHUg&l~FN>Q%U?8QsPA6Z}G;oQgh=@+!&-zK;Gf ze$+y=`Wbi{mTOiFj5X%b>za%DoIb0=(ODHd(@P_Faq0|(-x#{O`*f%U6woMKO`;2< zum8yS8=yYMc;@D>-0!S^eD_=Tzw3dwOBe~iMbIs?4`e$`axa!y4Y?CfxG}!@n06pN z((nS~evDv1O}|&uH)|CM&!tHkB?-r6rFvaNO+Z2dg3L|D!0S{cerD4-Y-|SI?hrcC z_2ZC)0hj>{NNGI|9$XXG<5VuXj=RA~csimi zDtMN}l&IqAifJ)}XBohL7SD<}FXr%^Vi;eAX`0X6AsEk5{G;GN1~H6K%rJ0i*saVJ z!$h)F3Lb8^!-jmuh+@fO8~1xq{HJ)05sC+bhpinVd9t|2;CP*Uy~7k)VV9{gs0ve4 zjP)APgg7coVl zB(wmD&l-O5Pf;YKCU7%CNBFRiFvR+vo|p&?O!U}dmLZoh0Kn?&!r3#bn*5}1h?1f( z!=-?OXV694;ylkV2j;*UFgwH)xx(C;k(;cd4#<`}J4Z~T4%f$Zvu^_~{T3c2r!IQK(xju+CuDxBlmZqVF|DH149zlZ1o{t^Lm_ZfWVgg64^ zHfavoYLd$R#ty`2rO$qG+#(-re-2Zx)+-E1rmg7%@_UBPGq%G+=7jeuHZl)uZ?MxuaEz1rxoX^)Fs42%-k5@Yjum6N70rSFFc-)R zSc|az^o2h$Dn`06CfZ|D3;8D9DCGNpP=S;{z6W|@0OnEHz(FrdC`mNQtJ!f1s73uH z3Nv(V2VwTngT$r2b?v)%2G*Htkc8X9!WtH2m}dfZLWbpEKKZ0?cFGFWJ^2I>4<<`E zmysj7gly3~GD`E>CBQp+E@}gZPgm+Yxn3X?={QM+TlKy}q4}^z=PyQ;vG-8BOlU(T zy21H%5CVQD_NU^YNzrwk=AK3{{!A`&`F;Etb7K#MX1T_K zK5JCTr-Gs=+t6oB&#daND*%#boM2U1^zs$Xmc4((io`jgcOZM9U{T4!u_d!%165mz z0+1r4MNYA!_w!M01SCfS@`pO)?@y8Lp?M^4CS(*+!t*R{EnyzkSAA?-DNMGxZ*nID zdT@hwr+lp4)}B1z&_jc)kwfM4allbd*UpflqyextrJ!f)099KA4U zh%l#+1QI#=v6uY<1bC?*brVPf^YSkS#ty6jWWcxp9wa&iqDn?6mBD;9kKTuGhID}U zZ^8DQHO3yU@d9j6Zf91%tv#7xo2!xDyIB60dlglfH z?Ka?*K6ra+>75mCyVu+c0P`q6#H+PK5gd6p{yZ!;l3&3Eu(ultLG@k^eAq0wl9@QN 
z2y7)N;E{KzC_GOscQuTb=*Wu2;O8DoRHk?QDLQCf>EM9{?gOf*t0G83WJh_xKPD4@Yvbd^7qq(KP8umj@>`2kHQ)lvny zdqNRPp|_twr~ea83KZLfA}SWC>*8tTFgCobL@dwXX!e@j+k!uhh z(jt@{@_=?n>7g+w>s#wv>vu=G=sp69y$*)gN~c1fu}f0sl=L%mR8C?92{RYl@ri~8 zn6%d~(W$@=e;#ZOWZ3BDOkzd#s}hL}De9^=rS?)fTY2-p{h_&GO}=kfQsY_$%W!SmfvYpifWGi zbHYZOR287nhMHP?mi+nAZc>79;BUfXlO4Eo##$;@=s@X9x${N9L-fGy+kF>VNtgq2 z)dLEDEK%y0IqJ1L6;RmmNsEoWORi!xC%k&}`l;O)058=#6}bLNzru5Wfw?vW%uO4= zBVd$XWWIR#*DXxit+x6fW-M|igF;ELF|ku%lRyBcCCB7$BysbbkWufmgsld94%iHa zk_rlx*LK@o!GN=JFHdCM@_-&Uz&aI{P%dC3gPZ_kkf=P#H>j5rD|bL)A_2$TmW0Tk z`PC370EA@OWWC6NVVsZHj-@+1^-kOj~Nn6c8h~ldtG`m7+F#WP& znXWzuY8DAC*FaJXND|Sx2~-IeGZvu_Wt7rJ(-S!(xq>BCVgmnBYw|rR-lyUg6?_}t zB#Phn22q7+Je4$}1jiwM?CUsbRCZr len(self.data) + """ + if not offset: + offset_position = self.pos + else: + offset_position = offset + + if len(self.data) < offset_position+length: + raise ParsingError("Offset+Length > len(self.data)") + + if not offset: + self.pos += length + + value = self.data[offset_position:offset_position+length] + self._log("Reading: {}-{} => {}".format(hex(offset_position), hex(offset_position+length), value)) + return value + + def skip(self, length): + """ + Increases pos by length without reading data! + """ + self.pos += length + + def read_filename(self): + """ + Extracts a file name from the current position. + """ + # The length of the file name in bytes. + length, = struct.unpack_from(">I", self.offset_read(4)) + # The file name in UTF-16, which is two bytes per character. + filename = self.offset_read(2 * length).decode("utf-16be") + # A structure ID that I haven't found any use of. + structure_id, = struct.unpack_from(">I", self.offset_read(4)) + # Now read the structure type as a string of four characters and decode it to ascii. 
+ structure_type, = struct.unpack_from(">4s", self.offset_read(4)) + + structure_type = structure_type.decode() + self._log("Structure type ", structure_type) + # If we don't find a match, skip stays < 0 and we will do some magic to find the right skip due to somehow broken .DS_Store files.. + skip = -1 + # Source: http://search.cpan.org/~wiml/Mac-Finder-DSStore/DSStoreFormat.pod + while skip < 0: + if structure_type == "bool": + skip = 1 + elif structure_type == "type" or structure_type == "long" or structure_type == "shor" or structure_type == "fwsw" or structure_type == "fwvh" or structure_type == "icvt" or structure_type == "lsvt" or structure_type == "vSrn" or structure_type == "vstl": + skip = 4 + elif structure_type == "comp" or structure_type == "dutc" or structure_type == "icgo" or structure_type == "icsp" or structure_type == "logS" or structure_type == "lg1S" or structure_type == "lssp" or structure_type == "modD" or structure_type == "moDD" or structure_type == "phyS" or structure_type == "ph1S": + skip = 8 + elif structure_type == "blob": + blen, = struct.unpack_from(">I", self.offset_read(4)) + skip = blen + elif structure_type == "ustr" or structure_type == "cmmt" or structure_type == "extn" or structure_type == "GRP0": + blen, = struct.unpack_from(">I", self.offset_read(4)) + skip = 2* blen + elif structure_type == "BKGD": + skip = 12 + elif structure_type == "ICVO" or structure_type == "LSVO" or structure_type == "dscl": + skip = 1 + elif structure_type == "Iloc" or structure_type == "fwi0": + skip = 16 + elif structure_type == "dilc": + skip = 32 + elif structure_type == "lsvo": + skip = 76 + elif structure_type == "icvo": + pass + elif structure_type == "info": + pass + else: + pass + + if skip <= 0: + # We somehow didn't find a matching type. Maybe this file name's length value is broken. Try to fix it! + # This is a bit voodoo and probably not the nicest way. Beware, there by dragons! 
+                self._log("Re-reading!")
+                # Rewind 8 bytes, so that we can re-read structure_id and structure_type
+                self.skip(-1 * 2 * 0x4)
+                filename += self.offset_read(0x2).decode("utf-16be")
+                # re-read structure_id and structure_type
+                structure_id, = struct.unpack_from(">I", self.offset_read(4))
+                structure_type, = struct.unpack_from(">4s", self.offset_read(4))
+                structure_type = structure_type.decode()
+                # Look-ahead and check if we have structure_type==Iloc followed by blob.
+                # If so, we're interested in blob, not Iloc. Otherwise continue!
+                future_structure_type = struct.unpack_from(">4s", self.offset_read(4, offset=self.pos))
+                self._log("Re-read structure_id {} / structure_type {}".format(structure_id, structure_type))
+                if structure_type != "blob" and future_structure_type != "blob":
+                    structure_type = ""
+                    self._log("Forcing another round!")
+
+
+        # Skip bytes until the next (file name) block
+        self.skip(skip)
+        self._log("Filename {}".format(filename))
+        return filename
+
+    def _log(self, *args):
+        if self.debug:
+            print("[DEBUG] ", *args)
+
+class DS_Store(DataBlock, object):
+    """
+    Represents the .DS_Store file from the given binary data.
+    """
+    def __init__(self, data, debug=False):
+        super(DS_Store, self).__init__(data, debug)
+        self.data = data
+        self.root = self.__read_header()
+        self.offsets = self.__read_offsets()
+        self.toc = self.__read_TOC()
+        self.freeList = self.__read_freelist()
+        self.debug = debug
+
+    def __read_header(self):
+        """
+        Checks if self.data is actually a .DS_Store file by checking the magic bytes.
+        It returns the file's root block.
+        """
+        # We read at least 32+4 bytes for the header!
+        if len(self.data) < 36:
+            raise ParsingError("Length of data is too short!")
+
+        # Check the magic bytes for .DS_Store: reject unless BOTH match (0x00000001, then "Bud1")
+        magic1, magic2 = struct.unpack_from(">II", self.offset_read(2*4))
+        if magic1 != 0x1 or magic2 != 0x42756431:
+            raise ParsingError("Magic bytes do not match!")
+
+        # After the magic bytes, the offset follows two times with block's size in between.
+        # Both offsets have to match and are the starting point of the root block
+        offset, size, offset2 = struct.unpack_from(">III", self.offset_read(3*4))
+        self._log("Offset 1: {}".format(offset))
+        self._log("Size: {}".format(size))
+        self._log("Offset 2: {}".format(offset2))
+        if not offset == offset2:
+            raise ParsingError("Offsets do not match!")
+        # Skip 16 bytes of unknown data...
+        self.skip(4*4)
+
+        return DataBlock(self.offset_read(size, offset+4), debug=self.debug)
+
+    def __read_offsets(self):
+        """
+        Reads the offsets which follow the header.
+        """
+        start_pos = self.root.pos
+        # First get the number of offsets in this file.
+        count, = struct.unpack_from(">I", self.root.offset_read(4))
+        self._log("Offset count: {}".format(count))
+        # Always appears to be zero!
+        self.root.skip(4)
+
+        # Iterate over the offsets and get the offset addresses.
+        offsets = []
+        for i in range(count):
+            # Address of the offset.
+            address, = struct.unpack_from(">I", self.root.offset_read(4))
+            self._log("Offset {} is {}".format(i, address))
+            if address == 0:
+                # We're only interested in non-zero values
+                continue
+            offsets.append(address)
+
+        # Calculate the end of the address space (filled with zeroes) instead of dumbly reading zero values...
+        section_end = start_pos + (count // 256 + 1) * 256 * 4 - count*4
+
+        # Skip to the end of the section
+        self.root.skip(section_end)
+        self._log("Skipped {} to {}".format(hex(self.root.pos + section_end), hex(self.root.pos)))
+        self._log("Offsets: {}".format(offsets))
+        return offsets
+
+    def __read_TOC(self):
+        """
+        Reads the table of contents (TOCs) from the file.
+        """
+        self._log("POS {}".format(hex(self.root.pos)))
+        # First get the number of ToC entries.
+        count, = struct.unpack_from(">I", self.root.offset_read(4))
+        self._log("Toc count: {}".format(count))
+        toc = {}
+        # Iterate over all ToCs
+        for i in range(count):
+            # Get the length of a ToC's name (unsigned byte, so it can never be negative)
+            toc_len, = struct.unpack_from(">B", self.root.offset_read(1))
+            # Read the ToC's name
+            toc_name, = struct.unpack_from(">{}s".format(toc_len), self.root.offset_read(toc_len))
+            # Read the address (block id) in the data section
+            block_id, = struct.unpack_from(">I", self.root.offset_read(4))
+            # Add all values to the dictionary
+            toc[toc_name.decode()]= block_id
+
+        self._log("Toc {}".format(toc))
+        return toc
+
+    def __read_freelist(self):
+        """
+        Read the free list from the header.
+        The free list has n=0..31 buckets with the index 2^n
+        """
+        freelist = {}
+        for i in range(32):
+            freelist[2**i] = []
+            # Read the amount of blocks in the specific free list.
+            blkcount, = struct.unpack_from(">I", self.root.offset_read(4))
+            for j in range(blkcount):
+                # Read blkcount block offsets.
+                free_offset, = struct.unpack_from(">I", self.root.offset_read(4))
+                freelist[2**i].append(free_offset)
+
+        self._log("Freelist: {}".format(freelist))
+        return freelist
+
+    def __block_by_id(self, block_id):
+        """
+        Create a DataBlock from a given block ID (e.g. from the ToC)
+        """
+        # First check if the block_id is within the offsets range (valid ids are 0..len-1)
+        if block_id >= len(self.offsets):
+            raise ParsingError("BlockID out of range!")
+
+        # Get the address of the block
+        addr = self.offsets[block_id]
+
+        # Do some necessary bit operations to extract the offset and the size of the block.
+        # The address without the last 5 bits is the offset in the file
+        offset = (int(addr) >> 0x5 << 0x5)
+        # The address' last five bits are the block's size.
+        size = 1 << (int(addr) & 0x1f)
+        self._log("New block: addr {} offset {} size {}".format( addr, offset + 0x4, size))
+        # Return the new block
+        return DataBlock(self.offset_read(size, offset + 0x4), debug=self.debug)
+
+    def traverse_root(self):
+        """
+        Traverse from the root block and extract all file names.
+        """
+        # Get the root block from the ToC 'DSDB'
+        root = self.__block_by_id(self.toc['DSDB'])
+        # Read the following root block's ID, so that we can traverse it.
+        root_id, = struct.unpack(">I", root.offset_read(4))
+        self._log("Root-ID ", root_id)
+
+        # Read other values that we might be useful, but we're not interested in... (at least right now)
+        internal_block_count, = struct.unpack(">I", root.offset_read(4))
+        record_count, = struct.unpack(">I", root.offset_read(4))
+        block_count, = struct.unpack(">I", root.offset_read(4))
+        unknown, = struct.unpack(">I", root.offset_read(4))
+
+        # traverse from the extracted root block id.
+        return self.traverse(root_id)
+
+    def traverse(self, block_id):
+        """
+        Traverses a block identified by the given block_id and extracts the file names.
+        """
+        # Get the responsible block by it's ID
+        node = self.__block_by_id(block_id)
+        # Extract the pointer to the next block
+        next_pointer, = struct.unpack(">I", node.offset_read(4))
+        # Get the number of next blocks or records
+        count, = struct.unpack(">I", node.offset_read(4))
+        self._log("Next Ptr {} with {} ".format(hex(next_pointer), hex(count)))
+
+        filenames = []
+        # If a next_pointer exists (>0), iterate through the next blocks recursively
+        # If not, we extract all file names from the current block
+        if next_pointer > 0:
+            for i in range(0, count, 1):
+                # Get the block_id for the next block
+                next_id, = struct.unpack(">I", node.offset_read(4))
+                self._log("Child: {}".format(next_id))
+                # Traverse it recursively
+                files = self.traverse(next_id)
+                filenames += files
+                # Also get the filename for the current block.
+                filename = node.read_filename()
+                self._log("Filename: ", filename)
+                filenames.append(filename)
+            # Now that we traversed all childs of the next_pointer, traverse the pointer itself.
+            # TODO: Check if that is really necessary as the last child should be the current node... (or so?)
+            files = self.traverse(next_pointer)
+            filenames += files
+        else:
+            # We're probably in a leaf node, so extract the file names.
+            for i in range(0, count, 1):
+                f = node.read_filename()
+                filenames.append(f)
+
+        return filenames
diff --git a/mactheknife.Rproj b/mactheknife.Rproj
new file mode 100644
index 0000000..446d9e1
--- /dev/null
+++ b/mactheknife.Rproj
@@ -0,0 +1,21 @@
+Version: 1.0
+
+RestoreWorkspace: Default
+SaveWorkspace: Default
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX
+
+StripTrailingWhitespace: Yes
+
+BuildType: Package
+PackageUseDevtools: Yes
+PackageInstallArgs: --no-multiarch --with-keep.source
+PackageBuildArgs: --resave-data
+PackageRoxygenize: rd,collate,namespace
diff --git a/man/mactheknife.Rd b/man/mactheknife.Rd
new file mode 100644
index 0000000..fdcad75
--- /dev/null
+++ b/man/mactheknife.Rd
@@ -0,0 +1,14 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/mactheknife-package.R
+\docType{package}
+\name{mactheknife}
+\alias{mactheknife}
+\alias{mactheknife-package}
+\title{Read 'macOS' .DS_Store' Files}
+\description{
+A thin wrapper around the 'Python' 'dsstore' module
+\url{https://github.com/gehaxelt/Python-dsstore} by 'Sebastian Neef'.
+}
+\author{
+Bob Rudis (bob@rud.is)
+}
diff --git a/man/read_dsstore.Rd b/man/read_dsstore.Rd
new file mode 100644
index 0000000..9e48185
--- /dev/null
+++ b/man/read_dsstore.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/read-dsstore.R
+\name{read_dsstore}
+\alias{read_dsstore}
+\title{Read a \code{.DS_Store} file}
+\usage{
+read_dsstore(path)
+}
+\arguments{
+\item{path}{a path to a valid \code{.DS_Store} file (\code{\link[=path.expand]{path.expand()}} will be called)}
+}
+\value{
+a character vector of filenames in the \code{.DS_Store} file or
+a length 0 character vector if no parseable data was found
+}
+\description{
+Read a \code{.DS_Store} file
+}
+\examples{
+read_dsstore(system.file("extdat", "DS_Store.ctf", package = "mactheknife"))
+}
diff --git a/tests/test-all.R b/tests/test-all.R
new file mode 100644
index 0000000..bfd9c4c
--- /dev/null
+++ b/tests/test-all.R
@@ -0,0 +1,2 @@
+library(testthat)
+test_check("mactheknife")
diff --git a/tests/testthat/test-mactheknife.R b/tests/testthat/test-mactheknife.R
new file mode 100644
index 0000000..0c22968
--- /dev/null
+++ b/tests/testthat/test-mactheknife.R
@@ -0,0 +1,6 @@
+context("minimal package functionality")
+test_that("read_dsstore() lists the entries of the bundled .DS_Store file", {
+  skip_if_not(reticulate::py_available(initialize = TRUE), "Python missing")
+  ds_path <- system.file("extdat", "DS_Store.ctf", package = "mactheknife")
+  expect_true("favicon.ico" %in% read_dsstore(ds_path))
+})