import struct


class ParsingError(Exception):
    pass


class DataBlock(object):
    """
    A basic data block inside the .DS_Store format.
    """

    def __init__(self, data, debug=False):
        super(DataBlock, self).__init__()
        self.data = data
        self.pos = 0
        self.debug = debug

    def offset_read(self, length, offset=None):
        """
        Returns a byte string of `length` bytes from `data`, starting at the given
        offset or at the current position.
        If no offset is given, pos is advanced by `length`.
        Raises ParsingError if offset + length > len(self.data).
        """
        # Use `is None` so that an explicit offset of 0 is honoured.
        if offset is None:
            offset_position = self.pos
        else:
            offset_position = offset
        if len(self.data) < offset_position + length:
            raise ParsingError("Offset+Length > len(self.data)")
        if offset is None:
            self.pos += length
        value = self.data[offset_position:offset_position + length]
        self._log("Reading: {}-{} => {}".format(hex(offset_position), hex(offset_position + length), value))
        return value
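    # Usage sketch (illustrative only): with data = b"\x00\x00\x00\x01Bud1",
    # offset_read(4) returns b"\x00\x00\x00\x01" and advances pos to 4, while
    # offset_read(4, offset=4) returns b"Bud1" and leaves pos untouched.
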
    def skip(self, length):
        """
        Increases pos by length without reading any data.
        """
        self.pos += length

    def read_filename(self):
        """
        Extracts a file name from the current position.
        """
        # The length of the file name in UTF-16 code units.
        length, = struct.unpack_from(">I", self.offset_read(4))
        # The file name in UTF-16BE, i.e. two bytes per code unit.
        filename = self.offset_read(2 * length).decode("utf-16be")
        # A structure ID that does not seem to be used for anything here.
        structure_id, = struct.unpack_from(">I", self.offset_read(4))
        # Read the structure type as a string of four characters and decode it to ASCII.
        structure_type, = struct.unpack_from(">4s", self.offset_read(4))
        structure_type = structure_type.decode()
        self._log("Structure type ", structure_type)
        # If we don't find a match, skip stays < 0 and we try to recover from a
        # (seemingly) broken .DS_Store file further below.
        skip = -1
        # Source: http://search.cpan.org/~wiml/Mac-Finder-DSStore/DSStoreFormat.pod
        while skip < 0:
            if structure_type == "bool":
                skip = 1
            elif structure_type in ("type", "long", "shor", "fwsw", "fwvh",
                                    "icvt", "lsvt", "vSrn", "vstl"):
                skip = 4
            elif structure_type in ("comp", "dutc", "icgo", "icsp", "logS",
                                    "lg1S", "lssp", "modD", "moDD", "phyS",
                                    "ph1S"):
                skip = 8
            elif structure_type == "blob":
                blen, = struct.unpack_from(">I", self.offset_read(4))
                skip = blen
            elif structure_type in ("ustr", "cmmt", "extn", "GRP0"):
                blen, = struct.unpack_from(">I", self.offset_read(4))
                skip = 2 * blen
            elif structure_type == "BKGD":
                skip = 12
            elif structure_type in ("ICVO", "LSVO", "dscl"):
                skip = 1
            elif structure_type in ("Iloc", "fwi0"):
                skip = 16
            elif structure_type == "dilc":
                skip = 32
            elif structure_type == "lsvo":
                skip = 76
            elif structure_type == "icvo":
                pass
            elif structure_type == "info":
                pass
            else:
                pass
            if skip <= 0:
                # We didn't find a matching type. Maybe the file name's length value
                # is broken, so try to fix it. This is a bit of voodoo and probably
                # not the nicest way. Beware, there be dragons!
                self._log("Re-reading!")
                # Rewind 8 bytes so that we can re-read structure_id and structure_type.
                self.skip(-1 * 2 * 0x4)
                # Treat the next two bytes as one more UTF-16 code unit of the file name.
                filename += self.offset_read(0x2).decode("utf-16be")
                # Re-read structure_id and structure_type.
                structure_id, = struct.unpack_from(">I", self.offset_read(4))
                structure_type, = struct.unpack_from(">4s", self.offset_read(4))
                structure_type = structure_type.decode()
                # Look ahead and check whether structure_type == "Iloc" is followed by "blob".
                # If so, we're interested in the blob, not in Iloc. Otherwise continue.
                future_structure_type, = struct.unpack_from(">4s", self.offset_read(4, offset=self.pos))
                # Decode leniently; the look-ahead bytes may be garbage in broken files.
                future_structure_type = future_structure_type.decode(errors="replace")
                self._log("Re-read structure_id {} / structure_type {}".format(structure_id, structure_type))
                if structure_type != "blob" and future_structure_type != "blob":
                    structure_type = ""
                    self._log("Forcing another round!")
        # Skip bytes until the next (file name) block.
        self.skip(skip)
        self._log("Filename {}".format(filename))
        return filename
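    # Record layout sketch (hypothetical record, for illustration): a record for a
    # file "foo" carrying an "Iloc"/"blob" entry is read as
    #   00 00 00 03                   -> name length (3 UTF-16 code units)
    #   00 66 00 6f 00 6f             -> "foo" in UTF-16BE
    #   49 6c 6f 63                   -> structure_id ("Iloc")
    #   62 6c 6f 62                   -> structure_type ("blob")
    #   00 00 00 10  <16 data bytes>  -> blob length, then the blob itself (skipped)
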
    def _log(self, *args):
        if self.debug:
            print("[DEBUG] ", *args)

class DS_Store(DataBlock):
    """
    Represents a .DS_Store file parsed from the given binary data.
    """

    def __init__(self, data, debug=False):
        super(DS_Store, self).__init__(data, debug)
        self.data = data
        self.root = self.__read_header()
        self.offsets = self.__read_offsets()
        self.toc = self.__read_TOC()
        self.freeList = self.__read_freelist()
        self.debug = debug

    def __read_header(self):
        """
        Checks whether self.data is actually a .DS_Store file by verifying the
        magic bytes, then returns the file's root block.
        """
        # We need at least 32 + 4 bytes for the header.
        if len(self.data) < 36:
            raise ParsingError("Length of data is too short!")
        # Check the magic bytes of .DS_Store: 0x00000001 followed by 'Bud1'.
        magic1, magic2 = struct.unpack_from(">II", self.offset_read(2 * 4))
        if magic1 != 0x1 or magic2 != 0x42756431:
            raise ParsingError("Magic bytes do not match!")
        # After the magic bytes, the root block's offset appears twice, with the
        # block's size in between. Both offsets have to match and point to the root block.
        offset, size, offset2 = struct.unpack_from(">III", self.offset_read(3 * 4))
        self._log("Offset 1: {}".format(offset))
        self._log("Size: {}".format(size))
        self._log("Offset 2: {}".format(offset2))
        if offset != offset2:
            raise ParsingError("Offsets do not match!")
        # Skip 16 bytes of unknown data...
        self.skip(4 * 4)
        # Offsets in the file appear to be relative to position 4, hence the + 4.
        return DataBlock(self.offset_read(size, offset + 4), debug=self.debug)
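    # Header layout as consumed above (per the DSStoreFormat.pod reference linked
    # in read_filename; field names are informal):
    #   bytes  0-3   alignment/magic word 0x00000001
    #   bytes  4-7   magic 'Bud1' (0x42756431)
    #   bytes  8-11  offset of the root (buddy allocator) block
    #   bytes 12-15  size of the root block
    #   bytes 16-19  the same offset again, used as a consistency check
    #   bytes 20-35  unknown, skipped here
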
    def __read_offsets(self):
        """
        Reads the block offsets which follow the header.
        """
        start_pos = self.root.pos
        # First get the number of offsets in this file.
        count, = struct.unpack_from(">I", self.root.offset_read(4))
        self._log("Offset count: {}".format(count))
        # The next four bytes always appear to be zero.
        self.root.skip(4)
        # Iterate over the offsets and collect the offset addresses.
        offsets = []
        for i in range(count):
            # Address of the offset.
            address, = struct.unpack_from(">I", self.root.offset_read(4))
            self._log("Offset {} is {}".format(i, address))
            if address == 0:
                # We're only interested in non-zero values.
                continue
            offsets.append(address)
        # The offset table is padded with zeroes up to a multiple of 256 entries.
        # Calculate the remaining padding instead of dumbly reading zero values...
        section_end = start_pos + (count // 256 + 1) * 256 * 4 - count * 4
        # Skip to the end of the section.
        self.root.skip(section_end)
        self._log("Skipped {} to {}".format(hex(self.root.pos - section_end), hex(self.root.pos)))
        self._log("Offsets: {}".format(offsets))
        return offsets
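    # Worked example for the padding above (assuming start_pos == 0, which holds
    # here because __read_offsets is the first reader of the root block): with
    # count = 5, the padded table is (5 // 256 + 1) * 256 * 4 = 1024 bytes, of
    # which 5 * 4 = 20 bytes were already read, so 1004 bytes are skipped.
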
    def __read_TOC(self):
        """
        Reads the table of contents (ToC) from the file.
        """
        self._log("POS {}".format(hex(self.root.pos)))
        # First get the number of ToC entries.
        count, = struct.unpack_from(">I", self.root.offset_read(4))
        self._log("Toc count: {}".format(count))
        toc = {}
        # Iterate over all ToC entries.
        for i in range(count):
            # Get the length of the ToC entry's name.
            toc_len, = struct.unpack_from(">b", self.root.offset_read(1))
            # Read the ToC entry's name.
            toc_name, = struct.unpack_from(">{}s".format(toc_len), self.root.offset_read(toc_len))
            # Read the address (block ID) in the data section.
            block_id, = struct.unpack_from(">I", self.root.offset_read(4))
            # Add the entry to the dictionary.
            toc[toc_name.decode()] = block_id
        self._log("Toc {}".format(toc))
        return toc

    def __read_freelist(self):
        """
        Reads the free list that follows in the header block.
        The free list has 32 buckets, indexed by 2**n for n = 0..31.
        """
        freelist = {}
        for i in range(32):
            freelist[2 ** i] = []
            # Read the number of blocks in this bucket of the free list.
            blkcount, = struct.unpack_from(">I", self.root.offset_read(4))
            for j in range(blkcount):
                # Read blkcount block offsets.
                free_offset, = struct.unpack_from(">I", self.root.offset_read(4))
                freelist[2 ** i].append(free_offset)
        self._log("Freelist: {}".format(freelist))
        return freelist
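    # For illustration: freelist[2 ** 5] holds the offsets of free 32-byte blocks,
    # freelist[2 ** 11] those of free 2048-byte blocks, and so on; the bucket
    # index corresponds to the block size in bytes.
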
    def __block_by_id(self, block_id):
        """
        Creates a DataBlock from a given block ID (e.g. taken from the ToC).
        """
        # First check that the block_id is within the range of known offsets.
        if block_id >= len(self.offsets):
            raise ParsingError("BlockID out of range!")
        # Get the address of the block.
        addr = self.offsets[block_id]
        # A few bit operations extract the offset and the size of the block:
        # the address with its last 5 bits cleared is the offset in the file,
        offset = int(addr) >> 0x5 << 0x5
        # and the address' last five bits are the log2 of the block's size.
        size = 1 << (int(addr) & 0x1f)
        self._log("New block: addr {} offset {} size {}".format(addr, offset + 0x4, size))
        # Return the new block.
        return DataBlock(self.offset_read(size, offset + 0x4), debug=self.debug)
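    # Worked example for the address decoding above (made-up address): for
    # addr = 0x100c, offset = 0x100c >> 5 << 5 = 0x1000 and
    # size = 1 << (0x100c & 0x1f) = 1 << 0xc = 4096 bytes, so the block's data
    # starts at file offset 0x1000 + 0x4.
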
    def traverse_root(self):
        """
        Traverses the tree from the root block and extracts all file names.
        """
        # Get the root block via the ToC entry 'DSDB'.
        root = self.__block_by_id(self.toc['DSDB'])
        # Read the block ID of the tree's root node, so that we can traverse it.
        root_id, = struct.unpack(">I", root.offset_read(4))
        self._log("Root-ID ", root_id)
        # Read other values that might be useful but that we're not interested in
        # (at least right now).
        internal_block_count, = struct.unpack(">I", root.offset_read(4))
        record_count, = struct.unpack(">I", root.offset_read(4))
        block_count, = struct.unpack(">I", root.offset_read(4))
        unknown, = struct.unpack(">I", root.offset_read(4))
        # Traverse from the extracted root block ID.
        return self.traverse(root_id)

    def traverse(self, block_id):
        """
        Traverses the block identified by the given block_id and extracts the file names.
        """
        # Get the block in question by its ID.
        node = self.__block_by_id(block_id)
        # Extract the pointer to the next block.
        next_pointer, = struct.unpack(">I", node.offset_read(4))
        # Get the number of child blocks or records.
        count, = struct.unpack(">I", node.offset_read(4))
        self._log("Next Ptr {} with {} ".format(hex(next_pointer), hex(count)))
        filenames = []
        # If a next_pointer exists (> 0), this is an internal node: iterate over its
        # children recursively. If not, extract all file names from the current block.
        if next_pointer > 0:
            for i in range(count):
                # Get the block_id of the next child block.
                next_id, = struct.unpack(">I", node.offset_read(4))
                self._log("Child: {}".format(next_id))
                # Traverse it recursively.
                files = self.traverse(next_id)
                filenames += files
                # Also get the file name of the record that follows the child pointer.
                filename = node.read_filename()
                self._log("Filename: ", filename)
                filenames.append(filename)
            # Now that we traversed all children of the next_pointer, traverse the pointer itself.
            # TODO: Check if that is really necessary as the last child should be the current node... (or so?)
            files = self.traverse(next_pointer)
            filenames += files
        else:
            # We're in a leaf node, so extract the file names directly.
            for i in range(count):
                f = node.read_filename()
                filenames.append(f)
        return filenames
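
# Minimal usage sketch (not part of the original module): parse a .DS_Store file
# and print the file names it references. The default path is just an example.
if __name__ == "__main__":
    import sys

    path = sys.argv[1] if len(sys.argv) > 1 else ".DS_Store"  # hypothetical default
    with open(path, "rb") as handle:
        data = handle.read()
    for name in DS_Store(data, debug=False).traverse_root():
        print(name)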