|
| 1 | +from io import SEEK_SET |
| 2 | +from pathlib import Path |
| 3 | +from typing import Optional |
| 4 | +from zlib import decompress |
| 5 | + |
| 6 | +from structlog import get_logger |
| 7 | + |
| 8 | +from unblob.file_utils import Endian, FileSystem, StructParser, read_until_past |
| 9 | +from unblob.models import ( |
| 10 | + Extractor, |
| 11 | + ExtractResult, |
| 12 | + File, |
| 13 | + HexString, |
| 14 | + StructHandler, |
| 15 | + ValidChunk, |
| 16 | +) |
| 17 | + |
| 18 | +"""
| 19 | +The geom_uzip header has the following structure:
| 20 | +10 bytes shebang, with newline suffix: #!/bin/sh\n |
| 21 | +13 bytes version, with newline suffix: #V2.0 Format\n or #L3.0 Format\n |
| 22 | +105 bytes command, with null bytes suffix: (kldstat -qm g_uzip||kldload geom_uzip)>&-&&mount_cd9660 /dev/`mdconfig -af $0`.uzip $1\nexit $?\n\x00\x00\x00\x00\x00\x00\x00\x00\x00 |
| 23 | +""" |
| 24 | + |
# C-style struct definition describing the uzip header. It is handed to
# StructParser in UZIPExtractor.extract and exposed on UZIPHandler as its
# C_DEFINITIONS attribute.
# Layout: three fixed-size char fields (10 + 13 + 105 bytes), two uint32
# fields (block_size, block_count), then a table of `block_count` uint64
# entries (`toc`).
C_DEFINITIONS = r"""
 typedef struct uzip_header{
 char magic[10]; /* '10 bytes '*/
 char version[13]; /* 13 bytes */
 char format[105]; /* 105 bytes */
 uint32_t block_size; /* 4 bytes */
 uint32_t block_count; /* 4 bytes - Number of blocks */
 uint64_t toc[block_count]; /* table of content */
 } uzip_header_t;
"""
# Name of the struct inside C_DEFINITIONS that is parsed as the header.
HEADER_STRUCT = "uzip_header_t"

# Module-level structlog logger.
logger = get_logger()
| 38 | + |
| 39 | + |
class UZIPExtractor(Extractor):
    """Extractor that rebuilds a single output file from a uzip image."""

    def extract(self, inpath: Path, outdir: Path):
        """Decompress the blocks listed in the header TOC into one output file.

        The header is parsed as a big-endian ``uzip_header_t``. Each pair of
        consecutive TOC offsets delimits one zlib-compressed span; the spans
        are decompressed in TOC order and appended to a file named after the
        stem of ``inpath``, created through ``FileSystem(outdir)``.

        Args:
            inpath: Path of the uzip file to unpack.
            outdir: Directory handed to ``FileSystem`` for the output file.

        Returns:
            An ``ExtractResult`` carrying the problems recorded by the
            ``FileSystem`` instance.
        """
        # Open the input as a context manager so it is released even when
        # header parsing or decompression raises.
        with File.from_path(inpath) as infile:
            parser = StructParser(C_DEFINITIONS)
            header = parser.parse(HEADER_STRUCT, infile, Endian.BIG)
            fs = FileSystem(outdir)
            outpath = Path(inpath.stem)
            with fs.open(outpath, "wb+") as outfile:
                # Only spans between two consecutive TOC entries are read;
                # nothing located after the last TOC offset is decompressed.
                for current_offset, next_offset in zip(header.toc[:-1], header.toc[1:]):
                    compressed_len = next_offset - current_offset
                    if compressed_len == 0:
                        # Two identical offsets: no data to decompress here.
                        continue
                    infile.seek(current_offset, SEEK_SET)
                    outfile.write(decompress(infile.read(compressed_len)))
        return ExtractResult(reports=fs.problems)
| 55 | + |
| 56 | + |
class UZIPHandler(StructHandler):
    """Handler that recognises uzip images and computes their extent."""

    NAME = "uzip"
    # Hex pattern for "#!/bin/sh\n#", then either "V2" or "L3", then
    # ".0 Format\n".
    PATTERNS = [
        HexString(
            "23 21 2F 62 69 6E 2F 73 68 0A 23 (56 32 | 4c 33) 2e 30 20 46 6f 72 6d 61 74 0A"
        )
    ]

    HEADER_STRUCT = HEADER_STRUCT
    C_DEFINITIONS = C_DEFINITIONS
    EXTRACTOR = UZIPExtractor()

    def calculate_chunk(self, file: File, start_offset: int) -> Optional[ValidChunk]:
        """Compute the boundaries of the uzip chunk starting at ``start_offset``.

        Args:
            file: File being scanned; the header is parsed from it as
                big-endian via ``self.parse_header``.
            start_offset: Offset in ``file`` where the chunk begins.

        Returns:
            A ``ValidChunk`` from ``start_offset`` to the computed end offset,
            or ``None`` when the header declares zero blocks and therefore
            has no TOC entry to derive an end from.
        """
        header = self.parse_header(file, Endian.BIG)
        if header.block_count == 0:
            # toc holds block_count entries, so an empty table has no last
            # offset; report "no chunk" instead of raising IndexError.
            return None
        # Seek to the last TOC offset, taken relative to the chunk start.
        end_offset = start_offset + header.toc[-1]
        file.seek(end_offset, SEEK_SET)
        # The chunk end is whatever read_until_past reports for b"\x00" from
        # that position (presumably skipping zero padding - TODO confirm).
        end_offset = read_until_past(file, b"\x00")
        return ValidChunk(
            start_offset=start_offset,
            end_offset=end_offset,
        )
0 commit comments