Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ofrak_core/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
### Added
- Add OFRAK requirements, requirement to test mapping, test specifications ([#656](https://github.com/redballoonsecurity/ofrak/pull/656))
- Add `-V, --version` flag to ofrak cli ([#652](https://github.com/redballoonsecurity/ofrak/pull/652))
- Add LZ4 compression format unpacker and packer with support for all frame types (modern, legacy, skippable) ([#661](https://github.com/redballoonsecurity/ofrak/pull/661))

### Fixed
- `build_image.py` uses `OFRAK_DIR` from `extra_build_args` to identify `pytest_ofrak` location for develop builds ([#657](https://github.com/redballoonsecurity/ofrak/pull/657/))
Expand Down
1 change: 1 addition & 0 deletions ofrak_core/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ intervaltree==3.1.0
keystone-engine==0.9.2
jefferson==0.4.5;python_version>="3.8"
lief==0.16.1
lz4==4.4.4
orjson==3.10.16
pefile==2023.2.7
pycdlib==1.12.0
Expand Down
1 change: 1 addition & 0 deletions ofrak_core/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ class CTypesExtension(setuptools.Extension):
"keystone-engine>=0.9.2",
"jefferson>=0.4.5;python_version>='3.8'",
"lief>=0.16.1",
"lz4>=4.4.4",
"ofrak_io>=1.1.3,==1.*",
"ofrak_type>=2.3.0,==2.*",
"ofrak_patch_maker>=4.1.0,==4.*",
Expand Down
179 changes: 179 additions & 0 deletions ofrak_core/src/ofrak/core/lz4.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
import logging
from dataclasses import dataclass

import lz4.frame # type: ignore
import lz4.block # type: ignore

from ofrak.component.identifier import Identifier
from ofrak.component.packer import Packer
from ofrak.component.unpacker import Unpacker
from ofrak.core.binary import GenericBinary
from ofrak.core.magic import MagicDescriptionPattern, MagicMimePattern
from ofrak.resource import Resource
from ofrak_type.range import Range

LOGGER = logging.getLogger(__name__)

# LZ4 frame magic numbers. Each one is a 32-bit value stored little-endian at the
# very start of a frame, so the on-disk byte order is the reverse of the hex literal.
LZ4_MODERN_MAGIC = (0x184D2204).to_bytes(4, "little")  # b"\x04\x22\x4d\x18" - modern/default frame
LZ4_LEGACY_MAGIC = (0x184C2102).to_bytes(4, "little")  # b"\x02\x21\x4c\x18" - legacy frame
# Skippable frames use any of the 16 values 0x184D2A50 .. 0x184D2A5F, i.e. the
# on-disk bytes are 0x5X 0x2A 0x4D 0x18 with X in 0-F.


@dataclass
class Lz4Data(GenericBinary):
    """
    Common parent tag for binary blobs that hold LZ4 data.

    LZ4 is a lossless compression algorithm built for high speed. The per-frame-type
    tags defined in this module all derive from this class.
    """


@dataclass
class Lz4ModernData(Lz4Data):
    """
    Tag for data in the modern (default) LZ4 frame format.

    A modern frame carries:

    - a frame descriptor holding the flags
    - an optional content size and an optional dictionary ID
    - block independence flags
    - optional block and content checksums
    - an end mark
    """


@dataclass
class Lz4LegacyData(Lz4Data):
    """
    Tag for data in the legacy LZ4 frame format.

    This is the older layout that predates the frame specification:

    - simpler structure than the modern frame
    - carries neither checksums nor metadata
    - maximum block size is fixed at 8MB
    - deprecated, yet still found in the wild
    """


@dataclass
class Lz4SkippableData(Lz4Data):
    """
    Tag for an LZ4 skippable frame.

    Skippable frames exist to embed metadata or application-specific data:

    - the payload is not compressed data
    - the payload is an arbitrary byte sequence
    - LZ4 parsers may safely skip over such a frame
    - they usually appear next to regular frames
    """
Comment on lines +33 to +70
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Having these other types isn't actually helpful. We just want one tag for regular Lz4Data.



class Lz4Identifier(Identifier):
    """
    Tag a resource with the matching LZ4 frame type based on its first four bytes.

    Magic numbers that are recognized:

    - 0x184D2204 -> `Lz4ModernData` (modern/default frame)
    - 0x184C2102 -> `Lz4LegacyData` (legacy frame)
    - 0x184D2A50-0x184D2A5F -> `Lz4SkippableData` (skippable frame)
    """

    id = b"Lz4Identifier"
    targets = (GenericBinary,)

    async def identify(self, resource: Resource, config=None) -> None:
        """
        Inspect the leading magic number and add at most one LZ4 tag.

        :param resource: The resource whose first four bytes are inspected
        :param config: Unused
        """
        magic = await resource.get_data(Range(0, 4))

        # Anything shorter than a full magic number cannot be an LZ4 frame.
        if len(magic) < 4:
            return

        if magic == LZ4_MODERN_MAGIC:
            frame_tag = Lz4ModernData
        elif magic == LZ4_LEGACY_MAGIC:
            frame_tag = Lz4LegacyData
        elif magic[1:4] == b"\x2a\x4d\x18" and 0x50 <= magic[0] <= 0x5F:
            # Skippable frames: on-disk bytes are 0x5X 0x2A 0x4D 0x18 with X in 0-F.
            frame_tag = Lz4SkippableData
        else:
            # Not an LZ4 magic number; leave the resource untouched.
            return

        resource.add_tag(frame_tag)
Comment on lines +73 to +106
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It registers magic mime identifiers, so we don't also need this identifier.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We actually do need these -- without them images are not tagged correctly. The Magic mime ones can probably be removed.



class Lz4Unpacker(Unpacker[None]):
    """
    Unpack (decompress) LZ4 data and attach the result as a single child.

    Dispatch is based on the leading magic number of the resource data:

    - Data starting with the legacy magic number is decoded block by block with
      `lz4.block`, because `lz4.frame` does not decode the legacy format.
    - Everything else (modern and skippable frames) is handed to
      `lz4.frame.decompress`.
    """

    id = b"Lz4Unpacker"
    targets = (Lz4ModernData, Lz4LegacyData, Lz4SkippableData)
    children = (GenericBinary,)

    async def unpack(self, resource: Resource, config=None):
        """
        Unpack LZ4 data.

        :param resource: The LZ4 resource to unpack
        :param config: Optional unpacker configuration

        :raises RuntimeError: if the data is not valid LZ4 format
        """
        resource_data = await resource.get_data()

        try:
            if resource_data[:4] == LZ4_LEGACY_MAGIC:
                decompressed_data = self._decompress_legacy(resource_data)
            else:
                decompressed_data = lz4.frame.decompress(resource_data)
        except RuntimeError as e:
            LOGGER.error(f"Failed to decompress LZ4 data: {e}")
            raise

        await resource.create_child(
            tags=(GenericBinary,),
            data=decompressed_data,
        )

    @staticmethod
    def _decompress_legacy(data: bytes) -> bytes:
        """
        Decode legacy-format LZ4 data that begins with the legacy magic number.

        After the magic number the data is read as a sequence of blocks, each made of
        a 4-byte little-endian compressed size followed by that many bytes of raw LZ4
        block data. Decoding stops at the end of the input. A repeated legacy magic
        number in the position of a size field is treated as the start of another
        concatenated legacy frame and skipped.

        :param data: The complete legacy-format data, including the leading magic number

        :raises RuntimeError: if a size field or block is truncated, or a block is corrupt

        :return: The concatenated decompressed contents of all blocks
        """
        # Legacy blocks carry no uncompressed size; each decodes to at most 8MB, so
        # that bound is passed to the block decoder as the output buffer size.
        max_block_output = 8 * 1024 * 1024
        size_field_len = 4

        chunks = []
        offset = len(LZ4_LEGACY_MAGIC)
        end = len(data)
        while offset < end:
            size_field = data[offset : offset + size_field_len]
            if len(size_field) < size_field_len:
                raise RuntimeError(
                    f"Truncated LZ4 legacy frame: incomplete block size at offset {offset}"
                )
            offset += size_field_len

            if size_field == LZ4_LEGACY_MAGIC:
                # Start of another concatenated legacy frame; its blocks follow directly.
                continue

            block_size = int.from_bytes(size_field, "little")
            block = data[offset : offset + block_size]
            if len(block) < block_size:
                # Also reached when a non-legacy frame follows: its magic number read
                # as a size is far larger than any valid legacy block.
                raise RuntimeError(
                    f"Truncated LZ4 legacy frame: block at offset {offset} declares "
                    f"{block_size} bytes but only {len(block)} remain"
                )

            try:
                chunks.append(lz4.block.decompress(block, uncompressed_size=max_block_output))
            except lz4.block.LZ4BlockError as e:
                # Normalize to the exception type documented by unpack().
                raise RuntimeError(f"Corrupt LZ4 legacy block at offset {offset}: {e}") from e
            offset += block_size

        return b"".join(chunks)


class Lz4Packer(Packer[None]):
    """
    Compress the only child of an LZ4 resource back into the resource itself.

    Output is produced with `lz4.frame.compress`, and the packer targets only
    resources tagged `Lz4ModernData`; legacy and skippable resources are not
    targeted by this packer.
    """

    targets = (Lz4ModernData,)

    async def pack(self, resource: Resource, config=None):
        """
        Replace the resource's data with the LZ4-compressed data of its only child.

        :param resource: The LZ4 resource to pack
        :param config: Optional packer configuration
        """
        child = await resource.get_only_child()
        uncompressed = await child.get_data()
        compressed = lz4.frame.compress(uncompressed)

        # Patch over the full current extent so the old contents are replaced entirely.
        current_length = await resource.get_data_length()
        whole_resource = Range(0, current_length)
        resource.queue_patch(whole_resource, compressed)


# Register magic MIME type and description patterns so that matching resources are
# automatically tagged as Lz4Data.
def _is_lz4_magic_description(description: str) -> bool:
    """Return True when the description starts with "lz4 compressed data" (case-insensitive)."""
    return description.lower().startswith("lz4 compressed data")


MagicMimePattern.register(Lz4Data, "application/x-lz4")
MagicDescriptionPattern.register(Lz4Data, _is_lz4_magic_description)
3 changes: 3 additions & 0 deletions ofrak_core/tests/components/assets/lz4_skip.bin
Git LFS file not shown
Loading
Loading