Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion pyfive/btree.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,9 +229,18 @@ def _filter_chunk(cls, chunk_buffer, filter_mask, filter_pipeline, itemsize):
cls._verify_fletcher32(chunk_buffer)
# strip off 4-byte checksum from end of buffer
chunk_buffer = chunk_buffer[:-4]
elif filter_id == LZF_FILTER:
try:
import lzf
except ImportError as e:
raise ModuleNotFoundError(
"LZF codec requires optional package 'python-lzf'."
) from e
uncompressed_len = struct.unpack(">H", chunk_buffer[:2])[0]
chunk_buffer = lzf.decompress(chunk_buffer, uncompressed_len)
else:
raise NotImplementedError(
"Filter with id: %i import supported" % (filter_id))
"Filter with id: %i is not supported" % (filter_id))
return chunk_buffer

@staticmethod
Expand Down Expand Up @@ -467,6 +476,7 @@ def _parse_record(self, record):
SZIP_FILTER = 4
NBIT_FILTER = 5
SCALEOFFSET_FILTER = 6
LZF_FILTER = 32000


# Attribute message B-Tree node types
Expand Down
4 changes: 3 additions & 1 deletion pyfive/dataobjects.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from pyfive.btree import BTreeV1Groups, BTreeV1RawDataChunks
from pyfive.btree import BTreeV2GroupNames, BTreeV2GroupOrders
from pyfive.btree import BTreeV2AttrCreationOrder, BTreeV2AttrNames
from pyfive.btree import GZIP_DEFLATE_FILTER, SHUFFLE_FILTER, FLETCH32_FILTER
from pyfive.btree import GZIP_DEFLATE_FILTER, SHUFFLE_FILTER, FLETCH32_FILTER, LZF_FILTER
from pyfive.misc_low_level import Heap, SymbolTable, GlobalHeap, FractalHeap, GLOBAL_HEAP_ID
from pyfive.h5d import DatasetID
from pyfive.indexing import OrthogonalIndexer, ZarrArrayStub
Expand Down Expand Up @@ -393,6 +393,8 @@ def compression(self):
return None
if GZIP_DEFLATE_FILTER in self._filter_ids:
return 'gzip'
elif LZF_FILTER in self._filter_ids:
return 'lzf'
return None

@property
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ test = [
"h5py",
"netCDF4",
"moto",
"python-lzf",
"s3fs>=2025.9.0", # v-mismatch with boto3 results in ancient s3fs
]

Expand Down
7 changes: 5 additions & 2 deletions tests/test_filter_pipeline_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,15 @@ def generate_data():


@pytest.mark.parametrize("chunk_size", [None, (10, 10), (20, 20)], ids=lambda x: f"chunk_{x}")
@pytest.mark.parametrize("compression", [None, 9], ids=lambda x: f"compression_{x}")
@pytest.mark.parametrize("compression", [None, 9, "lzf"], ids=lambda x: f"compression_{x}")
@pytest.mark.parametrize("shuffle", [True, False], ids=lambda x: f"shuffle_{x}")
@pytest.mark.parametrize("fletcher32", [True, False], ids=lambda x: f"fletcher32_{x}")
def test_hdf5_filters(modular_tmp_path, generate_data, chunk_size, compression, shuffle, fletcher32):
if compression == "lzf" and chunk_size is None and shuffle is True:
pytest.xfail(reason="lzf compression requires chunk_size with shuffle=True")

data = generate_data
file_name = f"test_{chunk_size}_{compression}_{shuffle}_{fletcher32}.hdf5"
file_name = modular_tmp_path / f"test_{chunk_size}_{compression}_{shuffle}_{fletcher32}.hdf5"

with h5py.File(file_name, "w") as f:
f.create_dataset("data", data=data, chunks=chunk_size, shuffle=shuffle, fletcher32=fletcher32, compression=compression)
Expand Down
Loading