Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 37 additions & 3 deletions pyfive/h5d.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def __init__(self, dataobject, pseudo_chunking_size_MB=4):
# throws a flake8 wobbly for Python<3.10; match is Py3.10+ syntax
match self.layout_class: # noqa
case 0: #compact storage
raise NotImplementedError("Compact Storage")
self._data = self._get_compact_data(dataobject)
case 1: # contiguous storage
self.data_offset, = struct.unpack_from('<Q', dataobject.msg_data, self.property_offset)
case 2: # chunked storage
Expand Down Expand Up @@ -154,14 +154,14 @@ def read_direct_chunk(self, chunk_position, **kwargs):
raise OSError("Chunk coordinates must lie on chunk boundaries")
storeinfo = self._index[chunk_position]
return storeinfo.filter_mask, self._get_raw_chunk(storeinfo)

def get_data(self, args, fillvalue):
""" Called by the dataset getitem method """
dtype = self._dtype
# throws a flake8 wobbly for Python<3.10; match is Py3.10+ syntax
match self.layout_class: # noqa
case 0: #compact storage
raise NotImplementedError("Compact Storage")
return self._read_compact_data(args, fillvalue)
case 1: # contiguous storage
if self.data_offset == UNDEFINED_ADDRESS:
# no storage is backing array, return an array of
Expand Down Expand Up @@ -373,6 +373,40 @@ def _get_contiguous_data(self, args):
except UnsupportedOperation:
return self._get_direct_from_contiguous(args)

def _get_compact_data(self, dataobject):
    """
    Extract the raw bytes of a compact dataset from its layout message.

    The first message in ``dataobject.msgs`` whose ``"type"`` is 8 is taken
    as the data layout message; its bytes are sliced out of
    ``dataobject.msg_data`` using the message's ``"offset_to_message"`` and
    ``"size"`` entries.

    Returns the slice of the layout message holding the dataset payload.

    Raises ``ValueError`` if no layout message is present or the layout
    version is not recognised, and ``NotImplementedError`` for layout
    versions 1 and 2.
    """
    layout = next((m for m in dataobject.msgs if m["type"] == 8), None)
    if layout is None:
        raise ValueError("No layout message in compact dataset?")

    start = layout["offset_to_message"]
    raw = dataobject.msg_data[start:start + layout["size"]]

    version = raw[0]
    if version in (1, 2):
        raise NotImplementedError("Compact layout v1 and v2.")
    if version not in (3, 4):
        raise ValueError("Unknown layout version.")

    # Bytes 2-3 are read as a little-endian payload length; the payload
    # itself follows immediately from byte 4.
    nbytes = int.from_bytes(raw[2:4], "little")
    return raw[4:4 + nbytes]

def _read_compact_data(self, args, fillvalue):
    """
    Return the selection ``args`` of a compact-storage dataset.

    When ``self._data`` is None there are no stored bytes to decode, so an
    array of shape ``self.shape`` filled with ``fillvalue`` is built and
    indexed with ``args``. Otherwise ``self._data`` is interpreted as a
    buffer of ``self._dtype`` elements, reshaped to ``self.shape`` and
    indexed with ``args``.
    """
    if self._data is None:
        # Start from the dataset's own dtype; previously ``dtype`` was only
        # bound in the tuple case, so every ordinary dtype hit an
        # UnboundLocalError here.
        dtype = self._dtype
        if isinstance(dtype, tuple):
            # A tuple is not usable as a numpy dtype, so fall back to
            # whatever dtype numpy infers for the fill value.
            dtype = np.array(fillvalue).dtype
        return np.full(self.shape, fillvalue, dtype=dtype)[args]

    view = np.frombuffer(self._data, dtype=self._dtype).reshape(self.shape)
    return view[args]


def _get_direct_from_contiguous(self, args=None):
"""
Expand Down
36 changes: 36 additions & 0 deletions tests/test_compact.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import os

import numpy as np
import pytest
from numpy.testing import assert_array_equal

import pyfive
import h5py


def test_compact_dataset_hdf5(name, data):
    """Check that pyfive reads the 'compact' dataset back equal to ``data``."""
    with pyfive.File(name) as hfile:
        compact = hfile['compact']
        contents = compact[...]
        assert_array_equal(contents, data)


@pytest.fixture(scope='module')
def data():
    """Reference values stored in the compact dataset: int32 1 through 4."""
    values = np.arange(1, 5, dtype=np.int32)
    return values


@pytest.fixture(scope='module')
def name(data):
    """
    Write ``data`` into a compact-layout HDF5 dataset and return the file path.

    The file is created as ``compact.hdf5`` next to this test module, with a
    single dataset called ``compact`` whose creation property list requests
    the COMPACT layout via h5py's low-level API.
    """
    name = os.path.join(os.path.dirname(__file__), 'compact.hdf5')

    # Context manager guarantees the file is closed even if dataset
    # creation or the write fails part-way through.
    with h5py.File(name, 'w', libver='earliest') as f:
        dtype = h5py.h5t.NATIVE_INT32
        space = h5py.h5s.create_simple(data.shape)
        dcpl = h5py.h5p.create(h5py.h5p.DATASET_CREATE)
        dcpl.set_layout(h5py.h5d.COMPACT)
        dset_id = h5py.h5d.create(f.id, b"compact", dtype, space, dcpl=dcpl)
        dset_id.write(h5py.h5s.ALL, h5py.h5s.ALL, data)

    return name
Loading