Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pyfive/dataobjects.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,8 @@ def _attr_value(self, dtype, buf, count, offset):
if isinstance(dtype, tuple):
if dtype[0] == "ENUMERATION":
dtype = np.dtype(dtype[1], metadata={'enum': dtype[2]})
elif dtype[0] == "COMPOUND":
dtype = np.dtype(dtype[1])

if isinstance(dtype, tuple):
dtype_class = dtype[0]
Expand Down
2 changes: 1 addition & 1 deletion pyfive/datatype_msg.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ def _determine_dtype_compound(self, datatype_msg):
prop2['dim_size_4'] == 0
)
if names_valid and dtypes_valid and offsets_valid and props_valid:
return complex_dtype_map[dtype1]
return "COMPOUND", complex_dtype_map[dtype1]

raise NotImplementedError("Compound dtype not supported.")

Expand Down
2 changes: 2 additions & 0 deletions pyfive/h5d.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@ def __init__(self, dataobject, pseudo_chunking_size_MB=4):
if isinstance(dataobject.dtype,tuple):
if dataobject.dtype[0] == 'ENUMERATION':
self._dtype = np.dtype(dataobject.dtype[1], metadata={'enum':dataobject.dtype[2]})
elif dataobject.dtype[0] == 'COMPOUND':
self._dtype = np.dtype(dataobject.dtype[1])
else:
self._dtype = dataobject.dtype
else:
Expand Down
14 changes: 12 additions & 2 deletions pyfive/h5py.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@


from pyfive.datatype_msg import DatatypeMessage
from pyfive.h5t import TypeEnumID
from pyfive.h5t import TypeID, TypeEnumID, TypeCompoundID

import numpy as np
from pathlib import PurePosixPath
Expand All @@ -15,7 +15,17 @@ class Datatype:
suitable for use with enumerations.
"""
def __init__(self, name, hfile, raw_dtype):
self.id = TypeEnumID(raw_dtype)
id = raw_dtype
if isinstance(raw_dtype, tuple):
if raw_dtype[0] == "ENUMERATION":
id = TypeEnumID(raw_dtype[1:])
elif raw_dtype[0] == "COMPOUND":
id = TypeCompoundID(raw_dtype[1])
elif raw_dtype[0] == "VLEN_SEQUENCE":
id = TypeID(raw_dtype[1])
else:
id = TypeID(id)
self.id = id
path = PurePosixPath(name)
self.name = path.name
self.parent = str(path.parent) if str(path.parent) != '' else '/'
Expand Down
67 changes: 65 additions & 2 deletions pyfive/h5t.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,36 @@ def check_dtype(**kwds):
else:
return None

# todo: refactor the following classes, so TypeEnumID and TypeCompoundID sublass from the base TypeID.
class TypeID:
    """
    Exposes the internal structure of a generic datatype for
    Datatype objects. Instantiated by pyfive from low-level hdf5
    structure information; user code should not normally need it.
    """

    def __init__(self, raw_dtype):
        """
        Build from the raw dtype string read from the datatype
        message. (Not the same init signature as h5py!)
        """
        super().__init__()
        # Swap an explicit little-endian marker for numpy's
        # "byte order not applicable" character.
        self.kind = raw_dtype.replace('<', '|')

    def __eq__(self, other):
        # Only instances of exactly the same class can compare equal.
        return type(self) == type(other) and self.dtype == other.dtype

    @property
    def dtype(self):
        """The equivalent numpy dtype."""
        return np.dtype(self.kind)


class TypeEnumID:
"""
Used by DataType to expose internal structure of an enum
Expand All @@ -87,10 +117,11 @@ def __init__(self, raw_dtype):
This is not the same init signature as h5py!
"""
super().__init__()
enum, dtype, enumdict = raw_dtype
dtype, enumdict = raw_dtype
self.metadata = {'enum':enumdict}
self.__reversed = None
self.kind = dtype.replace('<','|')

def enum_valueof(self, name):
"""
Get the value associated with an enum name.
Expand All @@ -105,10 +136,12 @@ def enum_nameof(self, index):
Determine the name associated with the given value.
"""
return self.__reversed[index]

def __eq__(self, other):
if type(self) != type(other):
return False
return self.metadata == other.metadata

@property
def dtype(self):
"""
Expand All @@ -121,5 +154,35 @@ def dtype(self):
x = my_datatype.id.dtype
enum_dict = x.metadata
"""
return np.dtype(self.kind,metadata=self.metadata)
return np.dtype(self.kind, metadata=self.metadata)


class TypeCompoundID:
    """
    Exposes the internal structure of a compound datatype for
    Datatype objects. Instantiated by pyfive from low-level hdf5
    structure information; user code should not normally need it.
    """

    def __init__(self, raw_dtype):
        """
        Build from the raw dtype string read from the datatype
        message. (Not the same init signature as h5py!)
        """
        super().__init__()
        # Replace an explicit little-endian marker with numpy's
        # byte-order-not-applicable character.
        normalised = raw_dtype.replace('<', '|')
        self.kind = normalised

    def __eq__(self, other):
        if type(other) is not type(self):
            return False
        return other.dtype == self.dtype

    @property
    def dtype(self):
        """The equivalent numpy dtype."""
        return np.dtype(self.kind)

146 changes: 96 additions & 50 deletions tests/make_dataset_datatypes_file.py
Original file line number Diff line number Diff line change
@@ -1,55 +1,101 @@
#! /usr/bin/env python
""" Create a HDF5 file with datasets of many datatypes . """
import sys
import h5py
import numpy as np
from pathlib import Path

f = h5py.File('dataset_datatypes.hdf5', 'w')

# signed intergers
common_signed_args = {
'shape': (4, ),
'data': -np.arange(4),
'track_times': False,
}

f.create_dataset('int08_little', dtype='<i1', **common_signed_args)
f.create_dataset('int16_little', dtype='<i2', **common_signed_args)
f.create_dataset('int32_little', dtype='<i4', **common_signed_args)
f.create_dataset('int64_little', dtype='<i8', **common_signed_args)

f.create_dataset('int08_big', dtype='>i1', **common_signed_args)
f.create_dataset('int16_big', dtype='>i2', **common_signed_args)
f.create_dataset('int32_big', dtype='>i4', **common_signed_args)
f.create_dataset('int64_big', dtype='>i8', **common_signed_args)

# unsigned intergers
common_unsigned_args = {
'shape': (4, ),
'data': np.arange(4),
'track_times': False,
}

f.create_dataset('uint08_little', dtype='<u1', **common_unsigned_args)
f.create_dataset('uint16_little', dtype='<u2', **common_unsigned_args)
f.create_dataset('uint32_little', dtype='<u4', **common_unsigned_args)
f.create_dataset('uint64_little', dtype='<u8', **common_unsigned_args)

f.create_dataset('uint08_big', dtype='>u1', **common_unsigned_args)
f.create_dataset('uint16_big', dtype='>u2', **common_unsigned_args)
f.create_dataset('uint32_big', dtype='>u4', **common_unsigned_args)
f.create_dataset('uint64_big', dtype='>u8', **common_unsigned_args)

# floating point
common_float_args = {
'shape': (4, ),
'data': np.arange(4),
'track_times': False,
}

f.create_dataset('float32_little', dtype='<f4', **common_float_args)
f.create_dataset('float64_little', dtype='<f8', **common_float_args)

f.create_dataset('float32_big', dtype='>f4', **common_float_args)
f.create_dataset('float64_big', dtype='>f8', **common_float_args)

f.close()

def create_file(path):
    """Create the test HDF5 file at *path*.

    Writes datasets and committed (named) datatypes covering many
    datatypes: signed/unsigned integers, floats, complex numbers and
    variable-length (vlen) sequences, in both endiannesses.
    """

    with h5py.File(path, 'w') as f:

        # signed integers
        common_signed_args = {
            'shape': (4, ),
            'data': -np.arange(4),
            'track_times': False,
        }

        # two identical committed datatypes, to exercise named-type comparison
        f["int08_little_type"] = np.dtype('<i1')
        f["int08_little_type2"] = np.dtype('<i1')

        f.create_dataset('int08_little', dtype='<i1', **common_signed_args)
        f.create_dataset('int16_little', dtype='<i2', **common_signed_args)
        f.create_dataset('int32_little', dtype='<i4', **common_signed_args)
        f.create_dataset('int64_little', dtype='<i8', **common_signed_args)

        f.create_dataset('int08_big', dtype='>i1', **common_signed_args)
        f.create_dataset('int16_big', dtype='>i2', **common_signed_args)
        f.create_dataset('int32_big', dtype='>i4', **common_signed_args)
        f.create_dataset('int64_big', dtype='>i8', **common_signed_args)

        # unsigned integers
        common_unsigned_args = {
            'shape': (4, ),
            'data': np.arange(4),
            'track_times': False,
        }

        f.create_dataset('uint08_little', dtype='<u1', **common_unsigned_args)
        f.create_dataset('uint16_little', dtype='<u2', **common_unsigned_args)
        f.create_dataset('uint32_little', dtype='<u4', **common_unsigned_args)
        f.create_dataset('uint64_little', dtype='<u8', **common_unsigned_args)

        f.create_dataset('uint08_big', dtype='>u1', **common_unsigned_args)
        f.create_dataset('uint16_big', dtype='>u2', **common_unsigned_args)
        f.create_dataset('uint32_big', dtype='>u4', **common_unsigned_args)
        f.create_dataset('uint64_big', dtype='>u8', **common_unsigned_args)

        # floating point
        common_float_args = {
            'shape': (4, ),
            'data': np.arange(4),
            'track_times': False,
        }

        f.create_dataset('float32_little', dtype='<f4', **common_float_args)
        f.create_dataset('float64_little', dtype='<f8', **common_float_args)

        f.create_dataset('float32_big', dtype='>f4', **common_float_args)
        f.create_dataset('float64_big', dtype='>f8', **common_float_args)

        # complex
        common_complex_args = {
            'shape': (1, ),
            'data': 123+456.j,
            'track_times': False,
        }

        # committed complex datatypes, little and big endian
        f["complex64_little_type"] = np.dtype('<c8')
        f["complex64_little_type2"] = np.dtype('<c8')
        f["complex64_big_type"] = np.dtype('>c8')
        f["complex128_little_type"] = np.dtype('<c16')
        f["complex128_big_type"] = np.dtype('>c16')

        f.create_dataset('complex64_little', dtype='<c8', **common_complex_args)
        f.create_dataset('complex128_little', dtype='<c16', **common_complex_args)

        f.create_dataset('complex64_big', dtype='>c8', **common_complex_args)
        f.create_dataset('complex128_big', dtype='>c16', **common_complex_args)

        # vlen: one committed type and one ragged 4-element dataset per
        # (endianness, base kind, width) combination
        for endian in ["<", ">"]:
            for base in ["i", "u", "f"]:
                for width in ["1", "2", "4", "8"]:
                    if base == "f" and width == "1":
                        # there is no 1-byte float type
                        continue
                    tstr = "".join([endian, base, width])
                    dtype = h5py.vlen_dtype(np.dtype(tstr))
                    f[f"vlen_{tstr}_type"] = dtype
                    ds = f.create_dataset(f"vlen_{tstr}", (4,), dtype=dtype)
                    ds[0] = [0]
                    ds[1] = [0, 1]
                    ds[2] = [0, 1, 2]
                    ds[3] = [0, 1, 2, 3]


if __name__ == "__main__":
    # Default output lives next to this script; an explicit path may be
    # given as the first command-line argument.
    default_path = Path(__file__).parent / "dataset_datatypes.hdf5"
    filepath = Path(sys.argv[1]) if len(sys.argv) > 1 else default_path
    create_file(filepath)
44 changes: 31 additions & 13 deletions tests/make_enum_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,28 +3,46 @@
(1) the netcdf interface, and
(2) the h5py interface
"""
from netCDF4 import Dataset
import sys
import netCDF4
import h5py
import numpy as np
from pathlib import Path

clouds = ['stratus','stratus','missing','nimbus','cumulus','longcloudname']
selection = ['stratus','nimbus','missing','nimbus','longcloudname']
# Map each cloud name to its list position; a duplicate name keeps its
# last (highest) position, and 'missing' is then forced to the fill
# value 255.
enum_dict = {}
for position, cloud in enumerate(clouds):
    enum_dict[cloud] = position
enum_dict['missing'] = 255
data = [enum_dict[name] for name in selection]

ncd = Dataset('enum_variable.nc','w')
enum_type = ncd.createEnumType(np.uint8,'enum_t', enum_dict)
def create_nc_file(path):
    """Write the enum test file at *path* via the netCDF4 interface.

    Creates an enum variable filled from the module-level ``data``,
    plus extra committed types used by type-comparison tests.

    The visible span was corrupted by interleaved stale lines from an
    earlier revision (an unmanaged ``Dataset``/``close`` flow); this is
    the reconstructed context-manager version.
    """
    with netCDF4.Dataset(path, mode='w') as ncd:
        enum_type = ncd.createEnumType(np.uint8,'enum_t', enum_dict)

        # add types for checking comparison
        enum_type2 = ncd.createEnumType(np.uint8,'enum2_t', enum_dict)
        vlen_t = ncd.createVLType(np.int32, "phony_vlen")

        dim = ncd.createDimension('axis',5)
        enum_var = ncd.createVariable('enum_var',enum_type,'axis',
                                      fill_value=enum_dict['missing'])
        enum_var[:] = data


def create_hdf_file(path):
    """Write the enum test file at *path* via the h5py interface.

    Creates a single 'enum_var' dataset from the module-level ``data``
    using an enum dtype built from ``enum_dict``.
    """
    with h5py.File(path,'w') as hcd:
        dt = h5py.enum_dtype(enum_dict, basetype='i')
        # sanity check: the enum mapping must round-trip through h5py
        assert h5py.check_enum_dtype(dt) == enum_dict
        ds = hcd.create_dataset('enum_var', data=data, dtype=dt)


if __name__ == "__main__":
    # With no argument, write both test files next to this script.
    # With a path argument, its suffix selects which file to write.
    if len(sys.argv) > 1:
        target = Path(sys.argv[1])
        if target.suffix == ".hdf5":
            create_hdf_file(target)
        else:
            create_nc_file(target)
    else:
        here = Path(__file__).parent
        create_nc_file(here / 'enum_variable.nc')
        create_hdf_file(here / 'enum_variable.hdf5')
Loading
Loading