initializeHdf5Write and writeDictToHdf5Chunk modified to be able to write several groups in a file (#38)

* initializeHdf5Write and writeDictToHdf5Chunk modified to be able to write several groups in a file

* initializeHdf5Write and writeDictToHdf5Chunk modified to be able to write dictionaries of dictionaries

* Modifying unit test, and adding explanation to initializeHdf5Write

* Fixing the tests for the new input formats
joselotl authored May 16, 2022
1 parent a107f0d commit f73b06f
Showing 3 changed files with 40 additions and 27 deletions.
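Before the per-file diffs, a minimal sketch of the new calling convention introduced by this commit (assumptions: the `tables_io.io` alias used in the notebook below; the file name and shapes are illustrative):

```python
import tables_io

# Each keyword now names an HDF5 group; its value maps dataset names to ((shape), dtype).
groups, fout = tables_io.io.initializeHdf5Write(
    'test_multi_write.hdf5',
    data=dict(scalar=((10000,), 'f4'), vect=((10000, 3), 'f4')),
)
# 'groups' maps each group name to its h5py group; 'fout' is the open h5py.File.
```

Previously the function took a single `groupname` argument and a flat set of dataset keywords; the diffs below update the example notebook, the library, and the unit test to the nested form.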
20 changes: 15 additions & 5 deletions nb/multipleWriteHdf5_example.ipynb
@@ -85,7 +85,17 @@
"metadata": {},
"outputs": [],
"source": [
"dout = set_lengths(get_shapes_and_type(data['data']), 10000)"
"dout = {'data':set_lengths(get_shapes_and_type(data['data']), 10000)}"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "71602bd8",
"metadata": {},
"outputs": [],
"source": [
"print(dout)"
]
},
{
@@ -104,7 +114,7 @@
"outputs": [],
"source": [
"os.unlink('test_multi_write.hdf5')\n",
"group, fout = tables_io.io.initializeHdf5Write('test_multi_write.hdf5', 'data', **dout)"
"groups, fout = tables_io.io.initializeHdf5Write('test_multi_write.hdf5', **dout)"
]
},
{
@@ -126,7 +136,7 @@
" data = make_test_data()\n",
" start = i*1000\n",
" end = (i+1)*1000\n",
" tables_io.io.writeDictToHdf5Chunk(group, data['data'], start, end)"
" tables_io.io.writeDictToHdf5Chunk(groups, data, start, end)"
]
},
{
@@ -186,7 +196,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -200,7 +210,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
"version": "3.9.7"
}
},
"nbformat": 4,
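The notebook cells above allocate 10000 rows and fill them in ten 1000-row chunks via writeDictToHdf5Chunk. A self-contained sketch of the same pattern, with random arrays standing in for the notebook's make_test_data helper (names and values are illustrative):

```python
import numpy as np
import tables_io

nrows, chunk = 10000, 1000
groups, fout = tables_io.io.initializeHdf5Write(
    'test_multi_write.hdf5',
    data=dict(scalar=((nrows,), 'f4'), vect=((nrows, 3), 'f4')),
)

for i in range(nrows // chunk):
    start, end = i * chunk, (i + 1) * chunk
    # The chunk dictionary mirrors the group layout: {group_name: {dataset_name: array}}.
    data = {'data': dict(scalar=np.random.uniform(size=chunk).astype('f4'),
                         vect=np.random.uniform(size=(chunk, 3)).astype('f4'))}
    tables_io.io.writeDictToHdf5Chunk(groups, data, start, end)

fout.close()
```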
41 changes: 22 additions & 19 deletions tables_io/ioUtils.py
@@ -79,15 +79,13 @@ def getInputDataLengthHdf5(filepath, groupname=None):
return nrow


def initializeHdf5Write(filepath, groupname=None, **kwds):
def initializeHdf5Write(filepath, **kwds):
""" Prepares an hdf5 file for output
Parameters
----------
filepath : `str`
The output file name
groupname : `str` or `None`
The output group name
Returns
-------
@@ -98,34 +96,38 @@ def initializeHdf5Write(filepath, groupname=None, **kwds):
Notes
-----
The keywords should be used to create_datasets within the hdf5 file.
Each keyword should provide a tuple of ( (shape), (dtype) )
The keywords should be used to create groups within the hdf5 file.
Each keyword should provide a dictionary with the data set information of the form:
group = {'data1' : ( (shape1), (dtype1) ), 'data2' : ( (shape2), (dtype2) )}
group : `str`
Name of the Hdf5 group
data : `str`
Name of the column to be written
shape : `tuple` ( `int` )
The shape of the data for this dataset
dtype : `str`
The data type for this dataset
For example
`initialize_writeout('test.hdf5', scalar=((100000,), 'f4'), vect=((100000, 3), 'f4'))`
`initializeHdf5Write('test.hdf5', data = dict(scalar=((100000,), 'f4'), vect=((100000, 3), 'f4')))`
Would initialize an hdf5 file with two datasets, with shapes and data types as given
Would initialize an hdf5 file with one group and two datasets, with shapes and data types as given
"""
outdir = os.path.dirname(os.path.abspath(filepath))
if not os.path.exists(outdir): #pragma: no cover
os.makedirs(outdir, exist_ok=True)
outf = h5py.File(filepath, "w")
if groupname is None: #pragma: no cover
group = outf
else:
group = outf.create_group(groupname)

groups = {}
for k, v in kwds.items():
group.create_dataset(k, v[0], v[1])
return group, outf
group = outf.create_group(k)
groups[k] = group
for key, shape in v.items():
group.create_dataset(key, shape[0], shape[1])
return groups, outf


def writeDictToHdf5Chunk(fout, odict, start, end, **kwds):
def writeDictToHdf5Chunk(groups, odict, start, end, **kwds):
""" Writes a data chunk to an hdf5 file
Parameters
@@ -156,9 +158,10 @@ def writeDictToHdf5Chunk(fout, odict, start, end, **kwds):
I.e., if `key` is present in kwds in will override the name.
"""
for key, val in odict.items():
k_out = kwds.get(key, key)
fout[k_out][start:end] = val
for group_name, group in groups.items():
for key, val in odict[group_name].items():
k_out = kwds.get(key, key)
group[k_out][start:end] = val


def finalizeHdf5Write(fout, groupname=None, **kwds):
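As the nested loop above shows, writeDictToHdf5Chunk now expects a dictionary of dictionaries keyed by group name, and the extra keyword arguments still rename in-memory columns to on-disk dataset names. A hedged sketch mirroring the updated test below (the file name, array sizes, and zero-filled values are illustrative):

```python
import numpy as np
import tables_io

npdf, nbins = 40, 21
groups, fout = tables_io.io.initializeHdf5Write(
    'test_out.hdf5',
    data=dict(photoz_mode=((npdf,), 'f4'), photoz_pdf=((npdf, nbins), 'f4')),
)

# In-memory columns 'zmode' and 'pz_pdf' are redirected to the datasets
# 'photoz_mode' and 'photoz_pdf' inside the 'data' group by the keywords below.
data_dict = {'data': dict(zmode=np.zeros(npdf, dtype='f4'),
                          pz_pdf=np.zeros((npdf, nbins), dtype='f4'))}
tables_io.io.writeDictToHdf5Chunk(groups, data_dict, 0, npdf,
                                  zmode='photoz_mode', pz_pdf='photoz_pdf')
tables_io.io.finalizeHdf5Write(fout, 'md', zgrid=np.linspace(0, 4, nbins))
```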
6 changes: 3 additions & 3 deletions tests/test_fileIO.py
@@ -52,10 +52,10 @@ def test_write_output_file():
zgrid = np.linspace(0, 4, nbins)
zmode = zgrid[np.argmax(pz_pdf, axis=1)]

data_dict = dict(zmode=zmode, pz_pdf=pz_pdf)
data_dict = {'data': dict(zmode=zmode, pz_pdf=pz_pdf)}

group, outf = io.initializeHdf5Write(test_outfile, 'data', photoz_mode=((npdf,), 'f4'), photoz_pdf=((npdf, nbins), 'f4'))
io.writeDictToHdf5Chunk(group, data_dict, 0, npdf, zmode='photoz_mode', pz_pdf='photoz_pdf')
groups, outf = io.initializeHdf5Write(test_outfile, data = dict(photoz_mode=((npdf,), 'f4'), photoz_pdf=((npdf, nbins), 'f4')))
io.writeDictToHdf5Chunk(groups, data_dict, 0, npdf, zmode='photoz_mode', pz_pdf='photoz_pdf')
io.finalizeHdf5Write(outf, 'md', zgrid=zgrid)

os.unlink(test_outfile)
