Skip to content
Merged
7 changes: 7 additions & 0 deletions Changelog.rst
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
Version ?.?.?
-------------

**2026-??-??**

* Allow negative slices when indexing chunked data by `David Hassell <https://github.com/davidhassell>`_ in https://github.com/NCAS-CMS/pyfive/pull/170

Version 1.0.1
-------------

Expand Down
18 changes: 11 additions & 7 deletions pyfive/h5d.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,13 +223,17 @@ def get_data(self, args, fillvalue):
return np.full(self.shape, fillvalue, dtype=self.dtype)[args]

def iter_chunks(self, args):
"""
Iterate over chunks in a chunked dataset.
The optional sel argument is a slice or tuple of slices that defines the region to be used.
If not set, the entire dataspace will be used for the iterator.
For each chunk within the given region, the iterator yields a tuple of slices that gives the
intersection of the given chunk with the selection area.
This can be used to read data in that chunk.
"""Iterate over chunks in a chunked dataset.

The args argument is a (possibly empty) sequence of indices
that defines the region to be used. If an empty sequence then
the entire dataspace will be used for the iterator.

For each chunk within the given region, the iterator yields a
tuple of indices that gives the intersection of the given
chunk with the selection area. This can be used to read data
in that chunk.

"""
if not self.__chunk_init_check():
return None
Expand Down
143 changes: 133 additions & 10 deletions pyfive/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,14 +58,6 @@ class BoundsCheckError(_BaseZarrIndexError):
_msg = "index out of bounds for dimension with length {0}"


class NegativeStepError(IndexError):
def __init__(self):
super().__init__("only slices with step >= 1 are supported")


# And the rest of the code is the original file.


def is_integer(x):
"""True if x is an integer (both pure Python or NumPy).

Expand Down Expand Up @@ -228,8 +220,6 @@ class SliceDimIndexer:
def __init__(self, dim_sel, dim_len, dim_chunk_len):
# normalize
self.start, self.stop, self.step = dim_sel.indices(dim_len)
if self.step < 1:
raise NegativeStepError()

# store attributes
self.dim_len = dim_len
Expand Down Expand Up @@ -664,6 +654,9 @@ def __init__(self, selection, array):
# normalize list to array
selection = replace_lists(selection)

# Reverse negative slices
selection, reverse_dims = replace_negative_slices(selection, array._shape)

# setup per-dimension indexers
dim_indexers = []
for dim_sel, dim_len, dim_chunk_len in zip(
Expand Down Expand Up @@ -696,6 +689,8 @@ def __init__(self, selection, array):
s.nitems for s in self.dim_indexers if not isinstance(s, IntDimIndexer)
)
self.is_advanced = not is_basic_selection(selection)
self.reverse_dims = reverse_dims

if self.is_advanced:
self.drop_axes = tuple(
i
Expand Down Expand Up @@ -725,6 +720,34 @@ def __iter__(self):
if not is_basic_selection(out_selection):
out_selection = ix_(out_selection, self.shape)

if self.reverse_dims:
# A requested negative slice was changed to a positive
# slice (e.g. slice(7, 3, -1) -> slice(4, 8, 1)), so
# modify the index to the output array to ensure that
# the chunk selection goes in the correct place and in
# the correct order.
#
# E.g. For an output array axis of size 7:
# slice(0, 1) -> slice(6, 5, -1)
# slice(1, 4) -> slice(5, 2, -1)
# slice(4, 7) -> slice(2, None, -1)
#
# Note that the step for the modified output array
# slice is always -1
out_selection = list(out_selection)
for i in self.reverse_dims:
size = self.shape[i]
start, stop, step = out_selection[i].indices(size)

start = size - start - 1
stop = size - stop - 1
if stop < 0:
stop = None

out_selection[i] = slice(start, stop, -1)

out_selection = tuple(out_selection)

yield ChunkProjection(chunk_coords, chunk_selection, out_selection)


Expand Down Expand Up @@ -1068,6 +1091,106 @@ def make_slice_selection(selection):
return ls


def replace_negative_slices(selection, shape):
"""Replace negative slices with positve slices.

A slice object that is decreasing (i.e. with a negative step), is
recast as an increasing slice (i.e. with a positive step. For
example ``slice(7,3,-1)`` would be cast as ``slice(4,8,1)``. This
is to facilitate finding which chunks are touched by the
index. The dimensions in the indexed array for which this has
occurred are returned so that they can be reversed later.

All other indices are unchanged.

Parameters
----------
selection : numpy-style indices
Indices to an array of the given shape.

shape : tuple of `int`
The shape of the array.

Returns
-------
modified_selection : `tuple`
The reformatted indices that give the will correct subspace
after output dimensions have been reversed, as appropriate.

reverse_dims : `tuple`
The positions in the output array of dimensions that will need
reversing. The tuple wll be empty if there were no negative
slices.

"""
indices = replace_ellipsis(selection, shape)

# Initialise outputs
modified_selection = []
reverse_dims = []

# Dimension positions of the array *after* indexing (some
# dimensions might get dropped by integer indices)
dim = 0

# Replace a negative slice index with its reversed version
for index, size in zip(selection, shape):
if isinstance(index, slice):
start, stop, step = index.indices(size)

if step < 0:
# When the slice step is negative, transform the
# original slice to a new slice with a positive step
# such that the result of the new slice is the reverse
# of the result of the original slice.
#
# For example, if the original slice is slice(6,0,-2)
# then the new slice will be slice(2,7,2):
#
# >>> a = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
# >>> a[slice(6, 0, -2)]
# [6, 4, 2]
# >>> a[slice(2, 7, 2)]
# [2, 4, 6]
# >>> a[slice(6, 0, -2)] == list(reversed(a[slice(2, 7, 2)]))
# True
if stop == -1:
stop = None

index = slice(start, stop, step)
start, stop, step = index.indices(size)

step *= -1
div, mod = divmod(start - stop - 1, step)
div_step = div * step
start -= div_step
stop = start + div_step + 1

index = slice(start, stop, step)
reverse_dims.append(dim)

elif isinstance(index, int):
dim -= 1

elif isinstance(index, np.ndarray):
if not index.ndim:
dim -= 1

elif index == Ellipsis:
raise ValueError(
"replace_negative_slices(selection, shape) doesn't work "
"when selection contains Ellipsis. Consider doing "
"selection=pyfive.indexing.replace_ellipsis(selection, shape) "
"first"
)

dim += 1

modified_selection.append(index)

return tuple(modified_selection), tuple(reverse_dims)


class PartialChunkIterator:
"""Iterator to retrieve the specific coordinates of requested data
from within a compressed chunk.
Expand Down
67 changes: 67 additions & 0 deletions tests/test_dataset_indexing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
"""Test pyfive's abililty to index Datasets."""

import os

import numpy as np
from numpy.testing import assert_array_equal
import pytest

import pyfive

DIRNAME = os.path.dirname(__file__)
DATASET_CHUNKED_FILE = os.path.join(DIRNAME, "data/chunked.hdf5")


# Define a fixture that opens the chunked file once
@pytest.fixture(scope="module")
def chunked_file():
with pyfive.File(DATASET_CHUNKED_FILE) as hfile:
yield hfile


@pytest.mark.parametrize(
"index",
[
(slice(None, None, -1), slice(None, None, -1)),
(slice(None), slice(1000, 1, -2)),
(..., slice(None, None, -2)),
(slice(None, None, -2), ...),
(0, slice(-1, None, -3)),
(slice(-1, None, -3), 0),
],
)
def test_dataset_indexing_chunked_negative_step_slices(chunked_file, index):
"""Test orthogonal chunked indexing with negative step slices"""
d = chunked_file["dataset1"]
assert d.shape == (21, 16)

array = d[...]
assert array.shape == d.shape

assert_array_equal(d[index], array[index])


def test_dataset_indexing_replace_negative_slices():
"""Test pyfive.indexing.replace_negative_slices"""
func = pyfive.indexing.replace_negative_slices

r = list(range(8))
assert func((0, slice(6, 0, -2)), (7, 8, 9)) == ((0, slice(2, 7, 2)), (0,))
assert r[slice(6, 0, -2)] == list(reversed(r[slice(2, 7, 2)]))

assert func(([1, 2], slice(600, 1, -3), 0), (7, 8, 9)) == (
([1, 2], slice(4, 8, 3), 0),
(1,),
)
assert r[slice(600, None, -3)] == list(reversed(r[slice(1, 8, 3)]))

r = list(range(9))
assert func((0, slice(None), slice(6, 0, -2)), (7, 8, 9)) == (
(0, slice(None), slice(2, 7, 2)),
(1,),
)
assert r[slice(6, 0, -2)] == list(reversed(r[slice(2, 7, 2)]))

# Can't pass in in Ellipsis
with pytest.raises(ValueError) as error:
func((0, slice(6, 0, -2), ...), (7, 8, 9))
Loading