Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: make handling data with more batch dimensions possible #54

Merged
merged 2 commits into from
Jan 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 23 additions & 8 deletions rul_datasets/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,28 +60,35 @@ def get_targets_from_file_paths(
return targets


def extract_windows(seq: np.ndarray, window_size: int, dilation: int = 1) -> np.ndarray:
    """
    Extract sliding windows from a sequence.

    The step size is considered to be one. Each window contains `window_size` time
    steps that are spaced `dilation` steps apart, so a single window spans
    `(window_size - 1) * dilation + 1` steps of the sequence and
    `len(seq) - (window_size - 1) * dilation` windows are extracted. With the
    default dilation of one, this is `len(seq) - window_size + 1`. The windows are
    stacked along a new first axis, e.g. a sequence of shape [len_seq, num_channels]
    yields [num_windows, window_size, num_channels].

    Args:
        seq: sequence to extract windows from
        window_size: length of the sliding window
        dilation: dilation of the sliding window
    Returns:
        array of sliding windows
    Raises:
        ValueError: if dilation is smaller than one or if the dilated window does
            not fit into the sequence
    """
    if dilation < 1:
        raise ValueError(f"Dilation must be at least one, but got {dilation}.")

    # The number of windows depends on the dilated span, not on window_size alone.
    # For dilation == 1 this check is equivalent to `window_size > len(seq)`.
    num_frames = seq.shape[0] - (window_size - 1) * dilation
    if num_frames < 1:
        raise ValueError(
            f"Cannot extract windows of size {window_size} with dilation {dilation} "
            f"from a sequence of length {len(seq)}."
        )

    # Row i of window_idx holds the time steps i, i + dilation, i + 2 * dilation, ...
    offsets_in_window = np.arange(window_size)[None, :] * dilation
    window_idx = offsets_in_window + np.arange(num_frames)[:, None]
    windows = seq[window_idx]

    return windows
Expand Down Expand Up @@ -137,7 +144,15 @@ def to_tensor(


def feature_to_tensor(features: np.ndarray, dtype: torch.dtype) -> torch.Tensor:
    """
    Convert a numpy array to a torch tensor of `dtype` and swap the last dimensions.

    The function assumes that the last dimension of the numpy array is the channel
    dimension, and the second to last is the time dimension. All preceding dimensions
    are considered to be batch dimensions.

    Args:
        features: numpy array to convert; needs at least two dimensions
        dtype: dtype of the resulting tensor
    Returns:
        tensor with the same data as `features` where the last two dimensions are
        swapped, i.e. [..., time, channels] becomes [..., channels, time]
    """
    # Negative indices address the trailing dimensions, so any number of leading
    # batch dimensions is handled without branching on the rank of the input.
    return torch.transpose(torch.tensor(features, dtype=dtype), -1, -2)
3 changes: 3 additions & 0 deletions tests/reader/test_ncmapss.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,14 @@ def test_additional_hparams():
def test_prepare_data(should_run, mocker):
mocker.patch("os.path.exists", return_value=not should_run)
mock_save_scaler = mocker.patch("rul_datasets.reader.ncmapss.scaling.save_scaler")
mock_download = mocker.patch("rul_datasets.reader.ncmapss._download_ncmapss")

NCmapssReader(1).prepare_data()
if should_run:
mock_download.assert_called_once()
mock_save_scaler.assert_called_once()
else:
mock_download.assert_not_called()
mock_save_scaler.assert_not_called()


Expand Down
19 changes: 11 additions & 8 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,25 +54,28 @@ def test_get_targets_from_file_paths(file_path_func):
# The xfail case uses a window that is longer than the 30-step input, so that
# extract_windows is actually expected to raise for every dilation.
@pytest.mark.parametrize(
    "window_size", [1, 5, 10, pytest.param(31, marks=pytest.mark.xfail)]
)
@pytest.mark.parametrize("dilation", [1, 2, 3])
def test_extract_windows(window_size, dilation):
    """Check the number and the content of windows for several dilations."""
    inputs = np.arange(30)
    windows = utils.extract_windows(inputs, window_size, dilation)

    expected_num_windows = len(inputs) - (window_size - 1) * dilation
    # Without this check, an empty result would pass the loop below vacuously.
    assert len(windows) == expected_num_windows
    for i in range(expected_num_windows):
        # A strided slice yields exactly window_size elements spaced by dilation.
        expected_window = inputs[i : (i + window_size * dilation) : dilation]
        npt.assert_equal(windows[i], expected_window)


@pytest.mark.parametrize("num_targets", [0, 1, 2])
def test_to_tensor(num_targets):
features = [np.random.randn(10, 100, 2)]
@pytest.mark.parametrize("num_batch_dims", [0, 1, 2, 3])
def test_to_tensor(num_targets, num_batch_dims):
batch_dims = (10,) * num_batch_dims
features = [np.random.randn(*batch_dims, 100, 2)]
targets = [[np.arange(10)]] * num_targets

tensor_features, *tensor_targets = utils.to_tensor(features, *targets)

assert isinstance(tensor_features, list)
assert tensor_features[0].shape == (10, 2, 100)
assert tensor_features[0].shape == (*batch_dims, 2, 100)
assert tensor_features[0].dtype == torch.float32

assert len(tensor_targets) == num_targets
Expand Down
Loading