Skip to content

Commit 5443b3d

Browse files
TonyBagnall, Sebastian Schmidl, chrisholder, MatthewMiddlehurst
authored
[ENH] Remove make_series part II (#2310)
* anomaly tests
* anomaly tests
* stomp and kmeansad
* pyod adaptor
* pyod adaptor
* pyod adaptor
* iforest
* seed
* split functions, use check_random
* test clean up
* check_series
* fix soft-dtw cost matrix test
* check_series
* pairwise
* viz
* remove make_series
* Update test_forecasting_plotting.py
* Update test_segmentation_plotting.py
* fixing distance tests
* fixed distance tests
* fix shape-dtw docs

---------

Co-authored-by: Sebastian Schmidl <CodeLionX@users.noreply.github.com>
Co-authored-by: chrisholder <chrisholder987@hotmail.com>
Co-authored-by: Matthew Middlehurst <pfm15hbu@gmail.com>
1 parent 980e8bb commit 5443b3d

File tree

17 files changed

+231
-315
lines changed

17 files changed

+231
-315
lines changed

aeon/clustering/averaging/tests/test_dba.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,11 @@
1010
petitjean_barycenter_average,
1111
subgradient_barycenter_average,
1212
)
13-
from aeon.testing.data_generation import make_example_3d_numpy
14-
from aeon.testing.data_generation._legacy import make_series
13+
from aeon.testing.data_generation import (
14+
make_example_1d_numpy,
15+
make_example_2d_numpy_series,
16+
make_example_3d_numpy,
17+
)
1518

1619
expected_petitjean_dba_univariate = np.array(
1720
[
@@ -231,10 +234,8 @@ def test_elastic_dba_variations(distance):
231234
"medoids",
232235
"random",
233236
(
234-
make_series(10, 1, return_numpy=True, random_state=1),
235-
make_series(
236-
n_timepoints=4, n_columns=10, return_numpy=True, random_state=1
237-
),
237+
make_example_1d_numpy(10, random_state=1),
238+
make_example_2d_numpy_series(n_timepoints=10, n_channels=4, random_state=1),
238239
),
239240
],
240241
)
@@ -303,7 +304,7 @@ def test_incorrect_input():
303304
"init_barycenter shape is invalid. Expected (1, 10) but " "got (1, 9)"
304305
),
305306
):
306-
elastic_barycenter_average(X, init_barycenter=make_series(9, return_numpy=True))
307+
elastic_barycenter_average(X, init_barycenter=make_example_1d_numpy(9))
307308

308309
# Test invalid barycenter method
309310
with pytest.raises(

aeon/distances/elastic/_msm.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,8 @@ def _msm_independent_cost_matrix(
247247
y_size = y.shape[1]
248248
cost_matrix = np.zeros((x_size, y_size))
249249
distance = 0
250-
for i in range(x.shape[0]):
250+
min_instances = min(x.shape[0], y.shape[0])
251+
for i in range(min_instances):
251252
curr_cost_matrix = _independent_cost_matrix(x[i], y[i], bounding_matrix, c)
252253
cost_matrix = np.add(cost_matrix, curr_cost_matrix)
253254
distance += curr_cost_matrix[-1, -1]

aeon/distances/elastic/_shape_dtw.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ def _identity_descriptor(x: np.ndarray) -> np.ndarray:
6060

6161
@njit(cache=True, fastmath=True)
6262
def _transform_subsequences(
63-
x: np.ndarray, descriptor: str = "identity", reach: int = 30
63+
x: np.ndarray, descriptor: str = "identity", reach: int = 15
6464
) -> np.ndarray:
6565
"""Decompose the series into sub-sequences.
6666
@@ -80,7 +80,7 @@ def _transform_subsequences(
8080
For now no other descriptors are implemented.
8181
8282
If not specified then identity is used.
83-
reach : int, default=30.
83+
reach : int, default=15.
8484
Length of the sub-sequences.
8585
8686
Returns
@@ -123,7 +123,7 @@ def shape_dtw_distance(
123123
y: np.ndarray,
124124
window: Optional[float] = None,
125125
descriptor: str = "identity",
126-
reach: int = 30,
126+
reach: int = 15,
127127
itakura_max_slope: Optional[float] = None,
128128
transformation_precomputed: bool = False,
129129
transformed_x: Optional[np.ndarray] = None,
@@ -156,7 +156,7 @@ def shape_dtw_distance(
156156
For now no other descriptors are implemented.
157157
158158
If not specified then identity is used.
159-
reach : int, default=30.
159+
reach : int, default=15.
160160
Length of the sub-sequences to consider.
161161
itakura_max_slope : float, default=None
162162
Maximum slope as a proportion of the number of time points used to create
@@ -237,7 +237,7 @@ def _shape_dtw_distance(
237237
y: np.ndarray,
238238
bounding_matrix: np.ndarray,
239239
descriptor: str = "identity",
240-
reach: int = 30,
240+
reach: int = 15,
241241
transformation_precomputed: bool = False,
242242
transformed_x: Optional[np.ndarray] = None,
243243
transformed_y: Optional[np.ndarray] = None,
@@ -297,7 +297,7 @@ def shape_dtw_cost_matrix(
297297
y: np.ndarray,
298298
window: Optional[float] = None,
299299
descriptor: str = "identity",
300-
reach: int = 30,
300+
reach: int = 15,
301301
itakura_max_slope: Optional[float] = None,
302302
transformation_precomputed: bool = False,
303303
transformed_x: Optional[np.ndarray] = None,
@@ -327,7 +327,7 @@ def shape_dtw_cost_matrix(
327327
For now no other descriptors are implemented.
328328
329329
If not specified then identity is used.
330-
reach : int, default=30.
330+
reach : int, default=15.
331331
Length of the sub-sequences.
332332
itakura_max_slope : float, default=None
333333
Maximum slope as a proportion of the number of time points used to create
@@ -398,7 +398,7 @@ def _shape_dtw_cost_matrix(
398398
y: np.ndarray,
399399
bounding_matrix: np.ndarray,
400400
descriptor: str = "identity",
401-
reach: int = 30,
401+
reach: int = 15,
402402
transformation_precomputed: bool = False,
403403
transformed_x: Optional[np.ndarray] = None,
404404
transformed_y: Optional[np.ndarray] = None,
@@ -430,7 +430,7 @@ def shape_dtw_alignment_path(
430430
y: np.ndarray,
431431
window: Optional[float] = None,
432432
descriptor: str = "identity",
433-
reach: int = 30,
433+
reach: int = 15,
434434
itakura_max_slope: Optional[float] = None,
435435
transformation_precomputed: bool = False,
436436
transformed_x: Optional[np.ndarray] = None,
@@ -460,7 +460,7 @@ def shape_dtw_alignment_path(
460460
For now no other descriptors are implemented.
461461
462462
If not specified then identity is used.
463-
reach : int, default=30.
463+
reach : int, default=15.
464464
Length of the sub-sequences.
465465
itakura_max_slope : float, default=None
466466
Maximum slope as a proportion of the number of time points used to create
@@ -520,7 +520,7 @@ def shape_dtw_pairwise_distance(
520520
y: Optional[Union[np.ndarray, list[np.ndarray]]] = None,
521521
window: Optional[float] = None,
522522
descriptor: str = "identity",
523-
reach: int = 30,
523+
reach: int = 15,
524524
itakura_max_slope: Optional[float] = None,
525525
transformation_precomputed: bool = False,
526526
transformed_x: Optional[np.ndarray] = None,
@@ -552,7 +552,7 @@ def shape_dtw_pairwise_distance(
552552
For now no other descriptors are implemented.
553553
554554
If not specified then identity is used.
555-
reach : int, default=30.
555+
reach : int, default=15.
556556
Length of the sub-sequences.
557557
itakura_max_slope : float, default=None
558558
Maximum slope as a proportion of the number of time points used to create

aeon/distances/elastic/tests/test_alignment_path.py

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,10 @@
1010
DISTANCES_DICT,
1111
SINGLE_POINT_NOT_SUPPORTED_DISTANCES,
1212
)
13-
from aeon.testing.data_generation._legacy import make_series
13+
from aeon.testing.data_generation import (
14+
make_example_1d_numpy,
15+
make_example_2d_numpy_series,
16+
)
1417

1518

1619
def _validate_alignment_path_result(
@@ -57,26 +60,26 @@ def test_alignment_path(dist):
5760
# ================== Test equal length ==================
5861
# Test univariate of shape (n_timepoints,)
5962
_validate_alignment_path_result(
60-
make_series(10, return_numpy=True, random_state=1),
61-
make_series(10, return_numpy=True, random_state=2),
63+
make_example_1d_numpy(10, random_state=1),
64+
make_example_1d_numpy(10, random_state=2),
6265
dist["name"],
6366
dist["distance"],
6467
dist["alignment_path"],
6568
)
6669

6770
# Test univariate of shape (1, n_timepoints)
6871
_validate_alignment_path_result(
69-
make_series(10, 1, return_numpy=True, random_state=1),
70-
make_series(10, 1, return_numpy=True, random_state=2),
72+
make_example_2d_numpy_series(10, 1, random_state=1),
73+
make_example_2d_numpy_series(10, 1, random_state=1),
7174
dist["name"],
7275
dist["distance"],
7376
dist["alignment_path"],
7477
)
7578

7679
# Test multivariate of shape (n_channels, n_timepoints)
7780
_validate_alignment_path_result(
78-
make_series(10, 10, return_numpy=True, random_state=1),
79-
make_series(10, 10, return_numpy=True, random_state=2),
81+
make_example_2d_numpy_series(10, 10, random_state=1),
82+
make_example_2d_numpy_series(10, 10, random_state=2),
8083
dist["name"],
8184
dist["distance"],
8285
dist["alignment_path"],
@@ -85,26 +88,26 @@ def test_alignment_path(dist):
8588
# ================== Test unequal length ==================
8689
# Test univariate unequal length of shape (n_timepoints,)
8790
_validate_alignment_path_result(
88-
make_series(5, return_numpy=True, random_state=1),
89-
make_series(10, return_numpy=True, random_state=2),
91+
make_example_1d_numpy(5, random_state=1),
92+
make_example_1d_numpy(10, random_state=2),
9093
dist["name"],
9194
dist["distance"],
9295
dist["alignment_path"],
9396
)
9497

9598
# Test univariate unequal length of shape (1, n_timepoints)
9699
_validate_alignment_path_result(
97-
make_series(5, 1, return_numpy=True, random_state=1),
98-
make_series(10, 1, return_numpy=True, random_state=2),
100+
make_example_2d_numpy_series(5, 1, random_state=1),
101+
make_example_2d_numpy_series(10, 1, random_state=2),
99102
dist["name"],
100103
dist["distance"],
101104
dist["alignment_path"],
102105
)
103106

104107
# Test multivariate unequal length of shape (n_channels, n_timepoints)
105108
_validate_alignment_path_result(
106-
make_series(5, 10, return_numpy=True, random_state=1),
107-
make_series(10, 10, return_numpy=True, random_state=2),
109+
make_example_2d_numpy_series(5, 10, random_state=1),
110+
make_example_2d_numpy_series(10, 10, random_state=2),
108111
dist["name"],
109112
dist["distance"],
110113
dist["alignment_path"],

aeon/distances/elastic/tests/test_cost_matrix.py

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,10 @@
1010
DISTANCES_DICT,
1111
SINGLE_POINT_NOT_SUPPORTED_DISTANCES,
1212
)
13-
from aeon.testing.data_generation._legacy import make_series
13+
from aeon.testing.data_generation import (
14+
make_example_1d_numpy,
15+
make_example_2d_numpy_series,
16+
)
1417

1518

1619
def _validate_cost_matrix_result(
@@ -61,6 +64,8 @@ def _validate_cost_matrix_result(
6164
cost_matrix_result[-1, -1] / max(x.shape[-1], y.shape[-1])
6265
)
6366
assert_almost_equal(curr_distance, distance_result)
67+
elif name == "soft_dtw":
68+
assert_almost_equal(abs(cost_matrix_result[-1, -1]), distance_result)
6469
else:
6570
assert_almost_equal(cost_matrix_result[-1, -1], distance_result)
6671

@@ -88,26 +93,26 @@ def test_cost_matrix(dist):
8893
# ================== Test equal length ==================
8994
# Test univariate of shape (n_timepoints,)
9095
_validate_cost_matrix_result(
91-
make_series(10, return_numpy=True, random_state=1),
92-
make_series(10, return_numpy=True, random_state=2),
96+
make_example_1d_numpy(10, random_state=1),
97+
make_example_1d_numpy(10, random_state=2),
9398
dist["name"],
9499
dist["distance"],
95100
dist["cost_matrix"],
96101
)
97102

98103
# Test univariate of shape (1, n_timepoints)
99104
_validate_cost_matrix_result(
100-
make_series(10, 1, return_numpy=True, random_state=1),
101-
make_series(10, 1, return_numpy=True, random_state=2),
105+
make_example_2d_numpy_series(10, 1, random_state=1),
106+
make_example_2d_numpy_series(10, 1, random_state=2),
102107
dist["name"],
103108
dist["distance"],
104109
dist["cost_matrix"],
105110
)
106111

107112
# Test multivariate of shape (n_channels, n_timepoints)
108113
_validate_cost_matrix_result(
109-
make_series(10, 10, return_numpy=True, random_state=1),
110-
make_series(10, 10, return_numpy=True, random_state=2),
114+
make_example_2d_numpy_series(10, 10, random_state=1),
115+
make_example_2d_numpy_series(10, 10, random_state=2),
111116
dist["name"],
112117
dist["distance"],
113118
dist["cost_matrix"],
@@ -116,26 +121,26 @@ def test_cost_matrix(dist):
116121
# ================== Test unequal length ==================
117122
# Test univariate unequal length of shape (n_timepoints,)
118123
_validate_cost_matrix_result(
119-
make_series(5, return_numpy=True, random_state=1),
120-
make_series(10, return_numpy=True, random_state=2),
124+
make_example_1d_numpy(5, random_state=1),
125+
make_example_1d_numpy(10, random_state=2),
121126
dist["name"],
122127
dist["distance"],
123128
dist["cost_matrix"],
124129
)
125130

126131
# Test univariate unequal length of shape (1, n_timepoints)
127132
_validate_cost_matrix_result(
128-
make_series(5, 1, return_numpy=True, random_state=1),
129-
make_series(10, 1, return_numpy=True, random_state=2),
133+
make_example_2d_numpy_series(5, 1, random_state=1),
134+
make_example_2d_numpy_series(10, 1, random_state=2),
130135
dist["name"],
131136
dist["distance"],
132137
dist["cost_matrix"],
133138
)
134139

135140
# Test multivariate unequal length of shape (n_channels, n_timepoints)
136141
_validate_cost_matrix_result(
137-
make_series(5, 10, return_numpy=True, random_state=1),
138-
make_series(10, 10, return_numpy=True, random_state=2),
142+
make_example_2d_numpy_series(5, 10, random_state=1),
143+
make_example_2d_numpy_series(10, 10, random_state=2),
139144
dist["name"],
140145
dist["distance"],
141146
dist["cost_matrix"],

0 commit comments

Comments
 (0)