Skip to content

Commit dbaccab

Browse files
committed
merge main
2 parents e105710 + 11bb86d commit dbaccab

File tree

10 files changed

+198
-111
lines changed

10 files changed

+198
-111
lines changed

docs/conf.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,8 @@
111111
],
112112
}
113113

114+
html_context = {"default_mode": "light"}
115+
114116
# Add any paths that contain custom static files (such as style sheets) here,
115117
# relative to this directory. They are copied after the builtin static files,
116118
# so a file named "default.css" will overwrite the builtin "default.css".

stumpy/aamp_motifs.py

Lines changed: 12 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ def _aamp_motifs(
9797
motif_distances = []
9898

9999
candidate_idx = np.argmin(P[-1])
100-
for i in range(l):
100+
for _ in range(l):
101101
if len(motif_indices) >= max_motifs:
102102
break
103103

@@ -357,29 +357,17 @@ def aamp_match(
357357
to `Q` are less than or equal to `max_distance`, sorted by distance (lowest to
358358
highest). The second column consists of the corresponding indices in `T`.
359359
"""
360+
if np.any(np.isnan(Q)) or np.any(np.isinf(Q)): # pragma: no cover
361+
raise ValueError("Q contains illegal values (NaN or inf)")
362+
360363
if len(Q.shape) == 1:
361364
Q = Q[np.newaxis, :]
362365
if len(T.shape) == 1:
363366
T = T[np.newaxis, :]
364367

365368
d, n = T.shape
366369
m = Q.shape[1]
367-
368370
excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))
369-
if max_matches is None: # pragma: no cover
370-
max_matches = np.inf
371-
372-
if np.any(np.isnan(Q)) or np.any(np.isinf(Q)): # pragma: no cover
373-
raise ValueError("Q contains illegal values (NaN or inf)")
374-
375-
if max_distance is None: # pragma: no cover
376-
377-
def max_distance(D):
378-
D_copy = D.copy().astype(np.float64)
379-
D_copy[np.isinf(D_copy)] = np.nan
380-
return np.nanmax(
381-
[np.nanmean(D_copy) - 2.0 * np.nanstd(D_copy), np.nanmin(D_copy)]
382-
)
383371

384372
if T_subseq_isfinite is None:
385373
T, T_subseq_isfinite = core.preprocess_non_normalized(T, m)
@@ -389,28 +377,13 @@ def max_distance(D):
389377
D = np.empty((d, n - m + 1))
390378
for i in range(d):
391379
D[i, :] = core.mass_absolute(Q[i], T[i], T_subseq_isfinite[i], p=p)
392-
393380
D = np.mean(D, axis=0)
394-
if not isinstance(max_distance, float):
395-
max_distance = max_distance(D)
396-
397-
matches = []
398-
399-
if query_idx is not None:
400-
candidate_idx = query_idx
401-
else:
402-
candidate_idx = np.argmin(D)
403-
404-
for i in range(len(D)):
405-
if (
406-
D[candidate_idx] > atol + max_distance
407-
or ~np.isfinite(D[candidate_idx])
408-
or len(matches) >= max_matches
409-
):
410-
break
411-
412-
matches.append([D[candidate_idx], candidate_idx])
413-
core.apply_exclusion_zone(D, candidate_idx, excl_zone, np.inf)
414-
candidate_idx = np.argmin(D)
415381

416-
return np.array(matches, dtype=object)
382+
return core._find_matches(
383+
D,
384+
excl_zone,
385+
max_distance=max_distance,
386+
max_matches=max_matches,
387+
query_idx=query_idx,
388+
atol=atol,
389+
)

stumpy/core.py

Lines changed: 97 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1465,11 +1465,11 @@ def _mass_distance_matrix(Q, T, m, distance_matrix, μ_Q, σ_Q, M_T, Σ_T):
14651465
distance_matrix : numpy.ndarray
14661466
The full output distance matrix. This is mandatory since it may be reused.
14671467
1468-
μ_Q : float
1469-
Mean of `Q`
1468+
μ_Q : numpy.ndarray
1469+
Sliding mean of `Q`
14701470
1471-
σ_Q : float
1472-
Standard deviation of `Q`
1471+
σ_Q : numpy.ndarray
1472+
Sliding standard deviation of `Q`
14731473
14741474
M_T : numpy.ndarray
14751475
Sliding mean of `T`
@@ -2542,7 +2542,7 @@ def _select_P_ABBA_value(P_ABBA, k, custom_func=None):
25422542
25432543
k : int
25442544
Specify the `k`th value in the concatenated matrix profiles to return. This
2545-
parameter is ignored when `k_func` is not None.
2545+
parameter is ignored when `custom_func` is not None.
25462546
25472547
custom_func : object, default None
25482548
A custom user defined function for selecting the desired value from the
@@ -2592,3 +2592,95 @@ def _check_P(P, threshold=1e-6):
25922592
if are_distances_too_small(P, threshold=threshold): # pragma: no cover
25932593
logger.warning(f"A large number of values in `P` are smaller than {threshold}.")
25942594
logger.warning("For a self-join, try setting `ignore_trivial=True`.")
2595+
2596+
2597+
def _find_matches(
    D, excl_zone, max_distance=None, max_matches=None, query_idx=None, atol=1e-8
):
    """
    Find all matches of a query `Q` whose distance profile with `T` is `D`.

    Parameters
    ----------
    D : numpy.ndarray
        The distance profile of `Q` with `T`. It is a 1D numpy array of size
        `len(T)-len(Q)+1`, where `D[i]` is the distance between query `Q` and
        `T[i : i + len(Q)]`.

    excl_zone : int
        Size of the exclusion zone. That is, after finding the next-best-match
        located at index `idx`, we ignore subsequences with start index in range
        (idx - excl_zone, idx + excl_zone + 1).

    max_distance : float, int, or function, default None
        Maximum distance between `Q` and a subsequence `S` for `S` to be considered a
        match.
        If a function, then it has to be a function of one argument `D`, which will be
        the distance profile of `Q` with `T` (a 1D numpy array of size `n-m+1`).
        Any non-callable value is used directly as the numeric threshold.
        If None, this defaults to
        `np.nanmax([np.nanmean(D) - 2 * np.nanstd(D), np.nanmin(D)])` (i.e. at
        least the closest match will be returned).

    max_matches : int, default None
        The maximum amount of similar occurrences to be returned. The resulting
        occurrences are sorted by distance, so a value of `10` means that the
        indices of the most similar `10` subsequences is returned. If `None`, then all
        occurrences are returned.

    query_idx : int, default None
        This is the index position along the time series, `T`, where the query
        subsequence, `Q`, is located.
        `query_idx` should only be used when the matrix profile is a self-join and
        should be set to `None` for matrix profiles computed from AB-joins.
        If `query_idx` is set to a specific integer value, then this will help ensure
        that the self-match will be returned first.

    atol : float, default 1e-8
        The absolute tolerance parameter. This value will be added to `max_distance`
        when comparing distances between subsequences.

    Returns
    -------
    out : numpy.ndarray
        The first column consists of values selected from `D`. These are the distances
        of subsequences of `T` whose distances to `Q` are less than or equal to
        `max_distance`, sorted by distance (lowest to highest). The second column
        consists of the corresponding indices in `D`. These are in fact the start index
        of subsequences in `T` selected as the match of `Q`.
    """
    # Work on a private copy: the loop below overwrites entries of `D` with
    # `np.inf`, and the caller's distance profile must stay untouched.
    D = D.copy()

    if max_distance is None:

        def max_distance(D):
            # Non-finite distances are ignored when computing the statistics
            D_copy = D.copy().astype(np.float64)
            D_copy[np.isinf(D_copy)] = np.nan
            return np.nanmax(
                [np.nanmean(D_copy) - 2.0 * np.nanstd(D_copy), np.nanmin(D_copy)]
            )

    # Only callables are evaluated. Checking `isinstance(..., float)` instead would
    # wrongly try to call numeric thresholds such as `5` or `np.float32(5.0)`.
    if callable(max_distance):
        max_distance = max_distance(D)

    if max_matches is None:
        max_matches = np.inf

    # When a query index is supplied, it is always the first candidate considered
    if query_idx is not None:
        candidate_idx = query_idx
    else:
        candidate_idx = np.argmin(D)

    matches = []
    for _ in range(len(D)):
        if (
            D[candidate_idx] > atol + max_distance
            or not np.isfinite(D[candidate_idx])
            or len(matches) >= max_matches
        ):
            break

        matches.append([D[candidate_idx], candidate_idx])
        # Mask out the neighbourhood of the accepted match so that the next
        # `argmin` cannot select a position inside its exclusion zone
        apply_exclusion_zone(D, candidate_idx, excl_zone, np.inf)
        candidate_idx = np.argmin(D)

    return np.array(matches, dtype=object)

stumpy/motifs.py

Lines changed: 12 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ def _motifs(
9696
motif_distances = []
9797

9898
candidate_idx = np.argmin(P[-1])
99-
for i in range(l):
99+
for _ in range(l):
100100
if len(motif_indices) >= max_motifs:
101101
break
102102

@@ -422,29 +422,17 @@ def match(
422422
Q = core._preprocess(Q)
423423
T = core._preprocess(T)
424424

425+
if np.any(np.isnan(Q)) or np.any(np.isinf(Q)): # pragma: no cover
426+
raise ValueError("Q contains illegal values (NaN or inf)")
427+
425428
if len(Q.shape) == 1:
426429
Q = Q[np.newaxis, :]
427430
if len(T.shape) == 1:
428431
T = T[np.newaxis, :]
429432

430433
d, n = T.shape
431434
m = Q.shape[1]
432-
433435
excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))
434-
if max_matches is None: # pragma: no cover
435-
max_matches = np.inf
436-
437-
if np.any(np.isnan(Q)) or np.any(np.isinf(Q)): # pragma: no cover
438-
raise ValueError("Q contains illegal values (NaN or inf)")
439-
440-
if max_distance is None: # pragma: no cover
441-
442-
def max_distance(D):
443-
D_copy = D.copy().astype(np.float64)
444-
D_copy[np.isinf(D_copy)] = np.nan
445-
return np.nanmax(
446-
[np.nanmean(D_copy) - 2.0 * np.nanstd(D_copy), np.nanmin(D_copy)]
447-
)
448436

449437
if M_T is None or Σ_T is None: # pragma: no cover
450438
T, M_T, Σ_T = core.preprocess(T, m)
@@ -456,28 +444,13 @@ def max_distance(D):
456444
D = np.empty((d, n - m + 1))
457445
for i in range(d):
458446
D[i, :] = core.mass(Q[i], T[i], M_T[i], Σ_T[i])
459-
460447
D = np.mean(D, axis=0)
461-
if not isinstance(max_distance, float):
462-
max_distance = max_distance(D)
463-
464-
matches = []
465-
466-
if query_idx is not None:
467-
candidate_idx = query_idx
468-
else:
469-
candidate_idx = np.argmin(D)
470-
471-
for i in range(len(D)):
472-
if (
473-
D[candidate_idx] > atol + max_distance
474-
or ~np.isfinite(D[candidate_idx])
475-
or len(matches) >= max_matches
476-
):
477-
break
478-
479-
matches.append([D[candidate_idx], candidate_idx])
480-
core.apply_exclusion_zone(D, candidate_idx, excl_zone, np.inf)
481-
candidate_idx = np.argmin(D)
482448

483-
return np.array(matches, dtype=object)
449+
return core._find_matches(
450+
D,
451+
excl_zone,
452+
max_distance=max_distance,
453+
max_matches=max_matches,
454+
query_idx=query_idx,
455+
atol=atol,
456+
)

stumpy/mpdist.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -192,13 +192,14 @@ def _mpdist_vect(
192192
Time series or sequence
193193
194194
m : int
195-
Window size
195+
Window size that will be used for calculating the mpdist between Q and
196+
any subsequence in T (of size `len(Q)`)
196197
197-
μ_Q : float
198-
Mean of `Q`
198+
μ_Q : numpy.ndarray
199+
Sliding mean of `Q`
199200
200-
σ_Q : float
201-
Standard deviation of `Q`
201+
σ_Q : numpy.ndarray
202+
Sliding standard deviation of `Q`
202203
203204
M_T : numpy.ndarray
204205
Sliding mean of `T`
@@ -222,6 +223,13 @@ def _mpdist_vect(
222223
and should take `P_ABBA` as its only input parameter and return a single
223224
`MPdist` value. The `percentage` and `k` parameters are ignored when
224225
`custom_func` is not None.
226+
227+
Returns
228+
-------
229+
MPdist_vect : numpy.ndarray
230+
The mpdist-based distance profile of `Q` with `T`. It is a 1D array of
231+
size `len(T) - len(Q) + 1`. MPdist_vect[i] is the mpdist distance between
232+
`Q` and subsequence `T[i : i + len(Q)]`.
225233
"""
226234
j = Q.shape[0] - m + 1 # `k` is reserved for `P_ABBA` selection
227235
l = T.shape[0] - m + 1

stumpy/scraamp.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -325,7 +325,10 @@ def prescraamp(T_A, m, T_B=None, s=None, p=2.0):
325325
l = n_A - m + 1
326326

327327
if s is None: # pragma: no cover
328-
s = excl_zone
328+
if excl_zone is not None: # self-join
329+
s = excl_zone
330+
else: # AB-join
331+
s = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))
329332

330333
indices = np.random.permutation(range(0, l, s)).astype(np.int64)
331334
P, I = _prescraamp(
@@ -509,9 +512,11 @@ def __init__(
509512
self._I[:, :] = -1
510513

511514
self._excl_zone = int(np.ceil(self._m / config.STUMPY_EXCL_ZONE_DENOM))
512-
513515
if s is None:
514-
s = self._excl_zone
516+
if self._excl_zone is not None: # self-join
517+
s = self._excl_zone
518+
else: # pragma: no cover # AB-join
519+
s = int(np.ceil(self._m / config.STUMPY_EXCL_ZONE_DENOM))
515520

516521
if pre_scraamp:
517522
if self._ignore_trivial:

stumpy/scrump.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -340,7 +340,10 @@ def prescrump(T_A, m, T_B=None, s=None, normalize=True, p=2.0):
340340
l = n_A - m + 1
341341

342342
if s is None: # pragma: no cover
343-
s = excl_zone
343+
if excl_zone is not None: # self-join
344+
s = excl_zone
345+
else: # AB-join
346+
s = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))
344347

345348
indices = np.random.permutation(range(0, l, s)).astype(np.int64)
346349
P, I = _prescrump(
@@ -578,9 +581,11 @@ def __init__(
578581
self._I[:, :] = -1
579582

580583
self._excl_zone = int(np.ceil(self._m / config.STUMPY_EXCL_ZONE_DENOM))
581-
582584
if s is None:
583-
s = self._excl_zone
585+
if self._excl_zone is not None: # self-join
586+
s = self._excl_zone
587+
else: # pragma: no cover # AB-join
588+
s = int(np.ceil(self._m / config.STUMPY_EXCL_ZONE_DENOM))
584589

585590
if pre_scrump:
586591
if self._ignore_trivial:

tests/naive.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1759,3 +1759,25 @@ def _total_diagonal_ndists(tile_lower_diag, tile_upper_diag, tile_height, tile_w
17591759
)
17601760

17611761
return total_ndists
1762+
1763+
1764+
def find_matches(D, excl_zone, max_distance, max_matches=None):
    """
    Naive reference implementation for selecting matches from a distance profile.

    Parameters
    ----------
    D : numpy.ndarray
        1D distance profile

    excl_zone : int
        Once index `idx` is selected, every remaining candidate `x` with
        `idx - excl_zone <= x <= idx + excl_zone` is discarded.

    max_distance : float
        Only indices whose distance is less than or equal to this value are
        considered.

    max_matches : int, default None
        Maximum number of matches to return. If `None`, `len(D)` is used.

    Returns
    -------
    out : numpy.ndarray
        Object array of `[distance, index]` rows, ordered from the smallest
        distance to the largest.
    """
    limit = len(D) if max_matches is None else max_matches

    # Indices within the distance threshold, ordered by increasing distance
    # (the sort is stable, so ties keep their ascending index order)
    candidates = sorted(
        (j for j in range(D.size) if D[j] <= max_distance), key=lambda j: D[j]
    )

    # Repeatedly accept the closest remaining candidate and drop everything that
    # falls inside its exclusion zone (including the candidate itself)
    selected = []
    while candidates:
        best = candidates[0]
        selected.append([D[best], best])
        lower, upper = best - excl_zone, best + excl_zone
        candidates = [j for j in candidates if j < lower or j > upper]

    return np.array(selected[:limit], dtype=object)

0 commit comments

Comments
 (0)