-
Notifications
You must be signed in to change notification settings - Fork 166
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Inconsistent calculation results between tslearn, pyts and fastdtw #76
Comments
Hi, I updated the computation of the constraint regions in tslearn in PR #104. Later, PR #31 by Hicham Janati updated the computation of the constraint regions in pyts so that they are tighter when the two time series have different lengths. I don't think that there is a consensus on the definition of the regions for time series with different lengths. Here is an illustration for pyts and tslearn with a time series of length 8 and another one of length 4: In the figures I plot the lines that define the constraint regions. Any point inside the lines is a valid point, any point outside the lines is not valid. I also plot the diagonal in black. For Sakoe-Chiba bands, you can see that for Likewise for Itakura parallelograms, pyts returns a tighter region than tslearn. Note that the For the FastDTW algorithm, I have not checked the code of the If you see differences between packages regarding DTW computations, it is very likely that they come from the computation of the constraint regions. Hope this helps a bit and sorry for the delay.
Code for Sakoe-Chiba band figure
import matplotlib.pyplot as plt
import numpy as np
from pyts.metrics import sakoe_chiba_band
from pyts.metrics.dtw import _check_sakoe_chiba_params
from tslearn.metrics import sakoe_chiba_mask
def plot_sakoe_chiba_pyts(n_timestamps_1, n_timestamps_2, window_size=0.5,
                          ax=None):
    """Plot the Sakoe-Chiba band as computed by pyts.

    The feasible region returned by ``sakoe_chiba_band`` is rendered as an
    image, and the two straight lines built from the parameters returned by
    ``_check_sakoe_chiba_params`` are overlaid together with the diagonal.

    Parameters
    ----------
    n_timestamps_1 : int
        Length of the first time series (drawn along the x-axis).
    n_timestamps_2 : int
        Length of the second time series (drawn along the y-axis).
    window_size : default 0.5
        Forwarded unchanged to the pyts helpers.
    ax : matplotlib Axes or None
        Axes to draw on. When None, the current pyplot axes are used.
    """
    # The drawing calls below go through pyplot's "current axes"; make sure
    # that is the very same axes object whose minor ticks are set later.
    if ax is None:
        ax = plt.gca()
    else:
        plt.sca(ax)

    region = sakoe_chiba_band(n_timestamps_1, n_timestamps_2, window_size)
    scale, horizontal_shift, vertical_shift = \
        _check_sakoe_chiba_params(n_timestamps_1, n_timestamps_2, window_size)

    # Rasterize the region: column i is valid on the row slice [j, k).
    # NOTE(review): assumes region.T yields one (start, stop) pair per
    # column of the first series -- confirm against pyts.
    mask = np.zeros((n_timestamps_2, n_timestamps_1))
    for i, (j, k) in enumerate(region.T):
        mask[j:k, i] = 1.
    plt.imshow(mask, origin='lower', cmap='Wistia', vmin=0, vmax=1)

    # Boundary lines (blue: lower, green: upper) over a range slightly wider
    # than the grid so that they cross the whole picture.
    sz = max(n_timestamps_1, n_timestamps_2)
    x = np.arange(-1, sz + 1)
    lower_bound = scale * (x - horizontal_shift) - vertical_shift
    upper_bound = scale * (x + horizontal_shift) + vertical_shift
    plt.plot(x, lower_bound, 'b', lw=2)
    plt.plot(x, upper_bound, 'g', lw=2)

    # Diagonal from (0, 0) to (n_timestamps_1 - 1, n_timestamps_2 - 1).
    # NOTE: divides by zero when n_timestamps_1 == 1.
    diag = (n_timestamps_2 - 1) / (n_timestamps_1 - 1) * x
    plt.plot(x, diag, 'black', lw=1)

    # Mark every grid point with a single plot call instead of one call
    # (and one artist) per point.
    grid_x, grid_y = np.meshgrid(np.arange(n_timestamps_1),
                                 np.arange(n_timestamps_2))
    plt.plot(grid_x.ravel(), grid_y.ravel(), 'o', color='k', ms=3)

    # Minor ticks sit on the cell borders so that the dashed grid outlines
    # each cell; major ticks label the integer indices.
    ax.set_xticks(np.arange(-0.5, n_timestamps_1, 1), minor=True)
    ax.set_yticks(np.arange(-.5, n_timestamps_2, 1), minor=True)
    plt.grid(color='b', which='minor', linestyle='--', linewidth=1)
    plt.xticks(np.arange(0, n_timestamps_1, 1))
    plt.yticks(np.arange(0, n_timestamps_2, 1))
    plt.xlim((-0.5, n_timestamps_1 - 0.5))
    plt.ylim((-0.5, n_timestamps_2 - 0.5))
def plot_sakoe_chiba_tslearn(n_timestamps_1, n_timestamps_2, window_size=0.5,
                             ax=None):
    """Plot the Sakoe-Chiba band as computed by tslearn.

    The mask returned by ``sakoe_chiba_mask`` is rendered as an image, with
    the two boundary lines and the diagonal overlaid.

    Parameters
    ----------
    n_timestamps_1 : int
        Length of the first time series (drawn along the x-axis).
    n_timestamps_2 : int
        Length of the second time series (drawn along the y-axis).
    window_size : default 0.5
        Forwarded unchanged to ``sakoe_chiba_mask``; also used as the
        vertical offset of the two boundary lines.
    ax : matplotlib Axes or None
        Axes to draw on. When None, the current pyplot axes are used.
    """
    # The drawing calls below go through pyplot's "current axes"; make sure
    # that is the very same axes object whose minor ticks are set later.
    if ax is None:
        ax = plt.gca()
    else:
        plt.sca(ax)

    vertical_shift = window_size
    mask = sakoe_chiba_mask(n_timestamps_1, n_timestamps_2, window_size)
    # Recode the mask for display: entries equal to 0 become 1 and infinite
    # entries become 0 (order matters: zeros must be recoded first).
    mask[mask == 0.] = 1.
    mask[np.isinf(mask)] = 0.
    # Transpose so that the first series runs along the x-axis.
    mask = mask.T
    plt.imshow(mask, origin='lower', cmap='Wistia', vmin=0, vmax=1)

    # Boundary lines (blue: lower, green: upper) over a range slightly wider
    # than the grid. Both have unit slope; the lower one is additionally
    # shifted down by the length difference of the two series.
    sz = max(n_timestamps_1, n_timestamps_2)
    x = np.arange(-1, sz + 1)
    lower_bound = x - vertical_shift - abs(n_timestamps_1 - n_timestamps_2)
    upper_bound = x + vertical_shift
    plt.plot(x, lower_bound, 'b', lw=2)
    plt.plot(x, upper_bound, 'g', lw=2)

    # Diagonal from (0, 0) to (n_timestamps_1 - 1, n_timestamps_2 - 1).
    # NOTE: divides by zero when n_timestamps_1 == 1.
    diag = (n_timestamps_2 - 1) / (n_timestamps_1 - 1) * x
    plt.plot(x, diag, 'black', lw=1)

    # Mark every grid point with a single plot call instead of one call
    # (and one artist) per point.
    grid_x, grid_y = np.meshgrid(np.arange(n_timestamps_1),
                                 np.arange(n_timestamps_2))
    plt.plot(grid_x.ravel(), grid_y.ravel(), 'o', color='k', ms=3)

    # Minor ticks sit on the cell borders so that the dashed grid outlines
    # each cell; major ticks label the integer indices.
    ax.set_xticks(np.arange(-0.5, n_timestamps_1, 1), minor=True)
    ax.set_yticks(np.arange(-.5, n_timestamps_2, 1), minor=True)
    plt.grid(color='b', which='minor', linestyle='--', linewidth=1)
    plt.xticks(np.arange(0, n_timestamps_1, 1))
    plt.yticks(np.arange(0, n_timestamps_2, 1))
    plt.xlim((-0.5, n_timestamps_1 - 0.5))
    plt.ylim((-0.5, n_timestamps_2 - 0.5))
# Compare the Sakoe-Chiba bands of pyts (top row) and tslearn (bottom row)
# for a series of length 8 against a series of length 4.
n_timestamps_1, n_timestamps_2 = 8, 4

# One entry per subplot, in row-major order:
# (plotting function, window size, subplot title).
panels = (
    (plot_sakoe_chiba_pyts, 0, 'pyts, window-size = 0'),
    (plot_sakoe_chiba_pyts, 2, 'pyts, window-size = 2'),
    (plot_sakoe_chiba_tslearn, 0, 'tslearn, window-size = 0'),
    (plot_sakoe_chiba_tslearn, 2, 'tslearn, window-size = 2'),
)

plt.figure(figsize=(12, 6))
for position, (draw_band, size, title) in enumerate(panels, start=1):
    ax = plt.subplot(2, 2, position)
    draw_band(n_timestamps_1, n_timestamps_2, window_size=size, ax=ax)
    plt.title(title, fontsize=18)

plt.suptitle('Sakoe-Chiba band', y=1.02, fontsize=24)
plt.subplots_adjust(hspace=0.3)
Code for Itakura parallelogram figure
import matplotlib.pyplot as plt
import numpy as np
from pyts.metrics import itakura_parallelogram
from pyts.metrics.dtw import _get_itakura_slopes
from tslearn.metrics import itakura_mask
def plot_itakura_pyts(n_timestamps_1, n_timestamps_2, max_slope=1., ax=None):
    """Plot the Itakura parallelogram as computed by pyts.

    The feasible region returned by ``itakura_parallelogram`` is rendered as
    an image. Four lines built from the slopes returned by
    ``_get_itakura_slopes`` are overlaid, together with the diagonal.

    Parameters
    ----------
    n_timestamps_1 : int
        Length of the first time series (drawn along the x-axis).
    n_timestamps_2 : int
        Length of the second time series (drawn along the y-axis).
    max_slope : float, default 1.
        Forwarded unchanged to the pyts helpers.
    ax : matplotlib Axes or None
        Axes to draw on. When None, the current pyplot axes are used.
    """
    # The drawing calls below go through pyplot's "current axes"; make sure
    # that is the very same axes object whose minor ticks are set later.
    if ax is None:
        ax = plt.gca()
    else:
        plt.sca(ax)

    region = itakura_parallelogram(n_timestamps_1, n_timestamps_2, max_slope)
    # From here on the slopes used for the lines are the ones pyts derives.
    max_slope, min_slope = _get_itakura_slopes(
        n_timestamps_1, n_timestamps_2, max_slope)

    # Rasterize the region: column i is valid on the row slice [j, k).
    # NOTE(review): assumes region.T yields one (start, stop) pair per
    # column of the first series -- confirm against pyts.
    mask = np.zeros((n_timestamps_2, n_timestamps_1))
    for i, (j, k) in enumerate(region.T):
        mask[j:k, i] = 1.
    plt.imshow(mask, origin='lower', cmap='Wistia')

    sz = max(n_timestamps_1, n_timestamps_2)
    x = np.arange(-1, sz + 1)

    # Lines with the max/min slope that pass through the end point
    # (n_timestamps_1 - 1, n_timestamps_2 - 1).
    low_max_line = ((n_timestamps_2 - 1) - max_slope * (n_timestamps_1 - 1)) +\
        max_slope * x
    up_min_line = ((n_timestamps_2 - 1) - min_slope * (n_timestamps_1 - 1)) +\
        min_slope * x

    # Diagonal from (0, 0) to the end point.
    # NOTE: divides by zero when n_timestamps_1 == 1.
    diag = (n_timestamps_2 - 1) / (n_timestamps_1 - 1) * x
    plt.plot(x, diag, 'black', lw=1)

    # Lines with the max (blue) and min (red) slope through the origin,
    # then the two lines through the end point (green, yellow).
    plt.plot(x, max_slope * x, 'b', lw=1.5)
    plt.plot(x, min_slope * x, 'r', lw=1.5)
    plt.plot(x, low_max_line, 'g', lw=1.5)
    plt.plot(x, up_min_line, 'y', lw=1.5)

    # Mark every grid point with a single plot call instead of one call
    # (and one artist) per point.
    grid_x, grid_y = np.meshgrid(np.arange(n_timestamps_1),
                                 np.arange(n_timestamps_2))
    plt.plot(grid_x.ravel(), grid_y.ravel(), 'o', color='k', ms=3)

    # Minor ticks sit on the cell borders so that the dashed grid outlines
    # each cell; major ticks label the integer indices.
    ax.set_xticks(np.arange(-.5, n_timestamps_1, 1), minor=True)
    ax.set_yticks(np.arange(-.5, n_timestamps_2, 1), minor=True)
    plt.grid(which='minor', color='b', linestyle='--', linewidth=1)
    plt.xticks(np.arange(0, n_timestamps_1, 1))
    plt.yticks(np.arange(0, n_timestamps_2, 1))
    plt.xlim((-0.5, n_timestamps_1 - 0.5))
    plt.ylim((-0.5, n_timestamps_2 - 0.5))
def plot_itakura_tslearn(n_timestamps_1, n_timestamps_2, max_slope=1.,
                         ax=None):
    """Plot the Itakura parallelogram as computed by tslearn.

    The mask returned by ``itakura_mask`` is rendered as an image. Four
    lines built from a rescaled ``max_slope`` and its reciprocal are
    overlaid, together with the diagonal.

    Parameters
    ----------
    n_timestamps_1 : int
        Length of the first time series (drawn along the x-axis).
    n_timestamps_2 : int
        Length of the second time series (drawn along the y-axis).
    max_slope : float, default 1.
        Forwarded unchanged to ``itakura_mask``. For the overlaid lines it
        is multiplied by ``(n_timestamps_1 - 1) / (n_timestamps_2 - 1)``.
    ax : matplotlib Axes or None
        Axes to draw on. When None, the current pyplot axes are used.
    """
    # The drawing calls below go through pyplot's "current axes"; make sure
    # that is the very same axes object whose minor ticks are set later.
    if ax is None:
        ax = plt.gca()
    else:
        plt.sca(ax)

    mask = itakura_mask(n_timestamps_1, n_timestamps_2, max_slope)
    # Recode the mask for display: entries equal to 0 become 1 and infinite
    # entries become 0 (order matters: zeros must be recoded first).
    mask[mask == 0.] = 1.
    mask[np.isinf(mask)] = 0.
    # Transpose so that the first series runs along the x-axis.
    mask = mask.T
    plt.imshow(mask, origin='lower', cmap='Wistia')

    sz = max(n_timestamps_1, n_timestamps_2)
    x = np.arange(-1, sz + 1)

    # Slopes used for the overlaid lines.
    # NOTE: divides by zero when n_timestamps_2 == 1 or max_slope == 0.
    ratio = (n_timestamps_1 - 1) / (n_timestamps_2 - 1)
    max_slope *= ratio
    min_slope = 1 / max_slope

    # Lines with the max/min slope that pass through the end point
    # (n_timestamps_1 - 1, n_timestamps_2 - 1).
    low_max_line = ((n_timestamps_2 - 1) - max_slope * (n_timestamps_1 - 1)) +\
        max_slope * x
    up_min_line = ((n_timestamps_2 - 1) - min_slope * (n_timestamps_1 - 1)) +\
        min_slope * x

    # Diagonal from (0, 0) to the end point.
    # NOTE: divides by zero when n_timestamps_1 == 1.
    diag = (n_timestamps_2 - 1) / (n_timestamps_1 - 1) * x
    plt.plot(x, diag, 'black', lw=1)

    # Lines with the max (blue) and min (red) slope through the origin,
    # then the two lines through the end point (green, yellow).
    plt.plot(x, max_slope * x, 'b', lw=1.5)
    plt.plot(x, min_slope * x, 'r', lw=1.5)
    plt.plot(x, low_max_line, 'g', lw=1.5)
    plt.plot(x, up_min_line, 'y', lw=1.5)

    # Mark every grid point with a single plot call instead of one call
    # (and one artist) per point.
    grid_x, grid_y = np.meshgrid(np.arange(n_timestamps_1),
                                 np.arange(n_timestamps_2))
    plt.plot(grid_x.ravel(), grid_y.ravel(), 'o', color='k', ms=3)

    # Minor ticks sit on the cell borders so that the dashed grid outlines
    # each cell; major ticks label the integer indices.
    ax.set_xticks(np.arange(-.5, n_timestamps_1, 1), minor=True)
    ax.set_yticks(np.arange(-.5, n_timestamps_2, 1), minor=True)
    plt.grid(which='minor', color='b', linestyle='--', linewidth=1)
    plt.xticks(np.arange(0, n_timestamps_1, 1))
    plt.yticks(np.arange(0, n_timestamps_2, 1))
    plt.xlim((-0.5, n_timestamps_1 - 0.5))
    plt.ylim((-0.5, n_timestamps_2 - 0.5))
# Compare the Itakura parallelograms of pyts (top row) and tslearn (bottom
# row) for a series of length 8 against a series of length 4.
n_timestamps_1, n_timestamps_2 = 8, 4

# One entry per subplot, in row-major order:
# (plotting function, maximum slope, subplot title).
panels = (
    (plot_itakura_pyts, 1., 'pyts, max-slope = 1.'),
    (plot_itakura_pyts, 2., 'pyts, max-slope = 2.'),
    (plot_itakura_tslearn, 2., 'tslearn, max-slope = 2.'),
    (plot_itakura_tslearn, 4., 'tslearn, max-slope = 4.'),
)

plt.figure(figsize=(12, 6))
for position, (draw_region, slope, title) in enumerate(panels, start=1):
    ax = plt.subplot(2, 2, position)
    draw_region(n_timestamps_1, n_timestamps_2, max_slope=slope, ax=ax)
    plt.title(title, fontsize=18)

plt.suptitle('Itakura parallelogram', y=1.02, fontsize=24)
plt.subplots_adjust(hspace=0.3)
Description
Please forgive me for my poor English since English is not my native language.
I have read the source code and the Sakoe-Chiba band generation examples of tslearn and pyts. The Sakoe-Chiba band seems to be generated differently in pyts and tslearn, which leads to different results when comparing two sequences of different lengths. I have also read the source code of fastdtw (https://pypi.org/project/fastdtw/): depending on the value of the radius parameter, the results of fastdtw and pyts differ as well.
Codes
Versions
NumPy 1.18.1
SciPy 1.4.1
Scikit-Learn 0.22.1
Numba 0.49.1
Pyts 0.11.0
tslearn: '0.4.1'
fastdtw: See pypi
The text was updated successfully, but these errors were encountered: