This repository has been archived by the owner on Apr 10, 2024. It is now read-only.

Fixing RDST Ensemble n_jobs, adding length bounds and dilation scheme parameter #23

Merged 2 commits on Nov 5, 2022
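For orientation before the file-by-file diff: a minimal usage sketch assembled from the calls added in PaperScripts/test_models.py below. It only exercises what the diff shows; the shapelet budgets are illustrative, and the exact semantics of shapelet_lengths_bounds (beyond being passed down to R_DST) are not restated here.

```python
import numpy as np
from convst.classifiers import R_DST_Ridge, R_DST_Ensemble

# Smoke test on toy data, mirroring the compile check in test_models.py
X = np.random.rand(5, 3, 50)
y = np.array([0, 0, 1, 1, 1])
R_DST_Ridge(n_shapelets=1).fit(X, y).predict(X)
R_DST_Ensemble(n_shapelets_per_estimator=1).fit(X, y).predict(X)

# Configuration run on the UCR datasets in the script below,
# combining the options added by this PR
pipeline = R_DST_Ensemble(
    n_jobs=-1,
    phase_invariance=True,
    prime_dilations=True,                   # restrict candidate dilations to primes (see _commons.py)
    shapelet_lengths_bounds=[0.001, 0.25],  # bounds on candidate shapelet lengths
)
```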
22 changes: 15 additions & 7 deletions PaperScripts/test_models.py
@@ -3,25 +3,26 @@
import pandas as pd
import numpy as np

from convst.utils.dataset_utils import return_all_dataset_names
from convst.utils.dataset_utils import return_all_dataset_names, return_all_univariate_dataset_names
from convst.utils.experiments_utils import cross_validate_UCR_UEA

from convst.classifiers import R_DST_Ensemble
from convst.classifiers import R_DST_Ensemble, R_DST_Ridge

print("Imports OK")
#n_cv = 1 to test only on the original train/test split.
n_cv=30

csv_name = 'CV_{}_results_multivariate_ensemble2.csv'.format(
csv_name = 'CV_{}_results_prime_bounds_phase.csv'.format(
n_cv)

# List of datasets to test; here we use all datasets (univariate,
# multivariate, variable length, etc.); see dataset_utils for other choices.
dataset_names = return_all_dataset_names()
dataset_names = return_all_univariate_dataset_names()

# List of models to test
dict_models = {
"R_DST_Ensemble": R_DST_Ensemble,
"R_DST": R_DST_Ridge,
"R_DST_Ensemble": R_DST_Ensemble
}

resume=False
@@ -39,14 +40,21 @@
print("Compiling {}".format(model_name))
X = np.random.rand(5,3,50)
y = np.array([0,0,1,1,1])
model_class(n_shapelets_per_estimator=1).fit(X,y).predict(X)
if model_name == 'R_DST_Ensemble':
model_class(n_shapelets_per_estimator=1).fit(X,y).predict(X)
if model_name == 'R_DST_Ridge':
model_class(n_shapelets=1).fit(X,y).predict(X)

i_df=0
for name in dataset_names:
print(name)
for model_name, model_class in dict_models.items():
print(model_name)
if pd.isna(df.loc[i_df, 'acc_mean']) or df.loc[i_df, 'acc_mean'] == 0.0:
pipeline = model_class(n_jobs=-1, phase_invariance=True)
pipeline = model_class(
n_jobs=-1, phase_invariance=True,
prime_dilations=True, shapelet_lengths_bounds=[0.001, 0.25]
)

#By default, we use accuracy as the score, but other scorers can be passed
#as parameters (e.g. by default scorers={"accuracy":accuracy_score})
14 changes: 11 additions & 3 deletions convst/classifiers/rdst_ensemble.py
@@ -62,6 +62,9 @@ def __init__(
self,
n_shapelets_per_estimator=10000,
shapelet_lengths=[11],
shapelet_lengths_bounds=None,
lengths_bounds_reduction=0.5,
prime_dilations=False,
n_samples=None,
n_jobs=1,
backend="processes",
@@ -74,6 +77,9 @@
self.n_shapelets_per_estimator=n_shapelets_per_estimator
self.shapelet_lengths=shapelet_lengths
self.n_jobs = n_jobs
self.shapelet_lengths_bounds=shapelet_lengths_bounds
self.lengths_bounds_reduction=lengths_bounds_reduction
self.prime_dilations=prime_dilations
self.backend=backend
self.random_state = random_state
self.n_samples=n_samples
@@ -101,7 +107,6 @@ def fit(self, X, y):
Derivate(),
Periodigram()
]

models = Parallel(
n_jobs=self.n_jobs,
prefer=self.backend,
@@ -113,9 +118,12 @@
R_DST(
n_shapelets=self.n_shapelets_per_estimator,
alpha=self.shp_alpha, n_samples=self.n_samples,
proba_norm=self.proba_norm[i], n_jobs=self.n_jobs_rdst,
proba_norm=self.proba_norm[i], n_jobs=-1,
shapelet_lengths=self.shapelet_lengths,
phase_invariance=self.phase_invariance
phase_invariance=self.phase_invariance,
prime_dilations=self.prime_dilations,
shapelet_lengths_bounds=self.shapelet_lengths_bounds,
lengths_bounds_reduction=self.lengths_bounds_reduction
),
_internalRidgeCV()
)
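The n_jobs fix named in the PR title is the change above: each inner R_DST now runs with n_jobs=-1, while the outer joblib.Parallel over the three input representations (raw, derivative, periodogram) keeps the ensemble's own n_jobs and backend. A schematic of that pattern with placeholder work, assuming standard joblib semantics rather than the real fit code:

```python
from joblib import Parallel, delayed

def _fit_one(representation, inner_n_jobs):
    # Placeholder for fitting one (R_DST, RidgeCV) pair on one input representation
    return "fitted {} (inner n_jobs={})".format(representation, inner_n_jobs)

representations = ["raw", "derivative", "periodogram"]

# Outer parallelism over representations, controlled by the ensemble's n_jobs
# and backend; after this PR the inner transformer simply uses n_jobs=-1.
models = Parallel(n_jobs=3, prefer="processes")(
    delayed(_fit_one)(r, -1) for r in representations
)
print(models)
```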
9 changes: 9 additions & 0 deletions convst/classifiers/rdst_ridge.py
@@ -67,6 +67,9 @@ def __init__(
n_samples=None,
n_shapelets=10_000,
shapelet_lengths=[11],
shapelet_lengths_bounds=None,
lengths_bounds_reduction=0.5,
prime_dilations=False,
proba_norm=0.8,
percentiles=[5,10],
n_jobs=1,
@@ -81,10 +84,13 @@
self.fit_intercept=fit_intercept
self.transform_type=transform_type
self.phase_invariance=phase_invariance
self.prime_dilations=prime_dilations
self.distance=distance
self.alpha=alpha
self.normalize_output=normalize_output
self.n_samples=n_samples
self.shapelet_lengths_bounds=shapelet_lengths_bounds
self.lengths_bounds_reduction=lengths_bounds_reduction
self.n_shapelets=n_shapelets
self.shapelet_lengths=shapelet_lengths
self.proba_norm=proba_norm
@@ -114,6 +120,9 @@ def _init_components(self):
phase_invariance=self.phase_invariance,
distance=self.distance,
alpha=self.alpha,
prime_dilations=self.prime_dilations,
shapelet_lengths_bounds=self.shapelet_lengths_bounds,
lengths_bounds_reduction=self.lengths_bounds_reduction,
normalize_output=self.normalize_output,
n_samples=self.n_samples,
n_shapelets=self.n_shapelets,
20 changes: 18 additions & 2 deletions convst/transformers/_commons.py
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-

from numba import njit, prange
from numpy import float_, sqrt, zeros, unique, bool_, where, int64
from numpy import float_, sqrt, zeros, unique, bool_, where, int64, all as _all

###############################################################################
# #
@@ -572,4 +572,20 @@ def _combinations_1d(x,y):
combinations[i_comb,1] = y[i]
u_mask[where(u_x==x[i])[0][0],where(u_y==y[i])[0][0]] = False
i_comb += 1
return combinations
return combinations

@njit(cache=True)
def prime_up_to(n):
is_p = zeros(n, dtype=bool_)
for i in range(n):
is_p[i] = is_prime(i)
return where(is_p)[0]

@njit(cache=True)
def is_prime(n):
if (n % 2 == 0 and n > 2) or n == 0:
return False
for i in range(3, int64(sqrt(n)) + 1, 2):
if not n % i:
return False
return True
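What these helpers enable, for the transformer files below: when prime_scheme is set, each shapelet's dilation is drawn from the primes not exceeding 2**upper_bound (as written, is_prime returns True for 1 and 2, so a dilation of 1 stays in the pool) instead of the default draw of floor(2**u) with u uniform on [0, upper_bound]. A plain NumPy sketch of that sampling step, standing in for the numba-compiled helpers:

```python
import numpy as np

def sample_prime_dilations(n_timestamps, length, n_draws, seed=0):
    # Largest dilation for which a dilated shapelet of this length still fits the series
    upper = (n_timestamps - 1) // (length - 1)
    # Plain-Python stand-in for prime_up_to/is_prime above (1 is kept, as there)
    candidates = [d for d in range(1, upper + 1)
                  if d <= 2 or all(d % k for k in range(2, int(d ** 0.5) + 1))]
    rng = np.random.default_rng(seed)
    return rng.choice(np.array(candidates), size=n_draws)

# Series of length 150, shapelets of length 11: candidate dilations are 1, 2, 3, 5, 7, 11, 13
print(sample_prime_dilations(150, 11, n_draws=5))
```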
24 changes: 15 additions & 9 deletions convst/transformers/_multivariate_same_length.py
@@ -12,14 +12,14 @@
from convst.transformers._commons import (
get_subsequence, compute_shapelet_dist_vector,
apply_one_shapelet_one_sample_multivariate, _combinations_1d,
generate_strides_2D
generate_strides_2D, prime_up_to
)

from numba import njit, prange

@njit(cache=True)
def _init_random_shapelet_params(
n_shapelets, shapelet_sizes, n_timestamps, p_norm, max_channels
n_shapelets, shapelet_sizes, n_timestamps, p_norm, max_channels, prime_scheme
):
"""
Initialize the parameters of the shapelets.
@@ -58,11 +58,17 @@ def _init_random_shapelet_params(

# Dilations
upper_bounds = log2(floor_divide(n_timestamps - 1, lengths - 1))
powers = zeros(n_shapelets)
for i in prange(n_shapelets):
powers[i] = uniform(0, upper_bounds[i])
dilations = floor(power(2, powers)).astype(int64)

if prime_scheme:
primes = prime_up_to(int64(2**upper_bounds.max()))
dilations = zeros(n_shapelets, dtype=int64)
for i in prange(n_shapelets):
dilations[i] = choice(primes[primes<=int64(2**upper_bounds[i])])
else:
powers = zeros(n_shapelets)
for i in prange(n_shapelets):
powers[i] = uniform(0, upper_bounds[i])
dilations = floor(power(2, powers)).astype(int64)

# Init threshold array
threshold = zeros(n_shapelets)

@@ -87,7 +93,7 @@
@njit(cache=True, parallel=True)
def M_SL_generate_shapelet(
X, y, n_shapelets, shapelet_sizes, r_seed, p_norm, p_min, p_max, alpha,
dist_func, use_phase, max_channels
dist_func, use_phase, max_channels, prime_scheme
):
"""
Given a time series dataset and parameters of the method, generate the
@@ -142,7 +148,7 @@
#Initialize shapelets
values, lengths, dilations, threshold, normalize, n_channels, channel_ids = \
_init_random_shapelet_params(
n_shapelets, shapelet_sizes, n_timestamps, p_norm, max_channels
n_shapelets, shapelet_sizes, n_timestamps, p_norm, max_channels, prime_scheme
)
#Initialize self similarity mask
unique_dil = unique(dilations)
22 changes: 14 additions & 8 deletions convst/transformers/_multivariate_variable_length.py
@@ -12,14 +12,14 @@
from convst.transformers._commons import (
get_subsequence, compute_shapelet_dist_vector,
apply_one_shapelet_one_sample_multivariate, _combinations_1d,
generate_strides_2D
generate_strides_2D, prime_up_to
)

from numba import njit, prange

@njit(cache=True)
def _init_random_shapelet_params(
n_shapelets, shapelet_sizes, n_timestamps, p_norm, max_channels
n_shapelets, shapelet_sizes, n_timestamps, p_norm, max_channels, prime_scheme
):
"""
Initialize the parameters of the shapelets.
@@ -58,10 +58,16 @@ def _init_random_shapelet_params(

# Dilations
upper_bounds = log2(floor_divide(n_timestamps - 1, lengths - 1))
powers = zeros(n_shapelets)
for i in prange(n_shapelets):
powers[i] = uniform(0, upper_bounds[i])
dilations = floor(power(2, powers)).astype(int64)
if prime_scheme:
primes = prime_up_to(int64(2**upper_bounds.max()))
dilations = zeros(n_shapelets, dtype=int64)
for i in prange(n_shapelets):
dilations[i] = choice(primes[primes<=int64(2**upper_bounds[i])])
else:
powers = zeros(n_shapelets)
for i in prange(n_shapelets):
powers[i] = uniform(0, upper_bounds[i])
dilations = floor(power(2, powers)).astype(int64)

# Init threshold array
threshold = zeros(n_shapelets)
@@ -87,7 +93,7 @@
@njit(cache=True, parallel=True)
def M_VL_generate_shapelet(
X, y, n_shapelets, shapelet_sizes, r_seed, p_norm, p_min, p_max, alpha,
dist_func, use_phase, max_channels, min_len, X_len
dist_func, use_phase, max_channels, min_len, X_len, prime_scheme
):
"""
Given a time series dataset and parameters of the method, generate the
@@ -147,7 +153,7 @@
#Initialize shapelets
values, lengths, dilations, threshold, normalize, n_channels, channel_ids = \
_init_random_shapelet_params(
n_shapelets, shapelet_sizes, min_len, p_norm, max_channels,
n_shapelets, shapelet_sizes, min_len, p_norm, max_channels, prime_scheme
)
#Initialize self similarity mask
unique_dil = unique(dilations)
31 changes: 19 additions & 12 deletions convst/transformers/_univariate_same_length.py
@@ -11,14 +11,14 @@
from convst.transformers._commons import (
get_subsequence, compute_shapelet_dist_vector,
apply_one_shapelet_one_sample_univariate, _combinations_1d,
generate_strides_1D
generate_strides_1D, prime_up_to
)

from numba import njit, prange

@njit(cache=True)
def _init_random_shapelet_params(
n_shapelets, shapelet_sizes, n_timestamps, p_norm
n_shapelets, shapelet_sizes, n_timestamps, p_norm, prime_scheme
):
"""
Initialize the parameters of the shapelets.
@@ -51,20 +51,27 @@ def _init_random_shapelet_params(
"""
# Lengths of the shapelets
lengths = choice(shapelet_sizes, size=n_shapelets).astype(int64)

# Dilations
upper_bounds = log2(floor_divide(n_timestamps - 1, lengths - 1))
powers = zeros(n_shapelets)
for i in prange(n_shapelets):
powers[i] = uniform(0, upper_bounds[i])
dilations = floor(power(2, powers)).astype(int64)

if prime_scheme:
primes = prime_up_to(int64(2**upper_bounds.max()))
dilations = zeros(n_shapelets, dtype=int64)
for i in prange(n_shapelets):
dilations[i] = choice(primes[primes<=int64(2**upper_bounds[i])])
else:
powers = zeros(n_shapelets)
for i in prange(n_shapelets):
powers[i] = uniform(0, upper_bounds[i])
dilations = floor(power(2, powers)).astype(int64)

#PRIME DILATION
# Init threshold array
threshold = zeros(n_shapelets)

# Init values array
values = zeros((n_shapelets, max(shapelet_sizes)))

# Is shapelet using z-normalization ?
normalize = random(size=n_shapelets)
normalize = (normalize < p_norm)
@@ -74,7 +81,7 @@
@njit(cache=True, parallel=True)
def U_SL_generate_shapelet(
X, y, n_shapelets, shapelet_sizes, r_seed, p_norm, p_min, p_max, alpha,
dist_func, use_phase
dist_func, use_phase, prime_scheme
):
"""
Given a time series dataset and parameters of the method, generate the
@@ -129,7 +136,7 @@ def U_SL_generate_shapelet(
#Initialize shapelets
values, lengths, dilations, threshold, normalize = \
_init_random_shapelet_params(
n_shapelets, shapelet_sizes, n_timestamps, p_norm
n_shapelets, shapelet_sizes, n_timestamps, p_norm, prime_scheme
)
#Initialize self similarity mask
unique_dil = unique(dilations)