This repository has been archived by the owner on Apr 10, 2024. It is now read-only.

Fixing RDST Ensemble n_jobs, adding length bounds and dilation scheme parameter #23

Merged 2 commits on Nov 5, 2022
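For orientation before the file-by-file diff: a minimal usage sketch assembled from the calls added in PaperScripts/test_models.py below. It only exercises what the diff shows; the shapelet budgets are illustrative, and the exact semantics of shapelet_lengths_bounds (beyond being passed down to R_DST) are not restated here.

```python
import numpy as np
from convst.classifiers import R_DST_Ridge, R_DST_Ensemble

# Smoke test on toy data, mirroring the compile check in test_models.py
X = np.random.rand(5, 3, 50)
y = np.array([0, 0, 1, 1, 1])
R_DST_Ridge(n_shapelets=1).fit(X, y).predict(X)
R_DST_Ensemble(n_shapelets_per_estimator=1).fit(X, y).predict(X)

# Configuration run on the UCR datasets in the script below,
# combining the options added by this PR
pipeline = R_DST_Ensemble(
    n_jobs=-1,
    phase_invariance=True,
    prime_dilations=True,                   # restrict candidate dilations to primes (see _commons.py)
    shapelet_lengths_bounds=[0.001, 0.25],  # bounds on candidate shapelet lengths
)
```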
22 changes: 15 additions & 7 deletions PaperScripts/test_models.py
@@ -3,25 +3,26 @@
import pandas as pd
import numpy as np

from convst.utils.dataset_utils import return_all_dataset_names
from convst.utils.dataset_utils import return_all_dataset_names, return_all_univariate_dataset_names
from convst.utils.experiments_utils import cross_validate_UCR_UEA

from convst.classifiers import R_DST_Ensemble
from convst.classifiers import R_DST_Ensemble, R_DST_Ridge

print("Imports OK")
#n_cv = 1 to test only on the original train/test split.
n_cv=30

csv_name = 'CV_{}_results_multivariate_ensemble2.csv'.format(
csv_name = 'CV_{}_results_prime_bounds_phase.csv'.format(
n_cv)

# List of datasets to test; here we use all datasets (univariate,
# multivariate, variable length, etc.); see dataset_utils for other choices.
dataset_names = return_all_dataset_names()
dataset_names = return_all_univariate_dataset_names()

# List of models to test
dict_models = {
"R_DST_Ensemble": R_DST_Ensemble,
"R_DST": R_DST_Ridge,
"R_DST_Ensemble": R_DST_Ensemble
}

resume=False
@@ -39,14 +40,21 @@
print("Compiling {}".format(model_name))
X = np.random.rand(5,3,50)
y = np.array([0,0,1,1,1])
model_class(n_shapelets_per_estimator=1).fit(X,y).predict(X)
if model_name == 'R_DST_Ensemble':
model_class(n_shapelets_per_estimator=1).fit(X,y).predict(X)
if model_name == 'R_DST_Ridge':
model_class(n_shapelets=1).fit(X,y).predict(X)

i_df=0
for name in dataset_names:
print(name)
for model_name, model_class in dict_models.items():
print(model_name)
if pd.isna(df.loc[i_df, 'acc_mean']) or df.loc[i_df, 'acc_mean'] == 0.0:
pipeline = model_class(n_jobs=-1, phase_invariance=True)
pipeline = model_class(
n_jobs=-1, phase_invariance=True,
prime_dilations=True, shapelet_lengths_bounds=[0.001, 0.25]
)

#By default, we use accuracy as the score, but other scorers can be passed
#as parameters (e.g. by default scorers={"accuracy":accuracy_score})
14 changes: 11 additions & 3 deletions convst/classifiers/rdst_ensemble.py
@@ -62,6 +62,9 @@ def __init__(
self,
n_shapelets_per_estimator=10000,
shapelet_lengths=[11],
shapelet_lengths_bounds=None,
lengths_bounds_reduction=0.5,
prime_dilations=False,
n_samples=None,
n_jobs=1,
backend="processes",
@@ -74,6 +77,9 @@
self.n_shapelets_per_estimator=n_shapelets_per_estimator
self.shapelet_lengths=shapelet_lengths
self.n_jobs = n_jobs
self.shapelet_lengths_bounds=shapelet_lengths_bounds
self.lengths_bounds_reduction=lengths_bounds_reduction
self.prime_dilations=prime_dilations
self.backend=backend
self.random_state = random_state
self.n_samples=n_samples
@@ -101,7 +107,6 @@ def fit(self, X, y):
Derivate(),
Periodigram()
]

models = Parallel(
n_jobs=self.n_jobs,
prefer=self.backend,
@@ -113,9 +118,12 @@
R_DST(
n_shapelets=self.n_shapelets_per_estimator,
alpha=self.shp_alpha, n_samples=self.n_samples,
proba_norm=self.proba_norm[i], n_jobs=self.n_jobs_rdst,
proba_norm=self.proba_norm[i], n_jobs=-1,
shapelet_lengths=self.shapelet_lengths,
phase_invariance=self.phase_invariance
phase_invariance=self.phase_invariance,
prime_dilations=self.prime_dilations,
shapelet_lengths_bounds=self.shapelet_lengths_bounds,
lengths_bounds_reduction=self.lengths_bounds_reduction
),
_internalRidgeCV()
)
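The n_jobs fix named in the PR title is the change above: each inner R_DST now runs with n_jobs=-1, while the outer joblib.Parallel over the three input representations (raw, derivative, periodogram) keeps the ensemble's own n_jobs and backend. A schematic of that pattern with placeholder work, assuming standard joblib semantics rather than the real fit code:

```python
from joblib import Parallel, delayed

def _fit_one(representation, inner_n_jobs):
    # Placeholder for fitting one (R_DST, RidgeCV) pair on one input representation
    return "fitted {} (inner n_jobs={})".format(representation, inner_n_jobs)

representations = ["raw", "derivative", "periodogram"]

# Outer parallelism over representations, controlled by the ensemble's n_jobs
# and backend; after this PR the inner transformer simply uses n_jobs=-1.
models = Parallel(n_jobs=3, prefer="processes")(
    delayed(_fit_one)(r, -1) for r in representations
)
print(models)
```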
9 changes: 9 additions & 0 deletions convst/classifiers/rdst_ridge.py
@@ -67,6 +67,9 @@ def __init__(
n_samples=None,
n_shapelets=10_000,
shapelet_lengths=[11],
shapelet_lengths_bounds=None,
lengths_bounds_reduction=0.5,
prime_dilations=False,
proba_norm=0.8,
percentiles=[5,10],
n_jobs=1,
@@ -81,10 +84,13 @@
self.fit_intercept=fit_intercept
self.transform_type=transform_type
self.phase_invariance=phase_invariance
self.prime_dilations=prime_dilations
self.distance=distance
self.alpha=alpha
self.normalize_output=normalize_output
self.n_samples=n_samples
self.shapelet_lengths_bounds=shapelet_lengths_bounds
self.lengths_bounds_reduction=lengths_bounds_reduction
self.n_shapelets=n_shapelets
self.shapelet_lengths=shapelet_lengths
self.proba_norm=proba_norm
@@ -114,6 +120,9 @@ def _init_components(self):
phase_invariance=self.phase_invariance,
distance=self.distance,
alpha=self.alpha,
prime_dilations=self.prime_dilations,
shapelet_lengths_bounds=self.shapelet_lengths_bounds,
lengths_bounds_reduction=self.lengths_bounds_reduction,
normalize_output=self.normalize_output,
n_samples=self.n_samples,
n_shapelets=self.n_shapelets,
20 changes: 18 additions & 2 deletions convst/transformers/_commons.py
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-

from numba import njit, prange
from numpy import float_, sqrt, zeros, unique, bool_, where, int64
from numpy import float_, sqrt, zeros, unique, bool_, where, int64, all as _all

###############################################################################
# #
@@ -572,4 +572,20 @@ def _combinations_1d(x,y):
combinations[i_comb,1] = y[i]
u_mask[where(u_x==x[i])[0][0],where(u_y==y[i])[0][0]] = False
i_comb += 1
return combinations
return combinations

@njit(cache=True)
def prime_up_to(n):
is_p = zeros(n, dtype=bool_)
for i in range(n):
is_p[i] = is_prime(i)
return where(is_p)[0]

@njit(cache=True)
def is_prime(n):
if (n % 2 == 0 and n > 2) or n == 0:
return False
for i in range(3, int64(sqrt(n)) + 1, 2):
if not n % i:
return False
return True
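What these helpers enable, for the transformer files below: when prime_scheme is set, each shapelet's dilation is drawn from the primes not exceeding 2**upper_bound (as written, is_prime returns True for 1 and 2, so a dilation of 1 stays in the pool) instead of the default draw of floor(2**u) with u uniform on [0, upper_bound]. A plain NumPy sketch of that sampling step, standing in for the numba-compiled helpers:

```python
import numpy as np

def sample_prime_dilations(n_timestamps, length, n_draws, seed=0):
    # Largest dilation for which a dilated shapelet of this length still fits the series
    upper = (n_timestamps - 1) // (length - 1)
    # Plain-Python stand-in for prime_up_to/is_prime above (1 is kept, as there)
    candidates = [d for d in range(1, upper + 1)
                  if d <= 2 or all(d % k for k in range(2, int(d ** 0.5) + 1))]
    rng = np.random.default_rng(seed)
    return rng.choice(np.array(candidates), size=n_draws)

# Series of length 150, shapelets of length 11: candidate dilations are 1, 2, 3, 5, 7, 11, 13
print(sample_prime_dilations(150, 11, n_draws=5))
```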
24 changes: 15 additions & 9 deletions convst/transformers/_multivariate_same_length.py
@@ -12,14 +12,14 @@
from convst.transformers._commons import (
get_subsequence, compute_shapelet_dist_vector,
apply_one_shapelet_one_sample_multivariate, _combinations_1d,
generate_strides_2D
generate_strides_2D, prime_up_to
)

from numba import njit, prange

@njit(cache=True)
def _init_random_shapelet_params(
n_shapelets, shapelet_sizes, n_timestamps, p_norm, max_channels
n_shapelets, shapelet_sizes, n_timestamps, p_norm, max_channels, prime_scheme
):
"""
Initialize the parameters of the shapelets.
@@ -58,11 +58,17 @@ def _init_random_shapelet_params(

# Dilations
upper_bounds = log2(floor_divide(n_timestamps - 1, lengths - 1))
powers = zeros(n_shapelets)
for i in prange(n_shapelets):
powers[i] = uniform(0, upper_bounds[i])
dilations = floor(power(2, powers)).astype(int64)

if prime_scheme:
primes = prime_up_to(int64(2**upper_bounds.max()))
dilations = zeros(n_shapelets, dtype=int64)
for i in prange(n_shapelets):
dilations[i] = choice(primes[primes<=int64(2**upper_bounds[i])])
else:
powers = zeros(n_shapelets)
for i in prange(n_shapelets):
powers[i] = uniform(0, upper_bounds[i])
dilations = floor(power(2, powers)).astype(int64)

# Init threshold array
threshold = zeros(n_shapelets)

@@ -87,7 +93,7 @@
@njit(cache=True, parallel=True)
def M_SL_generate_shapelet(
X, y, n_shapelets, shapelet_sizes, r_seed, p_norm, p_min, p_max, alpha,
dist_func, use_phase, max_channels
dist_func, use_phase, max_channels, prime_scheme
):
"""
Given a time series dataset and parameters of the method, generate the
@@ -142,7 +148,7 @@
#Initialize shapelets
values, lengths, dilations, threshold, normalize, n_channels, channel_ids = \
_init_random_shapelet_params(
n_shapelets, shapelet_sizes, n_timestamps, p_norm, max_channels
n_shapelets, shapelet_sizes, n_timestamps, p_norm, max_channels, prime_scheme
)
#Initialize self similarity mask
unique_dil = unique(dilations)
22 changes: 14 additions & 8 deletions convst/transformers/_multivariate_variable_length.py
@@ -12,14 +12,14 @@
from convst.transformers._commons import (
get_subsequence, compute_shapelet_dist_vector,
apply_one_shapelet_one_sample_multivariate, _combinations_1d,
generate_strides_2D
generate_strides_2D, prime_up_to
)

from numba import njit, prange

@njit(cache=True)
def _init_random_shapelet_params(
n_shapelets, shapelet_sizes, n_timestamps, p_norm, max_channels
n_shapelets, shapelet_sizes, n_timestamps, p_norm, max_channels, prime_scheme
):
"""
Initialize the parameters of the shapelets.
@@ -58,10 +58,16 @@ def _init_random_shapelet_params(

# Dilations
upper_bounds = log2(floor_divide(n_timestamps - 1, lengths - 1))
powers = zeros(n_shapelets)
for i in prange(n_shapelets):
powers[i] = uniform(0, upper_bounds[i])
dilations = floor(power(2, powers)).astype(int64)
if prime_scheme:
primes = prime_up_to(int64(2**upper_bounds.max()))
dilations = zeros(n_shapelets, dtype=int64)
for i in prange(n_shapelets):
dilations[i] = choice(primes[primes<=int64(2**upper_bounds[i])])
else:
powers = zeros(n_shapelets)
for i in prange(n_shapelets):
powers[i] = uniform(0, upper_bounds[i])
dilations = floor(power(2, powers)).astype(int64)

# Init threshold array
threshold = zeros(n_shapelets)
@@ -87,7 +93,7 @@
@njit(cache=True, parallel=True)
def M_VL_generate_shapelet(
X, y, n_shapelets, shapelet_sizes, r_seed, p_norm, p_min, p_max, alpha,
dist_func, use_phase, max_channels, min_len, X_len
dist_func, use_phase, max_channels, min_len, X_len, prime_scheme
):
"""
Given a time series dataset and parameters of the method, generate the
@@ -147,7 +153,7 @@
#Initialize shapelets
values, lengths, dilations, threshold, normalize, n_channels, channel_ids = \
_init_random_shapelet_params(
n_shapelets, shapelet_sizes, min_len, p_norm, max_channels,
n_shapelets, shapelet_sizes, min_len, p_norm, max_channels, prime_scheme
)
#Initialize self similarity mask
unique_dil = unique(dilations)
31 changes: 19 additions & 12 deletions convst/transformers/_univariate_same_length.py
@@ -11,14 +11,14 @@
from convst.transformers._commons import (
get_subsequence, compute_shapelet_dist_vector,
apply_one_shapelet_one_sample_univariate, _combinations_1d,
generate_strides_1D
generate_strides_1D, prime_up_to
)

from numba import njit, prange

@njit(cache=True)
def _init_random_shapelet_params(
n_shapelets, shapelet_sizes, n_timestamps, p_norm
n_shapelets, shapelet_sizes, n_timestamps, p_norm, prime_scheme
):
"""
Initialize the parameters of the shapelets.
@@ -51,20 +51,27 @@ def _init_random_shapelet_params(
"""
# Lengths of the shapelets
lengths = choice(shapelet_sizes, size=n_shapelets).astype(int64)

# Dilations
upper_bounds = log2(floor_divide(n_timestamps - 1, lengths - 1))
powers = zeros(n_shapelets)
for i in prange(n_shapelets):
powers[i] = uniform(0, upper_bounds[i])
dilations = floor(power(2, powers)).astype(int64)

if prime_scheme:
primes = prime_up_to(int64(2**upper_bounds.max()))
dilations = zeros(n_shapelets, dtype=int64)
for i in prange(n_shapelets):
dilations[i] = choice(primes[primes<=int64(2**upper_bounds[i])])
else:
powers = zeros(n_shapelets)
for i in prange(n_shapelets):
powers[i] = uniform(0, upper_bounds[i])
dilations = floor(power(2, powers)).astype(int64)

#PRIME DILATION
# Init threshold array
threshold = zeros(n_shapelets)

# Init values array
values = zeros((n_shapelets, max(shapelet_sizes)))

# Is shapelet using z-normalization ?
normalize = random(size=n_shapelets)
normalize = (normalize < p_norm)
@@ -74,7 +81,7 @@
@njit(cache=True, parallel=True)
def U_SL_generate_shapelet(
X, y, n_shapelets, shapelet_sizes, r_seed, p_norm, p_min, p_max, alpha,
dist_func, use_phase
dist_func, use_phase, prime_scheme
):
"""
Given a time series dataset and parameters of the method, generate the
@@ -129,7 +136,7 @@ def U_SL_generate_shapelet(
#Initialize shapelets
values, lengths, dilations, threshold, normalize = \
_init_random_shapelet_params(
n_shapelets, shapelet_sizes, n_timestamps, p_norm
n_shapelets, shapelet_sizes, n_timestamps, p_norm, prime_scheme
)
#Initialize self similarity mask
unique_dil = unique(dilations)