Skip to content
This repository has been archived by the owner on Apr 10, 2024. It is now read-only.

Commit

Permalink
Merge pull request #25 from baraline/21-bug-alpha-similarity-with-multiple-input-lengths
Browse files Browse the repository at this point in the history

Correct the n_jobs setting, ambiguous function names, and prime generation
  • Loading branch information
baraline authored Nov 9, 2022
2 parents 6fa02d5 + bc39e37 commit 47869a6
Show file tree
Hide file tree
Showing 12 changed files with 103 additions and 42 deletions.
2 changes: 1 addition & 1 deletion convst/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

__author__ = 'Antoine Guillaume antoine.guillaume45@gmail.com'
__version__ = "0.2.2"
__version__ = "0.2.3"

__all__ = ['transformers', 'classifiers', 'utils', 'interpreters']
4 changes: 3 additions & 1 deletion convst/classifiers/rdst_ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

from numba import set_num_threads

class _internalRidgeCV(RidgeClassifierCV):
def __init__(self, **kwargs):
Expand Down Expand Up @@ -107,6 +108,7 @@ def fit(self, X, y):
Derivate(),
Periodigram()
]
set_num_threads(self.n_jobs_rdst)
models = Parallel(
n_jobs=self.n_jobs,
prefer=self.backend,
Expand All @@ -118,7 +120,7 @@ def fit(self, X, y):
R_DST(
n_shapelets=self.n_shapelets_per_estimator,
alpha=self.shp_alpha, n_samples=self.n_samples,
proba_norm=self.proba_norm[i], n_jobs=-1,
proba_norm=self.proba_norm[i], n_jobs=False,
shapelet_lengths=self.shapelet_lengths,
phase_invariance=self.phase_invariance,
prime_dilations=self.prime_dilations,
Expand Down
13 changes: 10 additions & 3 deletions convst/classifiers/rdst_ridge.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,11 @@
from convst.transformers._input_transformers import c_StandardScaler
from convst.transformers import R_DST

from convst.utils.checks_utils import check_n_jobs
from sklearn.metrics import accuracy_score

from numba import set_num_threads

class R_DST_Ridge(BaseEstimator, ClassifierMixin):
"""
A wrapper class which use R_DST as a transformer, followed by a Ridge
Expand Down Expand Up @@ -95,10 +98,14 @@ def __init__(
self.shapelet_lengths=shapelet_lengths
self.proba_norm=proba_norm
self.percentiles=percentiles
self.n_jobs=n_jobs
if isinstance(n_jobs, bool):
self.n_jobs=n_jobs
else:
self.n_jobs=check_n_jobs(n_jobs)
set_num_threads(self.n_jobs)
self.random_state=random_state
self.min_len=min_len

def _more_tags(self):
return {
"capability:variable_length": True,
Expand Down Expand Up @@ -126,10 +133,10 @@ def _init_components(self):
normalize_output=self.normalize_output,
n_samples=self.n_samples,
n_shapelets=self.n_shapelets,
n_jobs=False,
shapelet_lengths=self.shapelet_lengths,
proba_norm=self.proba_norm,
percentiles=self.percentiles,
n_jobs=self.n_jobs,
random_state=self.random_state,
min_len=self.min_len
)
Expand Down
6 changes: 3 additions & 3 deletions convst/transformers/_commons.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-

from numba import njit, prange
from numpy import float_, sqrt, zeros, unique, bool_, where, int64, all as _all
from numpy import float_, sqrt, zeros, unique, bool_, where, int64

###############################################################################
# #
Expand Down Expand Up @@ -576,8 +576,8 @@ def _combinations_1d(x,y):

@njit(cache=True)
def prime_up_to(n):
is_p = zeros(n, dtype=bool_)
for i in range(n):
is_p = zeros(n+1, dtype=bool_)
for i in range(n+1):
is_p[i] = is_prime(i)
return where(is_p)[0]

Expand Down
4 changes: 2 additions & 2 deletions convst/transformers/_multivariate_same_length.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from numba import njit, prange

@njit(cache=True)
def _init_random_shapelet_params(
def M_SL_init_random_shapelet_params(
n_shapelets, shapelet_sizes, n_timestamps, p_norm, max_channels, prime_scheme
):
"""
Expand Down Expand Up @@ -147,7 +147,7 @@ def M_SL_generate_shapelet(

#Initialize shapelets
values, lengths, dilations, threshold, normalize, n_channels, channel_ids = \
_init_random_shapelet_params(
M_SL_init_random_shapelet_params(
n_shapelets, shapelet_sizes, n_timestamps, p_norm, max_channels, prime_scheme
)
#Initialize self similarity mask
Expand Down
4 changes: 2 additions & 2 deletions convst/transformers/_multivariate_variable_length.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from numba import njit, prange

@njit(cache=True)
def _init_random_shapelet_params(
def M_VL_init_random_shapelet_params(
n_shapelets, shapelet_sizes, n_timestamps, p_norm, max_channels, prime_scheme
):
"""
Expand Down Expand Up @@ -152,7 +152,7 @@ def M_VL_generate_shapelet(

#Initialize shapelets
values, lengths, dilations, threshold, normalize, n_channels, channel_ids = \
_init_random_shapelet_params(
M_VL_init_random_shapelet_params(
n_shapelets, shapelet_sizes, min_len, p_norm, max_channels, prime_scheme
)
#Initialize self similarity mask
Expand Down
6 changes: 2 additions & 4 deletions convst/transformers/_univariate_same_length.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from numba import njit, prange

@njit(cache=True)
def _init_random_shapelet_params(
def U_SL_init_random_shapelet_params(
n_shapelets, shapelet_sizes, n_timestamps, p_norm, prime_scheme
):
"""
Expand Down Expand Up @@ -64,8 +64,6 @@ def _init_random_shapelet_params(
for i in prange(n_shapelets):
powers[i] = uniform(0, upper_bounds[i])
dilations = floor(power(2, powers)).astype(int64)

#PRIME DILATION
# Init threshold array
threshold = zeros(n_shapelets)

Expand Down Expand Up @@ -135,7 +133,7 @@ def U_SL_generate_shapelet(

#Initialize shapelets
values, lengths, dilations, threshold, normalize = \
_init_random_shapelet_params(
U_SL_init_random_shapelet_params(
n_shapelets, shapelet_sizes, n_timestamps, p_norm, prime_scheme
)
#Initialize self similarity mask
Expand Down
4 changes: 2 additions & 2 deletions convst/transformers/_univariate_variable_length.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
# TODO : check if numba could support Tuple of variable length numpy arrays as input

@njit(cache=True)
def _init_random_shapelet_params(
def U_VL_init_random_shapelet_params(
n_shapelets, shapelet_sizes, n_timestamps, p_norm, prime_scheme
):
"""
Expand Down Expand Up @@ -145,7 +145,7 @@ def U_VL_generate_shapelet(

#Initialize shapelets
values, lengths, dilations, threshold, normalize = \
_init_random_shapelet_params(
U_VL_init_random_shapelet_params(
n_shapelets, shapelet_sizes, min_len, p_norm, prime_scheme
)

Expand Down
40 changes: 23 additions & 17 deletions convst/transformers/rdst.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
from sklearn.utils.validation import check_is_fitted, check_random_state

from convst.utils.checks_utils import (
check_array_3D, check_array_1D, check_n_jobs, check_is_numeric,
check_is_boolean
check_array_3D, check_array_1D, check_is_numeric,
check_is_boolean, check_n_jobs
)
from convst.transformers._commons import manhattan, euclidean, squared_euclidean

Expand Down Expand Up @@ -108,10 +108,10 @@ def __init__(
prime_dilations=False,
proba_norm=0.8,
percentiles=[5,10],
n_jobs=1,
random_state=None,
max_channels=None,
min_len=None
min_len=None,
n_jobs=1
):
self.transform_type = self._validate_transform_type(transform_type)
self.phase_invariance = check_is_boolean(phase_invariance)
Expand All @@ -124,18 +124,21 @@ def __init__(
if shapelet_lengths_bounds is None:
self.shapelet_lengths_bounds = None
elif len(shapelet_lengths_bounds)==2:
self.shapelet_lengths_bounds = check_array_1D(shapelet_lengths_bounds)
self.shapelet_lengths_bounds = shapelet_lengths_bounds
else:
raise ValueError('Shapelets lengths bounds should be a 1D array with 2 values')
self.lengths_bounds_reduction=check_is_numeric(lengths_bounds_reduction)
if self.lengths_bounds_reduction>=1:
raise ValueError('lengths_bounds_reduction parameter should be in range [0,1[')
self.prime_dilations = check_is_boolean(prime_dilations)
self.proba_norm = check_is_numeric(proba_norm)
self.percentiles = self._validate_percentiles(percentiles)
if n_jobs != -1:
self.n_jobs = check_n_jobs(n_jobs)
else:
self.n_jobs = n_jobs
self.random_state = check_random_state(random_state)
if isinstance(n_jobs, bool):
self.n_jobs=n_jobs
else:
self.n_jobs=check_n_jobs(n_jobs)
set_num_threads(self.n_jobs)
self.max_channels=max_channels
self.min_len=min_len

Expand All @@ -148,14 +151,20 @@ def _set_lengths(self):
else:
b0 = self.shapelet_lengths_bounds[0]
b1 = self.shapelet_lengths_bounds[1]
min_l = max(5,int(b0*self.min_len))
max_l = max(6,int(b1*self.min_len))

if isinstance(b0, float):
b0 = int(b0*self.min_len)
min_l = max(5,b0)
if isinstance(b1, float):
b1 = int(b1*self.min_len)
max_l = max(6,max(b0+1,b1+1))
#6 to ensure range 5,6 -> 5
lengths = np.asarray(list(range(min_l, max_l)))
if lengths.shape[0]>3:
n_remove = int(lengths.shape[0]*self.lengths_bounds_reduction)
step = lengths.shape[0]//n_remove
lengths = lengths[::step]
if n_remove > 0:
step = lengths.shape[0]//n_remove
lengths = lengths[::step]
return lengths

def fit(self, X, y):
Expand All @@ -173,8 +182,6 @@ def fit(self, X, y):
Class of the input time series.
"""
if self.n_jobs != -1:
set_num_threads(self.n_jobs)
self._set_fit_transform(X)
if self.transform_type in [STR_MULTIVARIATE_VARIABLE, STR_UNIVARIATE_VARIABLE]:
X, X_len = self._format_uneven_timestamps(X)
Expand Down Expand Up @@ -205,7 +212,6 @@ def fit(self, X, y):
self.shapelet_lengths = self._set_lengths()

shapelet_lengths, seed = self._check_params(self.min_len)
print(shapelet_lengths)
# Generate the shapelets
if self.transform_type == STR_UNIVARIATE_VARIABLE:
self.shapelets_ = self.fitter(
Expand Down Expand Up @@ -482,7 +488,7 @@ def _check_params(self, n_timestamps):
raise ValueError('Input data goint {} timestamps, at least 4 are requiered. Input format should be (n_samples, n_features, n_timestamps)'.format(n_timestamps))
else:
warnings.warn("All the values in 'shapelet_lengths' must be lower than or equal to 'n_timestamps' (got {} > {}). Changed shapelet size to {}".format(shapelet_lengths.max(), n_timestamps, n_timestamps//2))
shapelet_lengths = np.array([n_timestamps//2])
shapelet_lengths = shapelet_lengths[shapelet_lengths > n_timestamps] = n_timestamps//2


rng = check_random_state(self.random_state)
Expand Down
11 changes: 5 additions & 6 deletions convst/utils/dataset_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ def _custom_from_nested_to_3d_numpy(X):
return np.array([X[i].values.T for i in range(len(X))])
else:
return [X[i].values.T for i in range(len(X))]


@njit(cache=True)
def z_norm_3D(X):
Expand Down Expand Up @@ -66,7 +65,7 @@ def z_norm_3D_list(X):
return X


def load_sktime_dataset_split(name, normalize=True):
def load_sktime_dataset_split(name, normalize=False):
"""
Load the original train and test splits of a dataset
from the UCR/UEA archive by name using sktime API.
Expand Down Expand Up @@ -120,7 +119,7 @@ def load_sktime_dataset_split(name, normalize=True):
return X_train, X_test, y_train, y_test, min_len


def load_sktime_arff_file(path, normalize=True):
def load_sktime_arff_file(path, normalize=False):
"""
Load a dataset from .arff files.
Expand Down Expand Up @@ -171,7 +170,7 @@ def load_sktime_arff_file(path, normalize=True):
return X_train, X_test, y_train, y_test, le


def load_sktime_arff_file_resample_id(path, rs_id, normalize=True):
def load_sktime_arff_file_resample_id(path, rs_id, normalize=False):
"""
Load a dataset resample from .arff files and the identifier of the
resample.
Expand Down Expand Up @@ -224,7 +223,7 @@ def load_sktime_arff_file_resample_id(path, rs_id, normalize=True):

return X_train, X_test, y_train, y_test, le

def load_sktime_ts_file(path, normalize=True):
def load_sktime_ts_file(path, normalize=False):
"""
Load a dataset from .ts files
Expand Down Expand Up @@ -274,7 +273,7 @@ def load_sktime_ts_file(path, normalize=True):

return X_train, X_test, y_train, y_test, le

def load_sktime_dataset(name, normalize=True):
def load_sktime_dataset(name, normalize=False):
"""
Load a dataset from the UCR/UEA archive by name using sktime API
Expand Down
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
[project]
name = "convst"
version = "0.2.2"

version = "0.2.3"

description = "The Random Dilation Shapelet Transform algorithm and associated works"
readme = "README.md"
authors = [
Expand Down
Loading

0 comments on commit 47869a6

Please sign in to comment.