diff --git a/.circleci/config.yml b/.circleci/config.yml index 5cadd5d..1b4d989 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -46,7 +46,7 @@ jobs: name: python/default steps: - coveralls/upload: - carryforward: 3.11, 3.12 + carryforward: 3.11, 3.12, 3.13 parallel_finished: true workflows: @@ -56,7 +56,7 @@ workflows: - tests: matrix: parameters: - version: ["3.11", "3.12"] + version: ["3.11", "3.12", "3.13"] - coverage: requires: - tests diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000..555f555 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,2 @@ +[run] +omit = "sequentia/model_selection/_validation.py" diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..60404dc --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +*.ipynb linguist-documentation \ No newline at end of file diff --git a/.gitignore b/.gitignore index 5025358..b953c5e 100644 --- a/.gitignore +++ b/.gitignore @@ -94,3 +94,6 @@ venv.bak/ # Changelog entry ENTRY.md + +# Jupyter Notebook checkpoints +*.ipynb_checkpoints/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8230514..c25eafb 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -11,13 +11,13 @@ repos: pass_filenames: false # ruff check (w/autofix) - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.3 # should match version in pyproject.toml + rev: v0.8.4 # should match version in pyproject.toml hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix] # ruff format - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.3 # should match version in pyproject.toml + rev: v0.8.4 # should match version in pyproject.toml hooks: - id: ruff-format # # pydoclint - docstring formatting diff --git a/CHANGELOG.md b/CHANGELOG.md index c141487..c63cf24 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -388,6 +388,21 @@ Nothing, initial release! +## [v2.5.0](https://github.com/eonu/sequentia/releases/tag/v2.5.0) - 2024-12-27 + +### Documentation + +- update copyright notice ([#255](https://github.com/eonu/sequentia/issues/255)) + +### Features + +- add `mise.toml` and support `numpy>=2` ([#254](https://github.com/eonu/sequentia/issues/254)) +- add python v3.13 support ([#253](https://github.com/eonu/sequentia/issues/253)) +- add library benchmarks ([#256](https://github.com/eonu/sequentia/issues/256)) +- add `model_selection` sub-package for hyper-parameters ([#257](https://github.com/eonu/sequentia/issues/257)) +- add model spec support to `HMMClassifier.__init__` ([#258](https://github.com/eonu/sequentia/issues/258)) +- add `HMMClassifier.fit` multiprocessing ([#259](https://github.com/eonu/sequentia/issues/259)) + ## [v2.0.2](https://github.com/eonu/sequentia/releases/tag/v2.0.2) - 2024-04-13 ### Bug Fixes diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index fe583f6..3988459 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -50,6 +50,6 @@ We are thankful for their work and all the communities who have paved the way wi ---

- Sequentia © 2019-2025, Edwin Onuonga - Released under the MIT license.
+ Sequentia © 2019, Edwin Onuonga - Released under the MIT license.
Authored and maintained by Edwin Onuonga.

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1430004..f934143 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -105,6 +105,6 @@ By contributing, you agree that your contributions will be licensed under the re ---

- Sequentia © 2019-2025, Edwin Onuonga - Released under the MIT license.
+ Sequentia © 2019, Edwin Onuonga - Released under the MIT license.
Authored and maintained by Edwin Onuonga.

diff --git a/LICENSE b/LICENSE index c5d8701..c02e87e 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2019-2025 Edwin Onuonga (eonu) +Copyright (c) 2019 Edwin Onuonga (eonu) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 9721eec..b52d532 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,7 @@ About · Build Status · Features · + Installation · Documentation · Examples · Acknowledgments · @@ -57,6 +58,8 @@ Some examples of how Sequentia can be used on sequence data include: - **Simplicity and interpretability**: Sequentia offers a limited set of machine learning algorithms, chosen specifically to be more interpretable and easier to configure than more complex alternatives such as recurrent neural networks and transformers, while maintaining a high level of effectiveness. - **Familiar and user-friendly**: To fit more seamlessly into the workflow of data science practitioners, Sequentia follows the ubiquitous Scikit-Learn API, providing a familiar model development process for many, as well as enabling wider access to the rapidly growing Scikit-Learn ecosystem. +- **Speed**: Some algorithms offered by Sequentia naturally have restrictive runtime scaling, such as k-nearest neighbors. However, our implementation is +optimized to the point of being multiple orders of magnitude faster than similar packages — see the [Benchmarks](#benchmarks) section for more information. ## Build Status @@ -68,33 +71,99 @@ Some examples of how Sequentia can be used on sequence data include: ### Models -The following models provided by Sequentia all support variable length sequences. - #### [Dynamic Time Warping + k-Nearest Neighbors](https://sequentia.readthedocs.io/en/latest/sections/models/knn/index.html) (via [`dtaidistance`](https://github.com/wannesm/dtaidistance)) +Dynamic Time Warping (DTW) is a distance measure that can be applied to two sequences of different length. +When used as a distance measure for the k-Nearest Neighbors (kNN) algorithm this results in a simple yet +effective inference algorithm. + - [x] Classification - [x] Regression +- [x] Variable length sequences - [x] Multivariate real-valued observations - [x] Sakoe–Chiba band global warping constraint - [x] Dependent and independent feature warping (DTWD/DTWI) - [x] Custom distance-weighted predictions -- [x] Multi-processed predictions +- [x] Multi-processed prediction #### [Hidden Markov Models](https://sequentia.readthedocs.io/en/latest/sections/models/hmm/index.html) (via [`hmmlearn`](https://github.com/hmmlearn/hmmlearn)) -Parameter estimation with the Baum-Welch algorithm and prediction with the forward algorithm [[1]](#references) +A Hidden Markov Model (HMM) is a state-based statistical model which represents a sequence as +a series of observations that are emitted from a collection of latent hidden states which form +an underlying Markov chain. Each hidden state has an emission distribution that models its observations. + +Expectation-maximization via the Baum-Welch algorithm (or forward-backward algorithm) [[1]](#references) is used to +derive a maximum likelihood estimate of the Markov chain probabilities and emission distribution parameters +based on the provided training sequence data. 
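As a rough sketch of the training step described above, this is what fitting a single Gaussian HMM on variable-length sequences looks like when calling the underlying [`hmmlearn`](https://github.com/hmmlearn/hmmlearn) API directly (this is not Sequentia's own classifier interface, and the state count and random data are purely illustrative):

```python
import numpy as np
from hmmlearn import hmm

# Two variable-length sequences, concatenated row-wise, with lengths given separately.
X = np.concatenate([np.random.randn(10, 2), np.random.randn(7, 2)])
lengths = [10, 7]

# Baum-Welch (EM) estimates the transition matrix and Gaussian emission parameters.
model = hmm.GaussianHMM(n_components=3, n_iter=50)
model.fit(X, lengths)

# The forward algorithm then scores a sequence by its log-likelihood under the model.
log_likelihood = model.score(X[:10], [10])
```

Sequentia's `HMMClassifier` builds on models like this, fitting one HMM per class and classifying a sequence by comparing the per-class likelihoods.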
- [x] Classification -- [x] Multivariate real-valued observations (Gaussian mixture model emissions) -- [x] Univariate categorical observations (discrete emissions) +- [x] Variable length sequences +- [x] Multivariate real-valued observations (modeled with Gaussian mixture emissions) +- [x] Univariate categorical observations (modeled with discrete emissions) - [x] Linear, left-right and ergodic topologies -- [x] Multi-processed predictions +- [x] Multi-processed training and prediction ### Scikit-Learn compatibility -**Sequentia (≥2.0) is fully compatible with the Scikit-Learn API (≥1.4), enabling for rapid development and prototyping of sequential models.** +**Sequentia (≥2.0) is compatible with the Scikit-Learn API (≥1.4), enabling for rapid development and prototyping of sequential models.** + +The integration relies on the use of [metadata routing](https://scikit-learn.org/stable/metadata_routing.html), +which means that in most cases, the only necessary change is to add a `lengths` key-word argument to provide +sequence length information, e.g. `fit(X, y, lengths=lengths)` instead of `fit(X, y)`. + +### Similar libraries + +As DTW k-nearest neighbors is the core algorithm offered by Sequentia, below is a comparison of the DTW k-nearest neighbors algorithm features supported by Sequentia and similar libraries. + +||**`sequentia`**|[`aeon`](https://github.com/aeon-toolkit/aeon)|[`tslearn`](https://github.com/tslearn-team/tslearn)|[`sktime`](https://github.com/sktime/sktime)|[`pyts`](https://github.com/johannfaouzi/pyts)| +|-|:-:|:-:|:-:|:-:|:-:| +|Scikit-Learn compatible|✅|✅|✅|✅|✅| +|Multivariate sequences|✅|✅|✅|✅|❌| +|Variable length sequences|✅|✅|➖1|❌2|❌3| +|No padding required|✅|❌|➖1|❌2|❌3| +|Classification|✅|✅|✅|✅|✅| +|Regression|✅|✅|✅|✅|❌| +|Preprocessing|✅|✅|✅|✅|✅| +|Multiprocessing|✅|✅|✅|✅|✅| +|Custom weighting|✅|✅|✅|✅|✅| +|Sakoe-Chiba band constraint|✅|✅|✅|✅|✅| +|Itakura paralellogram constraint|❌|✅|✅|✅|✅| +|Dependent DTW (DTWD)|✅|✅|✅|✅|❌| +|Independent DTW (DTWI)|✅|❌|❌|❌|✅| +|Custom DTW measures|❌4|✅|❌|✅|✅| + +- 1`tslearn` supports variable length sequences with padding, but doesn't seem to mask the padding. +- 2`sktime` does not support variable length sequences, so they are padded (and padding is not masked). +- 3`pyts` does not support variable length sequences, so they are padded (and padding is not masked). +- 4`sequentia` only supports [`dtaidistance`](https://github.com/wannesm/dtaidistance), which is one of the fastest DTW libraries as it is written in C. + +### Benchmarks + +To compare the above libraries in runtime performance on dynamic time warping k-nearest neighbors classification tasks, a simple benchmark was performed on a univariate sequence dataset. + +The [Free Spoken Digit Dataset](https://sequentia.readthedocs.io/en/latest/sections/datasets/digits.html) was used for benchmarking and consists of: + +- 3000 recordings of 10 spoken digits (0-9) + - 50 recordings of each digit for each of 6 speakers + - 1500 used for training, 1500 used for testing (split via label stratification) +- 13 features ([MFCCs](https://en.wikipedia.org/wiki/Mel-frequency_cepstrum)) + - Only the first feature was used as not all of the above libraries support multivariate sequences +- Sequence length statistics: (min 6, median 17, max 92) + +Each result measures the total time taken to complete training and prediction repeated 10 times. + +All of the above libraries support multiprocessing, and prediction was performed using 16 workers. 
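The harness used for these timings is not included in this diff; a minimal sketch of the loop described above could look like the following, where the dataset loading is omitted and the `n_jobs` argument (used here to request the 16 workers) is an assumption about the `KNNClassifier` constructor rather than something confirmed by this changeset:

```python
import time

from sequentia.models import KNNClassifier

def time_fit_predict(X_train, y_train, lengths_train,
                     X_test, lengths_test, repeats=10, workers=16):
    """Total wall-clock time for `repeats` rounds of training and prediction."""
    start = time.perf_counter()
    for _ in range(repeats):
        # k=1 as in the README example; use_c=True selects the compiled
        # dtaidistance backend; n_jobs (assumed) controls the worker count.
        clf = KNNClassifier(k=1, use_c=True, n_jobs=workers)
        clf.fit(X_train, y_train, lengths=lengths_train)
        clf.predict(X_test, lengths=lengths_test)
    return time.perf_counter() - start
```

Each of the other libraries would be timed with an equivalent loop so that only the model implementation differs.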
-In most cases, the only necessary change is to add a `lengths` key-word argument to provide sequence length information, e.g. `fit(X, y, lengths=lengths)` instead of `fit(X, y)`. +*: `sktime`, `tslearn` and `pyts` seem to not mask padding, which may result in incorrect predictions. + + + +> **Device information**: +> - Product: Lenovo ThinkPad T14s (Gen 6) +> - Processor: AMD Ryzen™ AI 7 PRO 360 (8 cores, 16 threads, 2-5GHz) +> - Memory: 64 GB LPDDR5X-7500MHz +> - Solid State Drive: 1 TB SSD M.2 2280 PCIe Gen4 Performance TLC Opal +> - Operating system: Fedora Linux 41 (Workstation Edition) ## Installation @@ -104,19 +173,21 @@ The latest stable version of Sequentia can be installed with the following comma pip install sequentia ``` -### C library compilation +### C libraries -For optimal performance when using any of the k-NN based models, it is important that `dtaidistance` C libraries are compiled correctly. +For optimal performance when using any of the k-NN based models, it is important that the correct `dtaidistance` C libraries are accessible. -Please see the [`dtaidistance` installation guide](https://dtaidistance.readthedocs.io/en/latest/usage/installation.html) for troubleshooting if you run into C compilation issues, or if setting `use_c=True` on k-NN based models results in a warning. +Please see the [`dtaidistance` installation guide](https://dtaidistance.readthedocs.io/en/latest/usage/installation.html) for troubleshooting if you run into C compilation issues, or if using k-NN based models with `use_c=True` results in a warning. -You can use the following to check if the appropriate C libraries have been installed. +You can use the following to check if the appropriate C libraries are available. ```python from dtaidistance import dtw dtw.try_import_c() ``` +If these libraries are unavailable, Sequentia will fall back to using a Python alternative. + ### Development Please see the [contribution guidelines](/CONTRIBUTING.md) to see installation instructions for contributing to Sequentia. @@ -127,26 +198,25 @@ Documentation for the package is available on [Read The Docs](https://sequentia. ## Examples -Demonstration of classifying multivariate sequences with two features into two classes using the `KNNClassifier`. +Demonstration of classifying multivariate sequences into two classes using the `KNNClassifier`. -This example also shows a typical preprocessing workflow, as well as compatibility with Scikit-Learn. +This example also shows a typical preprocessing workflow, as well as compatibility with +Scikit-Learn for pipelining and hyper-parameter optimization. -```python -import numpy as np +--- -from sklearn.preprocessing import scale -from sklearn.decomposition import PCA -from sklearn.pipeline import Pipeline +First, we create some sample multivariate input data consisting of three sequences with two features. -from sequentia.models import KNNClassifier -from sequentia.preprocessing import IndependentFunctionTransformer, median_filter +- Sequentia expects sequences to be concatenated and represented as a single NumPy array. +- Sequence lengths are provided separately and used to decode the sequences when needed. + +This avoids the need for complex structures such as lists of nested arrays with different lengths, +or a 3D array with wasteful and annoying padding. 
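For illustration, recovering the individual sequences from the concatenated array needs nothing more than NumPy; the snippet below is a standalone aside, separate from the library example that follows:

```python
import numpy as np

# Two sequences of lengths 3 and 2, concatenated row-wise.
X = np.array([[1.2, 7.9], [1.3, 6.6], [1.7, 5.2], [2.1, 6.8], [2.1, 5.1]])
lengths = np.array([3, 2])

# Split the concatenated array back into the individual sequences.
sequences = np.split(X, np.cumsum(lengths)[:-1])
print([seq.shape for seq in sequences])  # [(3, 2), (2, 2)]
```

Sequentia's estimators perform this kind of decoding internally whenever a method is given `lengths`.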
-# Create input data -# - Sequentia expects sequences to be concatenated into a single array -# - Sequence lengths are provided separately and used to decode the sequences when needed -# - This avoids the need for complex structures such as lists of arrays with different lengths +```python +import numpy as np -# Sequences +# Sequence data X = np.array([ # Sequence 1 - Length 3 [1.2 , 7.91], @@ -168,12 +238,47 @@ lengths = np.array([3, 5, 2]) # Sequence classes y = np.array([0, 1, 1]) +``` + +With this data, we can train a `KNNClassifier` and use it for prediction and scoring. + +**Note**: Each of the `fit()`, `predict()` and `score()` methods require the sequence lengths +to be provided in addition to the sequence data `X` and labels `y`. + +```python +from sequentia.models import KNNClassifier + +# Initialize and fit the classifier +clf = KNNClassifier(k=1) +clf.fit(X, y, lengths=lengths) + +# Make predictions based on the provided sequences +y_pred = clf.predict(X, lengths=lengths) + +# Make predicitons based on the provided sequences and calculate accuracy +acc = clf.score(X, y, lengths=lengths) +``` + +Alternatively, we can use [`sklearn.preprocessing.Pipeline`](https://scikit-learn.org/1.5/modules/generated/sklearn.pipeline.Pipeline.html) to build a more complex preprocessing pipeline: + +1. Individually denoise each sequence by applying a [median filter](https://sequentia.readthedocs.io/en/latest/sections/preprocessing/transforms/filters.html#sequentia.preprocessing.transforms.median_filter) to each sequence. +2. Individually [standardize](https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.scale.html) each sequence by subtracting the mean and dividing the s.d. for each feature. +3. Reduce the dimensionality of the data to a single feature by using [PCA](https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html). +4. Pass the resulting transformed data into a `KNNClassifier`. + +**Note**: Steps 1 and 2 use [`IndependentFunctionTransformer`](https://sequentia.readthedocs.io/en/latest/sections/preprocessing/transforms/function_transformer.html#sequentia.preprocessing.transforms.IndependentFunctionTransformer) provided by Sequentia to +apply the specified transformation to each sequence in `X` individually, rather than using +[`FunctionTransformer`](https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.FunctionTransformer.html#sklearn.preprocessing.FunctionTransformer) from Scikit-Learn which would transform the entire `X` +array once, treating it as a single sequence. -# Create a transformation pipeline that feeds into a KNNClassifier -# 1. Individually denoise each sequence by applying a median filter for each feature -# 2. Individually standardize each sequence by subtracting the mean and dividing the s.d. for each feature -# 3. Reduce the dimensionality of the data to a single feature by using PCA -# 4. 
Pass the resulting transformed data into a KNNClassifier +```python +from sklearn.preprocessing import scale +from sklearn.decomposition import PCA +from sklearn.pipeline import Pipeline + +from sequentia.preprocessing import IndependentFunctionTransformer, median_filter + +# Create a preprocessing pipeline that feeds into a KNNClassifier pipeline = Pipeline([ ('denoise', IndependentFunctionTransformer(median_filter)), ('scale', IndependentFunctionTransformer(scale)), @@ -181,14 +286,51 @@ pipeline = Pipeline([ ('knn', KNNClassifier(k=1)) ]) -# Fit the pipeline to the data - lengths must be provided +# Fit the pipeline to the data pipeline.fit(X, y, lengths=lengths) -# Predict classes for the sequences and calculate accuracy - lengths must be provided +# Predict classes for the sequences and calculate accuracy y_pred = pipeline.predict(X, lengths=lengths) + +# Make predicitons based on the provided sequences and calculate accuracy acc = pipeline.score(X, y, lengths=lengths) ``` +For hyper-parameter optimization, Sequentia provides a `sequentia.model_selection` sub-package +that includes most of the hyper-parameter search and cross-validation methods provided by +[`sklearn.model_selection`](https://scikit-learn.org/stable/api/sklearn.model_selection.html), +but adapted to work with sequences. + +For instance, we can perform a grid search with k-fold cross-validation stratifying over labels +in order to find an optimal value for the number of neighbors in `KNNClassifier` for the +above pipeline. + +```python +from sequentia.model_selection import StratifiedKFold, GridSearchCV + +# Define hyper-parameter search and specify cross-validation method +search = GridSearchCV( + # Re-use the above pipeline + estimator=Pipeline([ + ('denoise', IndependentFunctionTransformer(median_filter)), + ('scale', IndependentFunctionTransformer(scale)), + ('pca', PCA(n_components=1)), + ('knn', KNNClassifier(k=1)) + ]), + # Try a range of values of k + param_grid={"knn__k": [1, 2, 3, 4, 5]}, + # Specify k-fold cross-validation with label stratification using 4 splits + cv=StratifiedKFold(n_splits=4), +) + +# Perform cross-validation over accuracy and retrieve the best model +search.fit(X, y, lengths=lengths) +clf = search.best_estimator_ + +# Make predicitons using the best model and calculate accuracy +acc = clf.score(X, y, lengths=lengths) +``` + ## Acknowledgments In earlier versions of the package, an approximate DTW implementation [`fastdtw`](https://github.com/slaypni/fastdtw) was used in hopes of speeding up k-NN predictions, as the authors of the original FastDTW paper [[2]](#references) claim that approximated DTW alignments can be computed in linear memory and time, compared to the O(N2) runtime complexity of the usual exact DTW implementation. @@ -262,12 +404,12 @@ All contributions to this repository are greatly appreciated. Contribution guide Sequentia is released under the [MIT](https://opensource.org/licenses/MIT) license. -Certain parts of the source code are heavily adapted from [Scikit-Learn](scikit-learn.org/). +Certain parts of source code are heavily adapted from [Scikit-Learn](scikit-learn.org/). Such files contain a copy of [their license](https://github.com/scikit-learn/scikit-learn/blob/main/COPYING). ---

- Sequentia © 2019-2025, Edwin Onuonga - Released under the MIT license.
+ Sequentia © 2019, Edwin Onuonga - Released under the MIT license.
Authored and maintained by Edwin Onuonga.

diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py
new file mode 100644
index 0000000..f8f49c1
--- /dev/null
+++ b/benchmarks/__init__.py
@@ -0,0 +1,8 @@
+# Copyright (c) 2019 Sequentia Developers.
+# Distributed under the terms of the MIT License (see the LICENSE file).
+# SPDX-License-Identifier: MIT
+# This source code is part of the Sequentia project (https://github.com/eonu/sequentia).
+
+"""Collection of runtime benchmarks for Python packages
+providing dynamic time warping k-nearest neighbors algorithms.
+"""
diff --git a/benchmarks/benchmark.svg b/benchmarks/benchmark.svg
new file mode 100644
index 0000000..3f9a775
--- /dev/null
+++ b/benchmarks/benchmark.svg
@@ -0,0 +1,1621 @@
[1,621 added lines of SVG markup (the rendered benchmark plot) are not reproduced here; only their text nodes survived extraction: a creation timestamp of 2024-12-24T17:13:37.655962, the media type image/svg+xml, and the generator Matplotlib v3.10.0 (https://matplotlib.org/).]
diff --git a/benchmarks/plot.ipynb b/benchmarks/plot.ipynb
new file mode 100644
index 0000000..0642d70
--- /dev/null
+++ b/benchmarks/plot.ipynb
@@ -0,0 +1,106 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "ed902379-677e-4c90-aa1c-95ef9dbb1d11",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "plt.style.use(\"ggplot\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6649bf2d-7430-401d-8113-f3c1e1cf4779",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "image/png":
"iVBORw0KGgoAAAANSUhEUgAAAxYAAAGGCAYAAADmRxfNAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAdWJJREFUeJzt3XdYFFfbBvB76b2DgIgUQRAQsXewIPYu2GI3MXaNxoKxRWxRE40ajSViNFExdrGXWBN7w4KKiCAICIiAlIX5/vhkXlb6LgbQ+3ddXDAzZ+Y8M5yd3WfnzBmJIAgCiIiIiIiIFKBU3gEQEREREVHlx8SCiIiIiIgUxsSCiIiIiIgUxsSCiIiIiIgUxsSCiIiIiIgUxsSCiIiIiIgUxsSCiIiIiIgUxsSCiIiIiIgUxsSCiIiIiIgUxsTiEzZkyBC4uroWuGzixImwsbEp9Ta9vLzQuXPnMoju49aZlJSEuXPn4v79+2UWx5AhQyCRSCCRSKCsrAxDQ0PUr18f06ZNw4sXL8RyW7ZsEcsV9bN582ZIJBI8fvxYpp6ff/4ZEokEc+bMkZmfkJAAJSUlLF26tMgYC/uff7gfgwYNyrese/fu8PLyEqfPnj0LiUQCfX19JCUlyZTdt28fJBIJwsPDi6zvv/LHH3/AwcEBqqqqqFOnTnmHU2nExsZCV1cX9+7dE+ft3LkTvXr1gpWVFSQSCZYtW1aibeW2lw9/+vbtm6/swYMH4e7uDg0NDTg6OuK3337LVyYzMxNTp06Fubk5tLW14e3tjUePHhUbR97Xat6fvOeR+/fvo0+fPrCysoKGhgasrKzQuXNnHDlyRCzz4WtZV1cXTk5OGDZsGK5cuVJkvUWdIwrzMc5befcjPj5ervVDQ0MhkUgQERGBW7duYe7cuUhLSyvTGHP99NNPCA4O/ijbJsV8//338Pb2hoGBASQSCa5du1Zo2cDAQHh4eEBDQwMmJibo0KED3r1795/GW5zy+DyTV0BAALy9vcut/o+FiQWVytq1a7F8+fIKX2dSUhLmzZtX5m/QdnZ2uHz5Mi5cuIA///wT3bt3x/bt2+Hq6oqTJ08CADp16oTLly+LP7NmzQIAHD16VGZ+s2bNAACXLl2SqePixYvQ0tLKN//SpUsQBAHNmzcvk335448/8PTp0xKVTU5Oxk8//VQm9X4MKSkpGDZsGJo3b46zZ8/i999/L++QKo2AgAB4eXnJJKS7d+9GWFiY3G+6v/32m0xbX7BggczyCxcuoEePHmjSpAmOHDkCPz8/DB8+HLt375YpN378eGzYsAELFy7Enj17kJGRgTZt2uDNmzfFxpD7Ws37k3seefr0KRo1aoSXL19i+fLlOHLkCL7//ntoamri7Nmz+baV+9rdv38/JkyYgLt376Jx48ZYsmRJofUWdY4ozMc6b+WekwwMDORa/+DBg6hduzasra1x69YtzJs3j4nFZ2j9+vXIzMxE27ZtiywXEBCAcePGwc/PD8eOHcP69etha2uL7Ozs/yzWymDMmDG4cuUKzpw5U96hlC2BPlmDBw8WXFxcClw2YcIEoXr16v95TKWRlpYm97rPnj0TAAhBQUFlFk9hx/P169eCi4uLYGRkJLx58ybf8t9++00AIMTFxeVbZmpqKowcOVJmnpWVlTB69GhBR0dHkEql4vzp06cLGhoaQkZGRqlj/LCMg4ODYGVlJQwbNkxmWbdu3QRPT09x+syZMwIAoVWrVoKhoaHM/u3du1cAIDx79qzI+j6m9PR0ITs7W7h7964AQDh16pTC25RKpUJmZmaZxFfRvX37VtDW1hb27NkjMz87O1v8G4Dwww8/lGh7ue3l6tWrRZZr166d0LRpU5l5/fr1E5ydncXpFy9eCMrKysL69evFea9fvxa0tbWFJUuWFLn94l4H/v7+go6OjpCamppvWd59L+y1m52dLQwcOFCQSCTC+fPni623uHNErtKctxQ5P5aWl5eXMHPmTEEo5nxWFqpXry6MGTPmo2ybFJP72ijqdf7w4UNBRUVFCA4OLocISyb3tePp6Sl06tSpzLef+75UEkOHDhW6detW5jGUJ16xICDPpfKbN2+iQ4cO0NbWhoODA7Zu3SpTLu+lw9xuDx9eDs3Ozoa5uTlmzJgBAHj48CH69u2LatWqQUtLC7Vq1cLy5cuRk5MjrhMeHg6JRIItW7Zg5MiRMDY2RsOGDfPVWZLthYeHw9bWFgDQp08fsWtCbpedjIwMzJw5E9WrV4e6ujqcnZ3xxx9/yH3sjIyMsHTpUiQkJGDHjh2lWrdZs2a4ePGiOB0REYHIyEhMmDAB6enpuHPnjrjs4sWLqF+/PtTU1Eq8/ZycHIwYMQImJiYy/yc1NTVMmzYNv//+O54/f17sdqZMmYL09HT8/PPPpdq/3P9rYGAghg8fDn19fRgZGWHy5MmQSqUyZSMjIzFw4ECYmJhAU1MTLVu2xPXr12XK2NjYYOzYsVi6dCmqV68OTU1NjB8/Hm5ubgCANm3aQCKRYO7cucD77mPDhg0Tt9m0aVOcO3dOZpu57SswMBA1a9aEuro6bt++LXYrO3nyJGrXrg1NTU14enoiPDwcCQkJ8PX1hZ6eHuzt7bFz506ZbR4+fBje3t4wMzODnp4eGjVqhKNHj8qUKelrLnd7zZo1g5aWFgwNDeHl5YWbN2+Ky5OSkjB69GhYWFhAXV0d9erVw/Hjx4v9/+ReIejQoYPMfCWlj/fWkJGRgTNnzqBPnz4y8/v27YsHDx6Ir9Pjx48jJydHppyRkRHatWun8DfaiYmJ0NPTg5aWVr5lJdl3JSUlrFy5Eurq6li7dm2x5UtyjijqvFXU+bE0bS23K1Tu9rZt24axY8fC0NAQFhYWmDJlSr7XZVJSEi5cuIAuXbpgy5YtGDp0KADA1NQUEolEpkttSV7DBw4cQP369aGjowMDAwPUr19f/H/a2Njg+fPnWLNmjbj/W7ZsKfS4bt68GS4uLtDU1ISxsTGaN2+Oq1evissFQcCyZcvg6OgIdXV12NnZ4ccff8y3nf3798PJyQkaGhpo2LAhrl69CgMDA/E8gjznnrwK6g5akveX3HPL2bNn4eHhAW1tbTRs2DDfscrJycGKFSvg7OwMdXV1mJubo0+fPjJX7B48eIBu3bpBX18f2tra6NSpU74r0cUdp5IqyWvjt99+g62tbb5zSnGGDx+OFi1aiNPx8fFQUlJCgwYNxHkpKSlQVVVFUFCQOO/cuXNo2rQpNDU1YWJigmHDhiEhIUFcXtRr50Pv3r1Dp06dYGdnh7CwMECB96WEhARERkbC19cXVapUgYaGBmxtbTFp0iSZdfv06YPDhw/L3U2xImJiQTIGDBiAdu3aYd++ffDw8MCQIUPw4MGDAsu2bNkSlpaW+d4oT58+jVevXqF///4AgKioKNSsWRNr165FcHAwvvzyS8yfPx/ff/99vm3OmDEDgiDgzz//xA8//FBgvcVtz8LCAnv27AEALFy4UOwGYWFhAQDw9fXF+vXr8c033+DQoUNo3749Bg4cKNO3urRat2
4NFRUVXL58uVTrNWvWDA8ePEBiYiLwPnmoVq0aHB0d4e7uLiYdWVlZuHr1aqm6QUmlUgwYMACHDx/G2bNnUb9+fZnlI0aMgLGxMRYuXFjstszMzPDVV1/hxx9/REpKSqn2EQBmzpyJnJwc7Nq1C1OnTsXPP/8sdhHD+w96zZs3x61bt/Dzzz/jr7/+gra2Nlq3bo3Y2FiZbf311184dOgQVq5cif3792PatGnih/E1a9bg8uXLGDFiBLKzs9GhQwccPHgQS5YsQVBQEHR0dODt7Z3vjeHatWv44YcfMH/+fAQHB6NatWoAgJiYGHzzzTfw9/fH9u3b8fTpUwwYMAB+fn5wc3PDX3/9hXr16mHgwIEyCdqzZ8/QpUsX/P777/jrr7/QrFkzdOzYscBuNsW95nbu3IkuXbrAzMwMf/zxB7Zv345mzZohKioKeH8fgre3Nw4dOoSAgAAcOHAAtWrVQqdOnXD37t0i/y8nT55E3bp1oaGhUcr/aNE6duwIZWVlWFlZYerUqTJ9q58+fYqsrCw4OTnJrOPs7Ay8/+Ig97eZmRkMDQ3zlcstUxypVCrzk6tevXp4+fIlRo0ahVu3bsl8yVFSRkZGqFevXolf88WdI4o7b6GQ82Np2tqH/P39oaSkhF27dmHUqFFYvnw5Nm7cKFPm6NGjMDIyQsOGDdGpU6d8XTv37t0LlPA1/PTpU/Tu3RsuLi7Yu3cvdu7cCV9fX/H8t3fvXpibm6N3797i/nfq1KnA2M+dO4fhw4ejY8eOCA4OxtatW9GmTRuZe8EmTJiA2bNnY/DgwTh8+DCGDBmCadOmYd26dWKZW7duoVevXnBwcMCePXswePBg+Pr6IiMjo9jjV5CSvr/ExMRg/PjxmDp1Knbt2oX09HT06NEDWVlZYplx48bh22+/RefOnXHw4EGsWbMGurq64jk4LCwMTZs2RUJCArZs2YI//vgDcXFxaNOmjRh/SY5TWfrnn3/g5uaGBQsWwMzMDGpqamjWrBn+/fffItdr2bIlrl69ivT0dDFudXV13Lx5E2/fvgXedweWSqVo2bIlAOD69evw9vaGrq4ugoKCsGTJEhw8eBAdOnTI1+2quM8WKSkp6NixI54+fYrz58/Dzs5OofclbW1tDBo0CHfu3MGqVatw9OhRzJs3L19cTZo0QXZ2doler5VGeV8yoY+nNF2hci9vr1mzRpyXkpIiaGlpCd9//70478NLh5MmTRKsrKyEnJwccd7QoUMLrTcnJ0fIysoSAgICBAsLC3F+bheA9u3b51unqMuVxW3vwy4Fp0+fFgAIx44dk5nv5+cnNGjQoMA6chXXvcLc3LzA+IvqOnD58mUBgHD48GFBEARh7Nixgp+fnyAIgjBu3DihX79+giAIwj///CMAEA4dOlSiGNPT04WuXbsK1tbWQmhoaKH7sXz5ckFNTU148eKFIBTRFerq1atCVFSUoK6uLnZDKUlXqNz/Q4sWLWTmf/fdd4KWlpaQkJAgCIIgzJ49W9DX1xdevXollklPTxesra2FqVOnivOqV68uGBsbCykpKTLbu3nzpgBAOHPmjDhv//79AgDh6NGj4rzMzEzB2tpa6NmzpzjP09NTUFVVFSIiIvIdJ4lEIty7d0+c9/PPPwsAhGnTponzEhMTBWVlZeGnn34q8BhkZ2cLWVlZQrt27cT/p1DC11xOTo5gZWUl+Pj4FHqMN2/eLKioqAghISEy8xs1aiT06dOn0PUEQRAcHR2L7XZSmq5QN27cEL799lvh0KFDwqlTpwR/f39BXV1d5vV74cIFAYBw+fJlmXXj4uIEAML27dsFQRCEESNGCDVr1sxXxw8//CCoqqoWGcfgwYMFAPl+crstSaVSoX///uJ8XV1doVu3bsL+/ftltlNct5++ffsKGhoaMvXKc47IVdh5q6jzY17FtbXc/cjd3oftw9PTU2jTpo3MvAEDBgiDBw8udFu5SvIaDgoKEgAIycnJhe5DSbtC/fDDD4KRkVGhy588eSJIJBKZrnSCIAjTpk0TzM3Nxa4qfn5+gq2trUy3002bNgkAhDlz5hQZ14fnwJK+vxR0bsk91+a20UePHgkSiURYuHBhofs4aNAgwc7OTnj37p04LzY2VtDR0RHPK8UdJ3kU1RWqZs2ago6OjuDg4CAEBQUJhw8fFpo1aybo6enJtI0PhYWFCQCEs2fPCsL7zyj9+vUTjI2NhSNHjgjC+y6Mjo6O4jo9evQQrK2tZbquHjt2TAAgHDhwQBBK+NkiISFBaNSokeDu7i4To6LvS9ra2sKqVauKPZ7Vq1cXpkyZUmy5yoJXLEhGu3btxL+1tbVRvXp1REZGFlq+X79+iIyMxIULF4D3357u3bsX/fr1E8ukp6djzpw5qFGjBtTV1aGqqgp/f39ER0fn+/a7sG+n8irN9j50/PhxGBkZoXXr1jLfZHp7e+PmzZsK3VwmCAIkEkmp1qlXrx40NTXFKxMXL15E06ZNgfffZOSdL5FIxGXZ2dky8f//57//9+7dO3Tu3BkPHjzA+fPn4eDgUGj9o0aNgr6+PhYvXlxsrJaWlhg+fDiWL19e6hs3e/ToITPdu3dvpKWlid+oHz9+HK1atYKRkZG4T8rKyvD09Mx3yd7Lywva2trF1nn+/Hno6enBx8dHnKeqqoqePXuK7TVX7dq1xasUH+6zi4uLOO3o6AgAMjcvGhgYwMzMTGbUn8jISAwePBhVq1aFiooKVFVVcfz4cYSGhuaro6jX3KNHjxAZGYlhw4YVup/Hjx+Hm5sbHB0d87Xp4ro7REdHw9TUtMgypeHh4YElS5agU6dOaN26NRYsWIDly5fj8OHDBY6i9DHZ29vj6tWrMj+5o4UpKytj+/btuHfvHhYtWoQWLVrg+PHj6NatG2bPnl3iOkr7mpfnHJFXQefH0rS1D+VtewBQq1YtmfN9dnY2jhw5gi5duhS7rZK8hmvXrg1lZWX0798fBw8eLNFN+IWpW7cuEhISMGTIEJw4cSLfOSn3RvlevXrJvC7atm2LmJgY8fX677//okuXLlBWVhbX7d27t1wxleb95cNzS61atYD3/0+8v/IvCAKGDx9eZH1du3aFioqKWJehoSE8PDzEY17ccSprOTk5SElJwe7du9G7d2907NgRBw4cgCAIWL16daHr2drawsrKSuyqeu7cOXh5eaFFixb4+++/xXm5Vyvw/hzfrVs3qKqqivPatWsHAwODfOf4wj5bxMfHo1WrVgCAM2fOwMzMTFym6PtS3bp1sWzZMvzyyy948uRJoftuYmKC6OjoQpdXNkwsPmEqKiqFflDOzs6WeTHm+nDUEDU1NfHSZEEaNGgAe3t7/PnnnwCAI0eOICkpSSaxmDZtGn744QeMHDkSwcHBuHr1qng5/cNtV6lSpdj9Ks32PhQfH4+EhASoqqrK/IwYMQJSqVTuF3d6ejpev34Nc3PzUq2nqqqKBg0a4OLFi0hJScGdO3fE5KFp06biPRcXL15ErVq1xG4hbdq0kYk/98QLAHFxcfj777/RqVMnWFtbF1m/lpYWJ
k+ejE2bNpVo36dNm4akpCSsX7++VPuZ92SNPP/n3Drj4+Oxb9++fP+X33//Pd8wnSVpI3jfNePDenPXz9sHt6htFvR6KGx+btvLyclB165dceHCBcyfPx9nzpzB1atX0aFDhwLbZ1Hbev36NfD+Q0hh4uPjcfPmzXzHbsGCBcUOcZqeng51dfUiyyjK19cXeN9tAYDYhj/8UJnbHcbIyEgsV9AHz8TERLFMUTQ0NFC/fn2ZHx0dHZkyLi4umD59Og4fPoznz5/Dw8MDixYtytc+ChMZGVni17y854i8PmynpW1rHyrufH/p0iWkpKTkS0AKUpLXsKOjIw4dOoQ3b96gR48eMDU1RdeuXREREVGKo/D/Wrdujd9//x0hISHw8fGBiYkJBg0aJP7v4uPjIQgCTExMZOLJHd4zN6bo6Oh85wk9PT25ugeW5v2lsHNL3te+iopKgeewvPX99NNP+eo7f/68uH/FHaeyZmhoCGNjY9SuXVucZ2RkBA8PD4SEhBS5rqenJ86dO4fk5GTcvn0bLVu2RMuWLXHu3DlkZGTgypUrMolFYmJigefu0pzjQ0NDcfv2bfTr1y9ft0tF35d27tyJNm3awN/fHw4ODnBychK7O+alrq5e4YbiVYRKeQdAH4+pqSliYmIKXPby5csiT1il0a9fP6xfvx6rVq3Cjh070KhRI9jZ2YnLg4KC8NVXX2HatGnivMOHDxe4rZJ8m1ea7X3IyMgIpqamhd78Ke8xOXXqFKRSqZgUlEbz5s3x008/4cKFC1BXVxe/Va1evTosLCxw8eJFXLp0Cd26dRPXWb9+vdjvFABq1qwp/m1tbY25c+eib9++MDExgb+/f5H1jxkzBj/88EORz8fIu+3Bgwfjhx9+wIoVK0q8jx/2R3316hXwvl853v9f2rdvX+B9Nx9+8C3pN75GRkb56s2t+8MPpop8i/yhJ0+e4ObNm9i3b5/M/0yeNw5jY2Pg/eu1MEZGRqhduzY2bdpU6u0bGRl9tL7WhbG3t4eqqioePnwoczUp976J3HsvnJyc8OrVKyQmJsq84T98+DDf/RllwdTUFEOHDsX48ePx+PFjNGrUqMjyr1+/xrVr10r87bYi54hcH7bTsmxrBTl06BBatmwJXV3dYsuW9DXcvn17tG/fHsnJyTh69CgmTZqEoUOH4tSpU6WOb+DAgRg4cCDi4+Oxf/9+TJo0Caqqqti0aROMjIwgkUhw4cKFAge8yD1nWlhY5DtPJCcn50vMNDQ0kJmZKTMvNxnOewzK6v3F2NgYUqkUsbGxha5nZGSETp06YfTo0fmW5f2fFXWcypqLi0uhw5gXl+y2bNkSkydPxtmzZ2FiYgInJyekpqZi2rRpOHPmDDIyMmRu8C6Lc3zTpk3Rtm1bTJ48GcbGxhg4cKDM9hV5X7KwsMDmzZuxceNGXL9+HQsWLICfnx8ePXok8xkpKSlJ5upVZccrFp8wT09PJCUl5RsFJzk5GWfOnJHJ/BXRr18/xMXF4cCBAzhw4IDM1Qq8f5PLe2LPzs4u9ehJpd3eh9/+5Grbti3i4uKgpqaW79vM0o64lCsxMRHTpk2DiYlJgQ8CK07z5s2RlpaG1atXo0GDBlBR+V++37RpU/z++++IiYkRn3uB92+KeeP+8I2/d+/eCAwMxOzZs4t9/oSuri4mTpyI9evXF3iS/tCMGTMQFxeHDRs2lHgfc2/wzLV7925oaWmJozm1bdsW9+/fh7Ozc77/SW6Z0mrevDmSk5NlRkeSSqXYu3dvmT0LpCC5H+rytqXnz5/LjP5VUjVr1oSVlVWBD4/L1bZtW4SFhcHS0rLANl3c9p89e1bquEoj97WZO7qLuro6WrVqle+ZFTt37oSzs7M4ylC7du2gpKSEv/76SyyTmJiI48ePo2PHjgrFlJvYfii3+1BxVxVycnIwceJEZGZmYsyYMcXWV9JzRGHnrcKUZVsryKFDh/J1gyrq3Fqa17Cenh58fX3F0cDybr+k+5/LxMQEw4cPh7e3t7itNm3aAO8TwIJeF7nnzIYNG+LgwYMyV/c/bJsAYGVllW8gkw9HXivL95fWrVtDIpEU+9q/d+8ePDw88tWV98umoo5TWevcuTNev36NW7duifNev36NGzduoF69ekWu27JlS6SmpmLFihXi55M6depAU1MTixcvRrVq1WRGIWvevDn27dsnMzDDiRMnkJSUVKpz/MSJE7FgwQIMGTJE5n9fVu9LuaNbLViwAFKpVKZbVE5ODiIiIgr8f1VWvGLxCWvXrh1atGiBnj17Yvbs2XB1dcXLly+xdOlSKCsrY/z48WVST61atVC7dm2MGzcO6enp8PPzk1nu7e2NDRs2oFatWjAxMcHatWvlHnGjpNszNzeHgYEB/vzzT9ja2kJdXR21a9eGt7c3unTpgvbt2+Pbb79F7dq1kZqaipCQEDx58iTfiCgfevfuHf755x/gfVeOa9euYd26dUhOTsa+ffvydbUoiSZNmkBJSQnBwcGYPn16vmVTp04F3p9ES2PAgAF49+4dvvrqK2hqauKrr74qtOz48eOxfPlyXL58GZ6enkVu19bWFgMGDEBgYGCJY3n69CmGDh2Kvn374saNG1i0aBEmTZokfhM9efJkbN++HZ6enpgwYQKsra0RFxeHf//9F5aWlvmG6CuJTp06oWHDhhg4cCAWL16MKlWq4Oeff0Z0dDRmzpxZ6u2VlJOTE6ysrDB9+nRkZ2cjJSUFc+bMQdWqVUu9rdynXvfr1w+9evXCoEGDoK6ujsuXL6NBgwbo3LkzBg0ahPXr18PLywtTpkyBo6MjkpKScPPmTWRmZmLRokWFbr9Zs2bYtWtXvvn379+XeUjb3bt3sXv3bmhra8sMIymRSDB48GBxSNCBAweiRo0a4khTp0+fxo8//oju3bvLJDnfffcdvLy8MHr0aPj6+uLMmTP4448/ZIbttbKywogRIzB16lQoKyujatWqWLhwIfT19YtsyyXx/fff49atW+jXrx9cXFyQnp6O48ePY+3atejevTuqV68uU/769evQ19fHu3fv8OjRI2zevBnXr1/H0qVL0aRJE5myipwjCjtvFaYs29qHwsLCcP/+/XwPScwdvWvNmjXo3r27+AVBSV7D69evx+XLl9G+fXtYWFjg2bNn2LZtm0xXK2dnZ5w+fRonTpyAoaEhbG1txSt3ec2ZMwevX7+Gl5cXzMzMcPfuXRw9ehSTJ08G3ne7GjNmDL744gtMnToVjRo1QlZWFkJDQ3HmzBns27cPADB9+nQ0aNAA3bt3x+jRoxEWFoZly5bl6wrVu3dvfP3115g3bx6aNm2K4ODgfCN8Kfr+kpejoyNGjRqFWbNmISEhAW3atEFaWhoOHz6MuXPnomrVqpg3bx4aNGgAHx8ffPnll6hSpQpiYmLw999/o0WLFujXr1+xxwnvh78NDAyUuVevIH///Tfi4uLELk2nT59GeHg4bGxsxNd39+7d0aBBA/Tu3RsBAQHQ1NTEokWLoK6uXuCVlbycnJxgZmaGv//+G6tWrQLe3w/VrFkz
HDlyBAMGDJAp7+/vj6ZNm6Jz584YN24cXr16henTp6Nhw4al/vJhxowZePfuHfr37w8NDQ107txZofelN2/ewMfHB1988QVq1qyJzMxM/PzzzzAwMEDdunXFco8ePUJKSorMlZhKr7zvHqePKzk5WZg4caJgbW0tqKioCMbGxkKfPn3yjRRU2Egf7u7uMiOCFDZC06JFiwQA+UYUEQRBiImJEbp37y7o6uoKVapUEaZNmyZs2LChwFFKCnow1Id1lmR7wvsRO5ydnQV1dXWZkTsyMjKEefPmCQ4ODoKamppgamoqtGrVSti6dWuRxzLvSDMSiUTQ19cXPDw8hG+//TbfiEIlObZ5ubm5CQCEgwcPysy/dOmSAECwtLQsMra8MX44Ks3PP/8sKCkpCYGBgYWWEQRBmDVrlgCg0FGh8nr06JGgrKxc4lGhfvvtN2Hw4MGCrq6uYGBgIEyYMCHfQ+iio6OF4cOHCxYWFoKamppgZWUl9O7dW7h48aJYprARYwoaFUoQBCE+Pl4YMmSIYGRkJKirqwtNmjQRRx3JVVibLug4FXY8PozrypUrQoMGDQQNDQ3BwcFBCAwMzLe9kr7mBEEQDhw4IDRq1EjQ0NAQDAwMhNatWws3b94Ul79580aYNGmSYG1tLaiqqgoWFhZCx44dix1F7Pr16wKAfOeDOXPmFDiqUt6R5FJSUvKNkLVw4ULBxcVF0NHREVRVVQVHR0dh7ty5BT7Ucf/+/YKbm5ugpqYm1KhRQ9i0aVO+Munp6cI333wjmJmZCZqamkLbtm2FBw8eFLlPQglGZ7p8+bIwfPhwcQQbfX19wd3dXVi+fLnMCDu5/6PcH21tbcHR0VEYOnSocOXKlQLrlecckVdB562izo/ytLXCtpd3tMCVK1fKPLAwr7lz5wpWVlaCkpKSTJso7jV86dIloVOnTuJya2trYcKECTKjRN27d09o0aKFoKurK547CnLw4EGhTZs2gqmpqaCuri7Y29sLc+bMEbKyssQyOTk5ws8//yy4uroKampqgpGRkdCkSRNhxYoVMtvas2eP4OjoKKirqwv16tUT/vnnH0FfX19mVKisrCxhypQpQpUqVQR9fX3hq6++Ev74449858CSvL8U1D4TExPz7W92drawdOlSwcHBQVBVVRXMzc0FPz8/mYcshoaGCr6+voKxsbGgrq4u2NjYCIMGDRJHnCrJcerdu7dQpUqVAo9zXp6engWeFz48X8XFxQkDBw4U9PX1BU1NTaFdu3b5Rq0rTO/evQUAwq1bt8R5ixcvFgDkG+FLEATh7NmzQpMmTQR1dXXByMhIGDJkiPD69WtxeWk+WwiCIEyZMkVQV1cXTpw4IQgKvC+lp6eLI9tpamoKRkZGQrt27fKdN5YvXy5Ur15dZmTNyk4iFJeiEhHJKfehX0FBQXKPtEIfV7169Uo9GhLe3zOQO+67lZXVR4uPyke7du1Qp06dEt179SkyMDDAxIkTZR6S96mytrbG2LFj8e2335Z3KJ+dBg0aoEuXLqU+/1ZkvMeCiOgzNnv2bKxbt67U3RMvXryIwYMHM6n4RB0/fvyzTSo+JxEREUhNTS22mxKVvXPnzuHp06dl1i29ouA9FkREn7Fu3brh8ePHePHiBWrUqFHi9T6lb9iIPlfW1tbisNb030pOTsbWrVvzDT1c2bErFBERERERKYxdoYiIiIiISGFMLIiIiIiISGFMLIiIiIiISGFMLIiIiIiISGFMLIiIiIiISGEcblYBiYmJkEql5R1GhWBqaoq4uLjyDoMqEbYZkgfbDcmD7YbkwXbz/1RUVGBoaFiysh89mk+YVCpFVlZWeYdR7iQSCfD+eHD0YioJthmSB9sNyYPthuTBdiMfdoUiIiIiokrn3bt3aNasGZydnfMt++OPP9CiRQvUqFEDjRo1wrFjx8qs3qVLl6JNmzawtrYu8GGh586dg4+PDxwdHeHl5YUzZ86UWd0VHa9YEBEREVGls2zZMlStWhUJCQky87dt24YNGzbgl19+gYuLC+Lj45GWllZm9drY2MDf3x9//PFHvmXPnz/H8OHDsXbtWrRp0wanTp3CyJEjcerUKVSvXr3MYqioeMWCiIiIiCqVO3fu4OzZsxgzZozM/OzsbCxbtgzz58+Hq6srJBIJTE1NxQ/1L168QNWqVbFjxw40adIEDg4OWLBgAV69eoW+ffuiZs2a6NWrF2JjYwut29fXF61bt4aOjk6+ZWfOnIGbmxu8vb2hpKQEb29v1KlTB7t37wbe3587fPhw1KpVC87Ozmjfvj0iIyPL/PiUFyYWRERERFRpSKVSTJ06FQEBAVBVVZVZ9vTpU8TFxeHu3bto1KgR6tWrh6lTp+Lt27cy5S5evIhTp07h8OHD2LRpE0aNGoV58+bhzp07UFVVxapVq+SKTRCEfPdkCIKABw8eAADWrVsHqVSK69ev4969e1i2bBm0tbXlqqsiYmJBRERERJXGL7/8AldXVzRu3DjfsqSkJADA+fPnceTIEZw4cQIRERGYO3euTLkJEyZAS0sLjo6OqFWrFho2bIiaNWtCXV0d7du3x927d+WKrUWLFrh9+zaOHj0KqVSKo0eP4urVq2Jio6qqisTERISFhUFZWRmurq4lHnGpMuA9FkRERERUKTx79gy///57oTdja2lpAQDGjh0LIyMj8e8Pu0yZmpqKf2tqasLExERmOjU1Va74atSogV9++QXLly/HN998g/r166Nbt27iKKJff/01MjIyMGrUKLx9+xZdu3bFjBkzoKmpKVd9FQ0TCyIiIiKqFK5cuYL4+Hi0aNECeN8tKiUlBa6urti6dSucnZ2hoaFRrjH6+PjAx8dHnO7cuTN69+4NANDW1oa/vz/8/f0RERGBIUOGIDAwEKNGjSrHiMsOEwsiIiIiqhS6du0qJhUAcP36dUydOhXHjx+HiYkJ1NTU0LNnT6xduxZubm6QSCRYu3atzAd9RWVlZSE7OxvZ2dnIyclBeno6lJWVxfs9bt++DRcXF6Snp2PDhg1ITEyEr68vAODEiROws7ODra0tdHR0oKKiAhWVT+fj+KezJ0RERET0SdPU1JTpNhQeHg6JRAJLS0tx3rx58zBz5kw0adIEampqaNeuHebMmVNmMUydOhVBQUHi9G+//YY+ffrgp59+AgAsWrQIN2/ehEQiQYsWLRAUFCR20QoPD8fs2bMRFxcHbW1tdOzYEYMGDSqz2MqbRODjBOUWFxfHJ2+/fzqlhYUFoqOj+XRKKhG2GZIH2w3Jg+2G5MF28z+qqqoy96QUhaNCERERERGRwphYEBERERGRwniPBRERERGVu27bH5Z3CB94UN4BiPYPcCrvEEqEVyyIiIiIiEhhTCyIiIiIiEhhTCyIiIiIiEhhTCyIiIiIiEhhTCyIiIiIiEhhTCyIiIiIiEhhTCyIiIiIiEhhTCyIiIiIiEhhTCyIiIiIiEhhTCyIiIiIiEhhTCyIiIiIiEhhTCyIiIiIiEhhTCyIiIiIiEhhTCyIiIiIiEhhTCyIiIiIiEhhTCyIiIiIiEh
hTCyIiIiIiEhhTCyIiIiIiEhhTCyIiIiIiEhhTCyIiIiIiEhhTCyIiIiIiEhhTCyIiIiIiEhhTCyIiIiIiEhhKuUdQF579+7FlStXEBUVBTU1NTg6OmLgwIGwtLQUy8ydOxf379+XWa9t27b48ssvxen4+Hhs2LABISEh0NDQgKenJ/r37w9lZWWxTEhICLZu3YoXL17A2NgYvXr1gpeX13+0p0REREREn5YKlVjcv38fPj4+sLe3R3Z2Nv78808sWLAAK1asgIaGhliuTZs28PPzE6fV1NTEv3NycrBo0SIYGBhgwYIFSExMxOrVq6GsrIz+/fsDAGJjY7F48WJ4e3tj3LhxuHfvHtatWwcDAwPUqVPnP95rIiIiIqLKr0J1hfL394eXlxeqVasGGxsbjBkzBvHx8QgLC5Mpp66uDgMDA/FHS0tLXHb79m1ERkZi3LhxsLGxgYeHB/z8/HDs2DFIpVIAwPHjx2FmZoZBgwbBysoK7du3R+PGjXH48OH/fJ+JiIiIiD4FFSqx+FBaWhoAQEdHR2b++fPnMXz4cHzzzTf4448/kJGRIS4LDQ2FtbU1DAwMxHl16tTBu3fv8OLFCwDA48eP4ebmJrNNd3d3hIaGfuQ9IiIiIiL6NFWorlB55eTkYMuWLahZsyasra3F+c2bN4eJiQmMjIzw/PlzbN++HS9fvsSUKVMAAElJSTJJBQDo6+uLy3J/587LW+bdu3fIzMyU6VoFAFlZWcjKyhKnJRIJNDU1xb8/d7nHgMeCSopthuTBdkPyYLuhT0Flab8VNrHYtGkTXrx4gfnz58vMb9u2rfi3tbU1DA0NMX/+fMTExMDc3PyjxLJ3717s3r1bnLa1tcWSJUtgamr6UeqrrD7W8adPF9sMyYPthuTBdlMZPCjvACosCwuL8g6hRCpkYrFp0ybcuHED8+bNg7GxcZFla9SoAQBiYmFgYIAnT57IlHnz5g0AiFcyDAwMxHl5y2hqaua7WgEAPXr0QOfOncXp3KwxLi5OvG/jcyaRSGBubo6YmBgIglDe4VAlwDZD8mC7IXmw3dCnIDo6utzqVlFRKfGX6RUqsRAEAZs3b8aVK1cwd+5cmJmZFbtOeHg4AMDQ0BAA4OjoiD179uDNmzdid6c7d+5AU1MTVlZWAAAHBwfcvHlTZjt37tyBo6NjgXWoqqpCVVW10Jjp/wmCwONBpcI2Q/JguyF5sN1QZVZZ2m6Funl706ZNOH/+PCZMmABNTU0kJSUhKSkJmZmZwPurErt370ZYWBhiY2Nx7do1rFmzBs7OzqhevTrw/iZsKysrrF69GuHh4bh16xZ27NgBHx8fMTlo164dYmNjsW3bNkRFReHYsWO4fPkyOnXqVK77T0RERERUWVWoKxbHjx8H3j8EL6/Ro0fDy8sLKioquHv3LoKDg5GRkQFjY2M0atQIPXv2FMsqKSlh+vTp2LhxI2bNmgV1dXV4enrKPPfCzMwM06dPR2BgIIKDg2FsbIxRo0bxGRZERERERHKSCJXl2koFFBcXJzNa1OdKIpHAwsIC0dHRleZSHZUvthmSB9sNyYPtpvLotv1heYdQYe0f4FRudauqqpb4HosK1RWKiIiIiIgqJyYWRERERESkMCYWRERERESkMCYWRERERESkMCYWRERERESkMCYWRERERESkMCYWRERERESkMCYWRERERESkMCYWRERERESkMCYWRERERESkMCYWRERERESkMCYWRERERESkMCYWRERERESkMCYWRERERESkMCYWRERERESkMCYWRERERESkMCYWRERERESkMCYWRERERESkMCYWRERERESkMCYWRERERESkMCYWRERERESkMCYWRERERESkMCYWRERERESkMCYWRERERESkMCYWRERERESkMCYWRERERESkMCYWRERERESkMCYWRERERESkMCYWRERERESkMCYWRERERESkMCYWRERERESkMCYWRERERESkMCYWRERERESkMCYWRERERESkMCYWRERERESkMCYWRERERESkMCYWRERERESkMCYWRERERESkMCYWRERERESkMCYWRERERESkMCYWRERERESkMJXyDiCvvXv34sqVK4iKioKamhocHR0xcOBAWFpaimUyMzOxdetWXLp0CVlZWXB3d8eIESNgYGAglomPj8eGDRsQEhICDQ0NeHp6on///lBWVhbLhISEYOvWrXjx4gWMjY3Rq1cveHl5/ef7TERERET0KahQVyzu378PHx8fBAQEYNasWcjOzsaCBQuQnp4ulgkMDMT169cxefJkzJs3D4mJiVi+fLm4PCcnB4sWLYJUKsWCBQswZswYnD17Fjt37hTLxMbGYvHixXBxccHSpUvRqVMnrFu3Drdu3frP95mIiIiI6FNQoRILf39/eHl5oVq1arCxscGYMWMQHx+PsLAwAEBaWhpOnz6NwYMHw9XVFXZ2dhg9ejQePXqE0NBQAMDt27cRGRmJcePGwcbGBh4eHvDz88OxY8cglUoBAMePH4eZmRkGDRoEKysrtG/fHo0bN8bhw4fLdf+JiIiIiCqrCtUV6kNpaWkAAB0dHQBAWFgYsrOz4ebmJpapWrUqTExMEBoaCkdHR4SGhsLa2lqma1SdOnWwceNGvHjxAra2tnj8+LHMNgDA3d0dW7ZsKTCOrKwsZGVlidMSiQSampri35+73GPAY0ElxTZD8mC7IXmw3dCnoLK03wqbWOTk5GDLli2oWbMmrK2tAQBJSUlQUVGBtra2TFl9fX0kJSWJZfImFbnLc5fl/s6dl7fMu3fvkJmZCTU1NZlle/fuxe7du8VpW1tbLFmyBKampmW6z5Wdubl5eYdAlQzbDMmD7YbkwXZTGTwo7wAqLAsLi/IOoUQqbGKxadMmvHjxAvPnzy/vUNCjRw907txZnM7NGuPi4sTuVZ8ziUQCc3NzxMTEQBCE8g6HKgG2GZIH2w3Jg+2GPgXR0dHlVreKikqJv0yvkInFpk2bcOPGDcybNw/GxsbifAMDA0ilUqSmpspctXjz5o14lcLAwABPnjyR2d6bN2/EZbm/c+flLaOpqZnvagUAqKqqQlVVtcBYeZL6H0EQeDyoVNhmSB5sNyQPthuqzCpL261QN28LgoBNmzbhypUrmD17NszMzGSW29nZQVlZGXfv3hXnvXz5EvHx8XB0dAQAODo6IiIiQiZxuHPnDjQ1NWFlZQUAcHBwkNlGbpncbRARERERUelUqMRi06ZNOH/+PCZMmABNTU0kJSUhKSkJmZmZAAAtLS20bt0aW7duxb179xAWFoa1a9fC0dFRTArc3d1hZWWF1atXIzw8HLdu3cKOHTvg4+MjXnVo164dYmNjsW3bNkRFReHYsWO4fPkyOnXqVK77T0RERERUWUmECnRtxdfXt8D5o0ePFh9el/uAvIsXL0IqlRb4gLy4uDhs3LgRISEhUFdXh6enJwYMGJDvAXmBgYGIjIyU+wF5cXFxMqNFfa4kEgksLCwQHR1daS7VUflimyF5sN2QPNhuKo9u2x+WdwgV1v4BTuVWt6
qqaonvsahQiUVlw8Ti//GkTaXFNkPyYLshebDdVB5MLApXWRKLCtUVioiIiIiIKicmFkREREREpDAmFkREREREpDAmFkREREREpLBSPyAvNjYW165dw8OHDxEVFYXk5GRIJBLo6uqiatWqcHJyQv369fM9g4KIiIiIiD5dJU4srl+/joMHD+Lhw4cQBAHm5uYwMzNDtWrVAACpqal4/vw5/v33XwQGBsLJyQldu3ZFvXr1Pmb8RERERERUAZQosfD390d4eDgaNGiASZMmwc3NDVpaWgWWTUtLw507d/DPP//gxx9/RPXq1REQEFDWcRMRERERUQVSosTCxcUFU6dOlXkIXWG0tLTQuHFjNG7cGElJSQgODi6LOImIiIiIqAIrUWLRv39/uTZuYGAg97pERERERFR5cFQoIiIiIiJSWKlHhQKAu3fv4tmzZ+jatas47/Tp0wgKCoJUKkWzZs0waNAgKCkxbyEiIiIi+hzI9ck/KCgI4eHh4nRERAQ2bNgAPT091KpVC0eOHMGBAwfKMk4iIiIiIqrA5EosoqKiYG9vL06fO3cOmpqamD9/PiZNmoQ2bdrg3LlzZRknERERERFVYHIlFunp6dDU1BSnb926hTp16kBdXR0AUKNGDcTFxZVdlEREREREVKHJlViYmJjg6dOnAICYmBi8ePECtWvXFpenpKRAVVW17KIkIiIiIqIKTa6bt5s3b47du3cjISEBkZGR0NbWRoMGDcTlYWFhsLCwKMs4iYiIiIioApMrsejZsyekUilu3rwJExMTjB49Gtra2sD7qxUhISHo2LFjWcdKREREREQVlFyJhbKyMvr164d+/frlW6ajo4MNGzaURWxERERERFRJ8EETRERERESksBIlFr/++itiY2NLvfGYmBj8+uuv8sRFRERERESVSIm6Qr1+/RoTJkyAm5sbmjZtCldXV5iYmBRYNjY2Fnfv3sXly5cREhIiM1oUERERERF9mkqUWMyYMQMPHz7EwYMHsX79euTk5EBXVxempqbQ0dGBIAhITU1FbGwsUlJSoKSkBA8PD8yZMwdOTk4ffy+IiIiIiKhclfjmbScnJzg5OSE5ORnXr19HaGgoXr58idevXwMAdHV10bBhQzg6OqJu3brQ19f/mHETEREREVEFUupRofT09NCqVSu0atXq40RERERERESVDkeFIiIiIiIihTGxICIiIiIihTGxICIiIiIihTGxICIiIiIihTGxICIiIiIihTGxICIiIiIihZV6uNm8QkNDERISgjdv3sDHxwcWFhbIyMhAVFQULC0toaGhUXaREhERERFRhSVXYiGVSvHTTz/h6tWr4rz69evDwsICEokEAQEB6NSpE3r27FmWsRIRERERUQUlV1eoHTt24Pr16xg5ciR++uknmWVqampo3LixTNJBRERERESfNrkSi4sXL6Jdu3Zo27YtdHR08i2vWrUqYmNjyyI+IiIiIiKqBORKLJKTk2FtbV34RpWUkJGRoUhcRERERERUiciVWBgbGyMqKqrQ5Y8ePYK5ubkicRERERERUSUiV2LRvHlznDx5EqGhofmWnTx5EpcvX0bLli3LIj4iIiIiIqoE5BoVqmfPnnj8+DHmzJmDqlWrAgACAwORkpKChIQEeHh4oHPnzmUdKxERERERVVByJRYqKiqYOXMmzp8/j3/++Qc5OTmQSqWoXr06+vbti5YtW0IikZR9tEREREREVCHJ/YA8iUSCli1bsssTERERERHJd48FERERERFRXnJfsXj48CFOnz6N2NhYpKamQhAEmeUSiQQ//PBDqbZ5//59HDhwAM+ePUNiYiKmTJmChg0bisvXrFmDv//+W2Ydd3d3+Pv7i9MpKSnYvHkzrl+/DolEgkaNGmHo0KHQ0NAQyzx//hybNm3C06dPoaenh/bt26Nbt25yHAUiIiIiIoK8icWhQ4fw+++/Q01NDZaWlgU+JE8eGRkZsLGxQevWrbFs2bICy9SpUwejR48Wp1VUZHdh1apVSExMxKxZs5CdnY21a9di/fr1mDBhAgAgLS0NCxYsgJubG0aOHImIiAj88ssv0NbWRtu2bctkP4iIiIiIPjdyJRYHDhyAk5MTpk2bBi0trTILxsPDAx4eHkWWUVFRgYGBQYHLIiMjcevWLSxatAj29vYAgGHDhmHRokX44osvYGRkhAsXLkAqlWL06NFQUVFBtWrVEB4ejkOHDjGxICIiIiKSk1yJRUZGBpo3b16mSUVJ3b9/HyNGjIC2tjZcXV3Rt29f6OrqAgBCQ0Ohra0tJhUA4ObmBolEgidPnqBhw4YIDQ2Fs7OzzJUOd3d37N+/HykpKQVefcnKykJWVpY4LZFIoKmpKf79ucs9BjwWVFJsMyQPthuSB9sNfQoqS/uVK7FwcXFBRERE2UdTjDp16qBRo0YwMzNDTEwM/vzzTyxcuBABAQFQUlJCUlIS9PT0ZNZRVlaGjo4OkpKSAABJSUkwMzOTKZN7BSQpKanAxGLv3r3YvXu3OG1ra4slS5bA1NT0I+1p5cSnrVNpsc2QPNhuSB5sN5XBg/IOoMKysLAo7xBKRK7EYtiwYQgICMCBAwfQunXrMrvHojjNmjUT/7a2tkb16tUxbtw4hISEwM3N7aPV26NHD5kH/uVmjXFxcZBKpR+t3spCIpHA3NwcMTEx+W7iJyoI2wzJg+2G5MF2Q5+C6OjocqtbRUWlxF+my5VYmJiYoG3btvj999+xfft2qKmpQUkp/8i1gYGB8my+xKpUqQJdXV3ExMTAzc0NBgYGSE5OlimTnZ2NlJQU8aqEgYGBePUiV+50YfduqKqqQlVVtcBlPEn9jyAIPB5UKmwzJA+2G5IH2w1VZpWl7cqVWOzcuRN79uyBkZER7O3ty+VeCwB4/fo1UlJSYGhoCABwdHREamoqwsLCYGdnBwC4d+8eBEFAjRo1xDJ//vknpFKpeJ/FnTt3ynR0KyIiIiKiz41cicWJEydQt25dTJ06tcArFfJKT09HTEyMOB0bG4vw8HDo6OhAR0cHQUFBaNSoEQwMDPDq1Sts27YN5ubmcHd3BwBYWVmhTp06WL9+PUaOHAmpVIrNmzejadOmMDIyAgA0b94cQUFBWLduHbp164YXL17gyJEjGDx4cJntBxERERHR50auxEIqlaJu3bplmlQAwNOnTzFv3jxxeuvWrQAAT09P8ZkTf//9N1JTU2FkZITatWvDz89PppvS+PHjsWnTJsyfP198QN6wYcPE5VpaWpg1axY2bdqE6dOnQ1dXF7169eJQs0RERERECpAIcnTaWrVqFfD+Q/znLC4uTmYY2s+VRCKBhYUFoqOjK00fQCpfbDMkD7YbkgfbTeXRbfvD8g6hwto/wKnc6lZVVS3xzdtyXXLo06cPoqKisHHjRoSFhSE5ORkpKSn5foiIiIiI6PMgV1eoiRMnAgDCw8Nx4sSJQsvt3LlT/siIiIiIiKjSkCux6NWrV6V5AiAREREREX18ciUWvr6+ZR8JERERERFVWmU7rBMREREREX2WSnTFYvfu3QCAnj17Q
klJSZwuTu/evRWLjoiIiIiIKoUSJRZBQUEAgO7du0NJSUmcLg4TCyIiIiKiz0OJEosPR3fiaE9ERERERJQX77EgIiIiIiKFyZVY+Pn54cKFC4Uuv3TpEvz8/BSJi4iIiIiIKpGPcsUiJyeHz7kgIiIiIvqMlHlikZaWhlu3bkFXV7esN01ERESfiIyMDEydOhWNGzeGo6MjWrZsiR07dsiUefv2LcaMGYOaNWvC3d0dP/74Y5nG8Ntvv6FDhw6wtbXFsGHD8i3/2PUTfWpK/IC8oKAgmWFmf/75Z/z888+Flu/QoYPi0REREdEnKTs7G2ZmZtixYweqV6+OGzdu4IsvvoCFhQU8PT0BALNmzUJSUhKuXLmC+Ph49O3bF1ZWVujTp0+ZxFClShVMmDAB58+fR3R0dL7lH7t+ok9NiROLGjVqwMfHB4Ig4Pjx46hduzYsLCzyldPQ0ICdnR0aNmxY1rESERHRJ0JLSwtTp04Vp+vVq4emTZviypUr8PT0xLt373DgwAHs27cP+vr60NfXx7Bhw7Bjxw7xg33VqlWxYMECBAYGIjIyEh06dMD333+P6dOn48yZM6hevTrWrl1b4OcVAOjYsSMAICQkJF9iUVz9giBg4cKFCAoKwrt372Bqaoo5c+bA29v7ox43ooqsxImFh4cHPDw8gPeXL729veHg4PAxYyMiIqLPRHp6Om7evInu3bsDAJ4+fYrMzEy4uLiIZVxcXPL1ljh27Bj27t2LjIwM+Pj4oHfv3li4cCFWr16NqVOnYsGCBTh+/Hip4ymu/nPnzmHv3r04evQozM3NERUVhfT0dAWOAFHlV+LEIq/Ro0eXfSRERET0WRIEAVOnToWtra14FSE1NRVaWlpQUfnfRxU9PT2kpKTIrDtq1CgYGhoCABo3bgxlZWWx10Tnzp0xbdo0uWIqrn4VFRVkZGQgNDQUxsbGqFq1qlz1EH1K5Eos8H7kp1u3biE2NjbfizwXn7xNRERERREEATNmzMDTp0+xY8cOKCn9/7gy2traePfuHaRSqfjhPjk5GTo6OjLrm5iYiH9rampCT09PZjo1NVWuuIqrv1mzZvjmm2+wdOlSPHnyBC1atMB3330Ha2trueoj+hTIlVg8ffoUy5cvx+vXr4ssx8SCiIiICiMIAmbOnImbN29i586dMkmBvb09VFVVcf/+fdSuXRsAcP/+fTg5Of0nsZWk/iFDhmDIkCFITk7GjBkz8N133yEwMPA/iY+oIpIrsdi4cSMyMzMxdepUODs7Q1tbu+wjIyIiok+av78/rl69il27dsHAwEBmmaamJrp06YIffvgBa9asQXx8PDZv3ixzw7eipFKp+JOTk4P09HQoKSlBTU2t2Ppv3bqFrKwsuLu7Q0NDA1paWrzHgj57ciUWERER6Nu3L+rXr1/2EREREdEnLzIyEoGBgVBXV0ejRo3E+T179sSSJUsAAAEBAZg2bRrq168PDQ0NDB06tEyHel25ciVWrFghTtvb26NJkybi8PpF1f/27VvMnz8fz58/h6qqKurWrYvFixeXWWxElZFEEAShtCuNGzcO3t7e6Nq168eJqpKIi4tDVlZWeYdR7iQSCSwsLBAdHQ05mhN9hthmSB5sNyQPtpvKo9v2h+UdQoW1f8B/0wWwIKqqqjA1NS1RWbmevN2tWzecOnUKaWlp8qxORERERESfGLm6QqWnp0NDQwPjx49H06ZNYWJiIo7ikFfnzp3LIkYiIiIiIqrg5Eosfv/9d/HvY8eOFVqOiQUREdHnqWJ1a3lQ3gGIyrNLC9HHJldisXr16rKPhIiIiIiIKi25EouS3sBBRERERESfB7lu3iYiIiIiIspLrisWY8aMgUQiKbKMRCLBzz//LG9cRERERERUiciVWNSqVStfYpGTk4O4uDg8evQI1apVg62tbVnFSEREREREFZzcVywKEx4ejoCAADRv3lyRuIiIiIiIqBIp83ssbGxs4O3tje3bt5f1pomIiIiIqIL6KDdv6+vrIzIy8mNsmoiIiIiIKqAyTyzevn2L06dPw9jYuKw3TUREREREFZRc91jMmzevwPlpaWmIioqCVCrF2LFjFY2NiIiIiIgqCbkSC0EQChxu1tTUFG5ubmjVqhWqVq1aFvEREREREVElIFdiMXfu3GLLFJZ8EBERERHRp6fM77GQSqU4efIkJk6cWNabJiIiIiKiCqpUVyykUimuXbuGmJgY6OjooG7dujAyMgIAZGRk4OjRowgODkZSUhKqVKnysWImIiIiIqIKpsSJRUJCAubNm4eYmBhxnpqaGr799luoqKhg1apVSEhIQI0aNTB06FA0atToY8VMREREREQVTIkTix07diA2NhbdunWDk5MTYmNj8ddff+HXX39FcnIyqlWrhnHjxqFWrVofN2IiIiIiIqpwSpxY3LlzB15eXujfv784z8DAAD/++CM8PDzw7bffQknpozxvj4iIiIiIKrgSJxZv3ryBg4ODzDxHR0cAQOvWrcskqbh//z4OHDiAZ8+eITExEVOmTEHDhg3F5YIgYNeuXTh16hRSU1Ph5OSEESNGwMLCQiyTkpKCzZs34/r165BIJGjUqBGGDh0KDQ0Nsczz58+xadMmPH36FHp6emjfvj26deumcPxERERERJ+rEmcDOTk5UFNTk5mnqqoKANDS0iqTYDIyMmBjY4Phw4cXuHz//v04cuQIRo4ciYULF0JdXR0BAQHIzMwUy6xatQovXrzArFmzMH36dDx48ADr168Xl6elpWHBggUwMTHB4sWLMXDgQAQFBeHkyZNlsg9ERERERJ+jUo0KFRsbi7CwMHE6LS0NABAdHV1gcmFnZ1eqYDw8PODh4VHgMkEQEBwcjJ49e6JBgwYAgLFjx2LkyJG4evUqmjVrhsjISNy6dQuLFi2Cvb09AGDYsGFYtGgRvvjiCxgZGeHChQuQSqUYPXo0VFRUUK1aNYSHh+PQoUNo27ZtqeIlIiIiIqL/V6rEYufOndi5c2e++Rs3biy0fFmJjY1FUlISateuLc7T0tJCjRo1EBoaimbNmiE0NBTa2tpiUgEAbm5ukEgkePLkCRo2bIjQ0FA4OztDReV/u+7u7o79+/cjJSUFOjo6+erOyspCVlaWOC2RSKCpqSn+/bnLPQY8FlRSbDMkD7Yb+hSw/ZI8Kku7KXFi8fXXX3/cSIqRlJQEANDX15eZr6+vLy5LSkqCnp6ezHJlZWXo6OjIlDEzM5MpY2BgIC4rKLHYu3cvdu/eLU7b2tpiyZIlMDU1LbP9+xSYm5uXdwhUybDNfDpWr16NLVu24O7du+jQoQP27dtXYLlXr17B2dkZ1tbWuHXrllx1FdVuCtr++fPn0aFDB5lyaWlpGDt2LFatWiVXDFQSD8o7gAop732h9CG2mcJUlnZT4sTCy8vr40ZSgfXo0QOdO3cWp3Ozxri4OEil0nKMrGKQSCQwNzdHTEwMBEEo73CoEmCb+fRoampi9OjROH/+PKKjoxEdHV1guS+//BIuLi5ISEgotExhStJuCtp+jRo18PjxY7FMXFwc
6tWrh7Zt25Y6BiJFsc2RPMqz3aioqJT4y/RSdYUqT7lXFd68eQNDQ0Nx/ps3b2BjYyOWSU5OllkvOzsbKSkp4voGBgbi1YtcudO5ZT6kqqoq3qj+IX4o+h9BEHg8qFTYZj4duVcEQkJCEB0dXeD/9dixY0hMTESvXr2wceNGscyLFy/QuHFjLF++HCtXrkR8fDwGDx6MkSNHYsKECbh58yZcXV2xbt06mJubF9puCtv+h3bt2gVbW1vUr18fgiAgIyMDM2bMwPHjxyGVSmFpaYkVK1agTp06ZX6ciHjOI3lUlnZTaR48YWZmBgMDA9y9e1ecl5aWhidPnojD3jo6OiI1NVXmBvN79+5BEATUqFFDLPPgwQOZKw137tyBpaVlgd2giIhIccnJyZg3bx4WL15caJmLFy/i1KlTOHz4MDZt2oRRo0Zh3rx5uHPnDlRVVYvstlSS7efasWMH+vbtK04HBQXh/v37uHjxIh48eIANGzawqysRkRwqVGKRnp6O8PBwhIeHA+9v2A4PD0d8fDwkEgk6duyIPXv24Nq1a4iIiMDq1athaGgojhJlZWWFOnXqYP369Xjy5AkePnyIzZs3o2nTpjAyMgIANG/eHCoqKli3bh1evHiBS5cu4ciRIzJdnYiIqGwtWLAAffr0KXK0wAkTJkBLSwuOjo6oVasWGjZsiJo1a0JdXR3t27eX+WJJnu0DwL///ouIiAj06dNHnKeqqoqUlBQ8fvwYgiDA3t4eVatWlXNPiYg+XxWqK9TTp08xb948cXrr1q0AAE9PT4wZMwbdunVDRkYG1q9fj7S0NDg5OWHmzJkyz9cYP348Nm3ahPnz54sPyBs2bJi4XEtLC7NmzcKmTZswffp06OrqolevXhxqlojoI/n3339x7do1HD16tMhyea8SaGpqwsTERGY6NTVVoe0DwJ9//glvb28YGxuL83r16oVXr15h+vTpiI6Ohre3N2bPni1+IUVERCVToRILFxcX7Nq1q9DlEokEfn5+8PPzK7SMjo4OJkyYUGQ91atXx/z58xWKlehT9dtvv2HXrl14+PAhWrVqhc2bN8ssHzlyJK5du4a0tDQYGhqib9++mDhxYpnHERcXBy8vL1haWuLEiRPi/HPnziEgIADPnj2DpaUl5syZg1atWpV5/VR2Lly4gOfPn6Nu3boAgMzMTKSnp8PV1RWnTp366NuvUqUKAODt27c4dOgQNmzYILO+iooKxo8fj/HjxyMuLg6jR4/GihUrsGDBAoVjIyL6nFSoxIKIyl+VKlUwYcIEcXSfD02ePBl2dnZQV1dHVFQUBgwYgGrVqqFXr15lGoe/vz9cXFyQmJgoznv+/DmGDx+OtWvXok2bNjh16hRGjhyJU6dOoXr16mVaP5WOVCoVf3JycpCeng4lJSWoqanhyy+/RL9+/cSyhw4dwp9//ont27fDxMQEL1++VKju4rafa9++fTA0NISnp6fM+hcuXICBgQGcnJygpaUFDQ0NKCsrKxQTEdHnqELdY0FE5a9jx45o3759od1AnJ2doa6uLk4rKSnh2bNnwPvRfapWrYodO3agSZMmcHBwwIIFC/Dq1Sv07dsXNWvWRK9evRAbG1tkDMeOHUNSUlK+ZOXMmTNwc3ODt7c3lJSU4O3tjTp16ojPmUlMTMTw4cNRq1YtODs7o3379oiMjCyDo0LFWblyJezt7bFq1SqcOHEC9vb26N+/PwBAV1cXlpaW4o++vj5UVFRgaWlZJh/gS7r9HTt2wM/PD0pKsm998fHxGDNmDJydndG4cWPo6upi8uTJCsdFRPS54RULIiq1GTNmYNeuXUhPT4eVlRV8fX1llueO7hMZGQkfHx9cv34dixcvho2NDQYPHoxVq1Zh06ZNBW47d3Sfbdu24erVqzLLChpmVBAEPHjw/w9VWrduHaRSKa5fvw41NTU8ePAA2traZb7/lN8333yDb775pkRlP+zSWq1aNURFRcmUyftQ0tx18o7kVJrt5zp8+HCB5bt3747u3buXaNtERFQ4XrEgolJbtGgRHj9+jODgYPTu3Rv6+voyyz/W6D4tWrTA7du3cfToUUilUhw9ehRXr17F27dvgfej+yQmJiIsLAzKyspwdXWVee4NERERfTxMLIhILkpKSnB3d4eOjg6+//57mWWKju4zZsyYApfXqFEDv/zyC1asWAF3d3f8+eef6Natm5g8fP3112jUqBFGjRqFOnXqYPbs2Xj37l0Z7TEREREVhV2hiEghWVlZ4j0WiirJ6D4+Pj7w8fER1+ncuTN69+4NANDW1oa/vz/8/f0RERGBIUOGIDAwEKNGjSqT+D5X3bY/LO8Q8nhQ3gHI2D/AqbxDICKqMHjFgohkSKVSpKeny4zuk5mZCQCIjIzE4cOHkZqaipycHFy9ehWbN2/ON8qOvL788kucP38ex48fx/HjxzFlyhTY29vj+PHj4lWP27dvQyqVIiUlBT/++CMSExPFezxOnDiBp0+fIicnBzo6OlBRUYGKCr8/ISIi+i/wHZeIZKxcuRIrVqwQp+3t7dGkSRPxZtqNGzdiypQpyMnJQZUqVTB06FCMHTu2TOrW1dWFrq6uOJ13dJ9cixYtws2bNyGRSNCiRQsEBQVBS0sLABAeHo7Zs2cjLi4O2tra6NixIwYNGlQmsREREVHRJMKHQ6xQicXFxSErK6u8wyh3EokEFhYWiI6OzjdiD1FB2GYql4rVFapiYVeowrHdFIxtpnBsM4Urz3ajqqoqc+9kUdgVioiIiIiIFMbEgoiIiIiIFMZ7LIg+IxXrMjNH9yEiIvqU8IoFEREREREpjIkFEREREREpjIkFEREREREpjIkFEREREREpjIkFEREREREpjIkFEREREREpjIkFEREREREpjIkFEREREREpjIkFEREREREpjIkFEREREREpjIkFEREREREpjIkFEREREREpjIkFEREREREpjIkFEREREREpjIkFEREREREpjIkFEREREREpjIkFEREREREpjIkFEREREREpjIkFEREREREpjIkFEREREREpjIkFEREREREpjIkFEREREREpjIkFEREREREpjIkFEREREREpjIkFEREREREpjIkFEREREREpjIkFEREREREpjIkFEREREREpTKW8AyiNXbt2Yffu3TLzLC0t8dNPPwEAMjMzsXXrVly6dAlZWVlwd3fHiBEjYGBgIJaPj4/Hhg0bEBISAg0NDXh6eqJ///5QVlb+z/eHiIiIiOhTUakSCwCoVq0avvvuO3FaSel/F10CAwNx48YNTJ48GVpaWti0aROWL1+O77//HgCQk5ODRYsWwcDAAAsWLEBiYiJWr14NZWVl9O/fv1z2h4iIiIjoU1DpukIpKSnBwMBA/NHT0wMApKWl4fTp0xg8eDBcXV1hZ2eH0aNH49GjRwgNDQUA3L59G5GRkRg3bhxsbGzg4eEBPz8/HDt2DFKptJz3jIiIiIio8qp0iUVMTAy++uorjB07FqtWrUJ8fDwAICwsDNnZ2XBzcxPLVq1aFSYmJmJiERoaCmtra5m
uUXXq1MG7d+/w4sWLctgbIiIiIqJPQ6XqCuXg4IDRo0fD0tISiYmJ2L17N2bPno3ly5cjKSkJKioq0NbWlllHX18fSUlJAICkpCSZpCJ3ee6ywmRlZSErK0uclkgk0NTUFP/+3OUeAx4LqszYfkkebDdUWmwzJI/K0m4qVWLh4eEh/l29enUx0bh8+TLU1NQ+Wr179+6VuWnc1tYWS5Ysgamp6UerszIyNzcv7xCoWA/KO4AKy8LCorxDqMDYbgrDdlMUtpuCsM0UhW2mMJWl3VSqxOJD2trasLS0RExMDGrXrg2pVIrU1FSZqxZv3rwRr1IYGBjgyZMnMtt48+aNuKwwPXr0QOfOncXp3KwxLi6O92a8Px7m5uaIiYmBIAjlHQ6RXKKjo8s7BKqE2G6otNhmSB7l2W5UVFRK/GV6pU4s0tPTERMTgxYtWsDOzg7Kysq4e/cuGjduDAB4+fIl4uPj4ejoCABwdHTEnj178ObNG7EL1J07d6CpqQkrK6tC61FVVYWqqmqBy/hB+n8EQeDxoEqLbZfkwXZDpcU2Q/KoLO2mUiUWW7duRf369WFiYoLExETs2rULSkpKaN68ObS0tNC6dWts3boVOjo60NLSwubNm+Ho6CgmFu7u7rCyssLq1asxYMAAJCUlYceOHfDx8Sk0cSAiIiIiouJVqsQiISEBK1euxNu3b6GnpwcnJycEBASIQ84OHjwYEokEy5cvh1QqFR+Ql0tJSQnTp0/Hxo0bMWvWLKirq8PT0xN+fn7luFdERERERJVfpUosJk6cWORyNTU1jBgxQiaZ+JCpqSlmzJjxEaIjIiIiIvp8VbrnWBARERERUcXDxIKIiIiIiBTGxIKIiIiIiBTGxIKIiIiIiBTGxIKIiIiIiBTGxIKIiIiIiBTGxIKIiIiIiBTGxIKIiIiIiBTGxIKIiIiIiBTGxIKIiIiIiBTGxIKIiIiIiBTGxIKIiIiIiBTGxIKIiIiIiBTGxIKIiIiIiBTGxIKIiIiIiBTGxIKIiIiIiBTGxIKIiIiIiBTGxIKIiIiIiBTGxIKIiIiIiBTGxIKIiIiIiBTGxIKIiIiIiBTGxIKIiIiIiBTGxIKIiIiIiBTGxIKIiIiIiBTGxIKIiIiIiBTGxIKIiIiIiBTGxIKIiIiIiBTGxIKIiIiIiBTGxIKIiIiIiBTGxIKIiIiIiBTGxIKKNWvWLNSvXx81a9ZEvXr1MHv2bGRmZiI+Ph5jx45FvXr1oKenB29vbxw/fry8wyUiIiKicsDEgoo1ePBgnDt3Do8ePcKJEydw//59rF27FqmpqXB1dcXBgweRlJSEqVOnYvTo0QgNDS3vkImIiIjoP8bEgorl4OAALS0tAIAgCFBSUsKzZ89QvXp1jBo1CpaWllBSUkK7du1gb2+PGzduAAAyMjIwefJkuLq6wsnJCa1bt8atW7fKeW+IiIiI6GNQKe8AqHJYvXo1Vq5cibS0NBgaGsLf3z9fmfj4eDx58gTOzs4AgKCgINy/fx8XL16Enp4ewsLCoKGhUQ7RExEREdHHxisWVCJjx47F48ePcfbsWXzxxRcwNTWVWZ6ZmYmvv/4anTt3hru7OwBAVVUVKSkpePz4MQRBgL29PapWrVpOe0BEREREHxMTCyoVBwcH1KpVC5MmTRLnZWZmonfv3tDU1MQPP/wgzu/Vqxd8fX0xffp0uLm5YeLEiUhISCinyImIiIjoY2JiQaUmlUrx7Nkz4H1S8eWXXyIzMxMbNmyAmpqaWE5FRQXjx4/HyZMncfbsWURFRWHFihXlGDkRERERfSxMLKhIqamp2LlzJ968eQNBEPDgwQOsXLkSXl5eyMrKwqhRo5CWloZ9+/ZBXV1dZt0LFy7g3r17kEql0NLSgoaGBpSVlcttX4iIiIjo4+HN21QkiUSCvXv3Yv78+cjMzISJiQk6duyIKVOm4Nq1azh27Bg0NDRgYmICQRAAAOPGjcP48eMRHx8Pf39/vHz5EhoaGmjRogUmT55c3rtERERERB8BEwsqkpaWFnbs2FHgsiZNmiAqKgoSiQQWFhaIjo4WkwsA6N69O7p37/4fRktERERE5YVdoYiIiIiISGGf9RWLo0ePik+Nrl69OoYNG4YaNWqUd1hERERERJXOZ5tYXLp0CVu3bsXIkSPh4OCAw4cPIyAgAD/99BP09fXLO7xiddv+sLxD+MCD8g5AtH+AU3mHQERERPTZ+Wy7Qh06dAht2rRBq1atYGVlhZEjR0JNTQ1nzpwp79CIiIiIiCqdzzKxkEqlCAsLg5ubmzhPSUkJbm5uCA0NLdfYiIiIiIgqo8+yK1RycjJycnJgYGAgM9/AwAAvX77MVz4rKwtZWVnitEQigaamJlRUyu/w1TTTKbe6KzpVVdXyDqHCYrspHNtN4dhuCsd2Uzi2m4KxzRSObaZw5dluSvN597NMLEpr79692L17tzjdrFkzTJgwAYaGhuUW07bBpuVWN1VebDckD7YbkgfbDZUW20zl91l2hdLT04OSkhKSkpJk5iclJeW7igEAPXr0wJYtW8SfkSNHylzB+Ny9e/cO06ZNw7t378o7FKok2GZIHmw3JA+2G5IH2418PsvEQkVFBXZ2drh37544LycnB/fu3YOjo2O+8qqqqtDS0pL54aXM/xEEAc+ePZN5OB5RUdhmSB5sNyQPthuSB9uNfD7brlCdO3fGmjVrYGdnhxo1aiA4OBgZGRnw8vIq79CIiIiIiCqdzzaxaNq0KZKTk7Fr1y4kJSXBxsYGM2fOLLArFBERERERFe2zTSwAoH379mjfvn15h1Hpqaqqonfv3uweRiXGNkPyYLshebDdkDzYbuQjEdh5jIiIiIiIFPRZ3rxNRERERERli4kFEREREREpjIkFVRpnz56Fr68vfH19yzuUz9aaNWuwdOnSUq/n6+uLK1eufJSYiOjzEBsbC19fX4SHh5d3KERUCCYWVOHMnTsXvr6+WLNmjcx8PT09ODg4wMHBodxio6Lt2rULU6dOzTf/119/hYeHx38SQ0hICHbt2vWf1EVEipk7dy62bNlS3mEoZO7cueUdAn0E8n6RFhsbm+/zy+eEiQVVGnXr1kVAQAACAgLKOxQqJQMDg48+ssbx48fx5s0bcVoqleLgwYOQSqUftV4i+rSU5Jxx/fp1hIWFycy7ePEiXr58+REjo4rs/PnziImJEacFQcDRo0eRkpJSrnH91z7r4WYrqxs3buCvv/5CVFQUpFIpDA0NYWdnh5EjR0JHRwc3b97Evn378OzZM2RnZ8Pe3h6+vr5wdXUVt/H8+XP8+uuvCA8Ph6WlJYYNG4Y5c+YAAHr37g1fX1+EhIRg3rx5AIDVq1fDzMwMeN+tBQBGjx4tPlAwKioKO3fuREhICNLS0mBubo4OHTqgXbt2Yp1jxoxBXFwcunbtioyMDFy8eBFKSkpo1qwZBg0aBGVlZZluTn///Tf+/vtvsf779+9j7dq1wPtvxvH+hRwcHIzY2FikpaVBQ0MDNWrUgJ+fH2rUqPHR/xefqn/++QdBQUGIiYmBuro6bG1tC7
[... base64-encoded PNG data omitted — the rendered output is the benchmark bar chart titled "Univariate DTW-kNN performance (1,500 FSDD train/test sequences, 16 workers)", produced by the plotting code in the cell source that follows ...]",
+      "text/plain": [
+       "<Figure size 800x400 with 1 Axes>
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig, ax = plt.subplots(figsize=(8, 4))\n", + "\n", + "runtimes = [31.871, 828.855, 887.367, 1210.012, 2778.706]\n", + "labels = [\"sequentia\", \"sktime*\", \"aeon\", \"tslearn*\", \"pyts*\"]\n", + "\n", + "bars = ax.bar(labels, runtimes, width=0.5, color=\"C1\")\n", + "ax.set(xlabel=\"Package\", ylabel=\"Runtime (s)\")\n", + "ax.set_title(\n", + " (\n", + " \"Univariate DTW-kNN performance \"\n", + " \"(1,500 FSDD train/test sequences, 16 workers)\"\n", + " ),\n", + " fontsize=11,\n", + ")\n", + "\n", + "\n", + "def fmt(s: float) -> str:\n", + " \"\"\"Formats the runtime.\"\"\"\n", + " if s < 60:\n", + " return f\"{round(s)}s\"\n", + " m, s = divmod(s, 60)\n", + " return f\"{round(m)}m {round(s)}s\"\n", + "\n", + "\n", + "for bar in bars:\n", + " plt.text(\n", + " bar.get_x() + bar.get_width() / 2,\n", + " bar.get_height(),\n", + " fmt(bar.get_height()),\n", + " ha=\"center\",\n", + " va=\"bottom\",\n", + " fontsize=9,\n", + " )\n", + "\n", + "for lab in ax.get_xticklabels():\n", + " if lab.get_text() == \"sequentia\":\n", + " lab.set_fontweight(\"bold\")\n", + "\n", + "plt.tight_layout()\n", + "plt.savefig(\"benchmark.svg\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "07aeb22f-d8be-4759-9012-1a3e9479343a", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/benchmarks/requirements.txt b/benchmarks/requirements.txt new file mode 100644 index 0000000..1353452 --- /dev/null +++ b/benchmarks/requirements.txt @@ -0,0 +1,6 @@ +# python==3.12.8 +sequentia==2.1.0 +aeon==1.0.0 +tslearn==0.6.3 +sktime==0.35.0 +pyts==0.13.0 diff --git a/benchmarks/run.sh b/benchmarks/run.sh new file mode 100755 index 0000000..ed732a8 --- /dev/null +++ b/benchmarks/run.sh @@ -0,0 +1,19 @@ +echo "sequentia" +python test_sequentia.py --n-jobs 16 --number 10 +echo + +echo "aeon" +python test_aeon.py --n-jobs 16 --number 10 +echo + +echo "tslearn" +python test_tslearn.py --n-jobs 16 --number 10 +echo + +echo "sktime" +python test_sktime.py --n-jobs 16 --number 10 +echo + +echo "pyts" +python test_pyts.py --n-jobs 16 --number 10 +echo diff --git a/benchmarks/test_aeon.py b/benchmarks/test_aeon.py new file mode 100644 index 0000000..a03f13e --- /dev/null +++ b/benchmarks/test_aeon.py @@ -0,0 +1,82 @@ +# Copyright (c) 2019 Sequentia Developers. +# Distributed under the terms of the MIT License (see the LICENSE file). +# SPDX-License-Identifier: MIT +# This source code is part of the Sequentia project (https://github.com/eonu/sequentia). + +"""Runtime benchmarks for aeon's dynamic time warping +k-nearest neighbors algorithm. 
+""" + +from __future__ import annotations + +import timeit +import typing as t + +import numpy as np +from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier +from aeon.transformations.collection import Padder +from dtaidistance import dtw_ndim +from utils import load_dataset + +from sequentia.datasets.base import SequentialDataset + +np.random.seed(0) +random_state: np.random.RandomState = np.random.RandomState(0) + +DataSplit: t.TypeAlias = tuple[np.ndarray, np.ndarray] + + +def distance(s1: np.ndarray, s2: np.ndarray) -> float: + """DTAIDistance DTW measure - not used.""" + # need to transpose sequences again + return dtw_ndim.distance(s1.T, s2.T, use_c=True) + + +def prepare(data: SequentialDataset) -> DataSplit: + """Prepare the dataset - padding.""" + # transpose sequences and pad + X = [x.T for x, _ in data] + padder = Padder() + X_pad = padder.fit_transform(X) + # X_pad = X_pad.astype("float64") + return X_pad, data.y + + +def run(*, train_data: DataSplit, test_data: DataSplit, n_jobs: int) -> None: + """Fit and predict the classifier.""" + # initialize model + clf = KNeighborsTimeSeriesClassifier( + n_neighbors=1, + n_jobs=n_jobs, + distance="dtw", + # distance=distance, + ) + + # fit model + X_train, y_train = train_data + clf.fit(X_train, y_train) + + # predict model + X_test, _ = test_data + clf.predict(X_test) + + +if __name__ == "__main__": + import argparse + + parser: argparse.ArgumentParser = argparse.ArgumentParser() + parser.add_argument("--n-jobs", type=int, default=1) + parser.add_argument("--number", type=int, default=10) + args: argparse.Namespace = parser.parse_args() + + train_data, test_data = load_dataset(multivariate=False) + train_data, test_data = prepare(train_data), prepare(test_data) + + benchmark = timeit.timeit( + "run(train_data=train_data, test_data=test_data, n_jobs=args.n_jobs)", + globals=locals(), + number=args.number, + ) + + print(args) # noqa: T201 + print(f"{benchmark:.3f}s") # noqa: T201 diff --git a/benchmarks/test_pyts.py b/benchmarks/test_pyts.py new file mode 100644 index 0000000..09a3d96 --- /dev/null +++ b/benchmarks/test_pyts.py @@ -0,0 +1,77 @@ +# Copyright (c) 2019 Sequentia Developers. +# Distributed under the terms of the MIT License (see the LICENSE file). +# SPDX-License-Identifier: MIT +# This source code is part of the Sequentia project (https://github.com/eonu/sequentia). + +"""Runtime benchmarks for pyts's dynamic time warping +k-nearest neighbors algorithm. 
+""" + +from __future__ import annotations + +import timeit +import typing as t + +import numpy as np +from aeon.transformations.collection import Padder +from pyts.classification import KNeighborsClassifier +from utils import load_dataset + +from sequentia.datasets.base import SequentialDataset + +np.random.seed(0) +random_state: np.random.RandomState = np.random.RandomState(0) + +DataSplit: t.TypeAlias = tuple[np.ndarray, np.ndarray] + + +def prepare(data: SequentialDataset, length: int) -> DataSplit: + """Prepare the dataset - pad and flatten.""" + # transpose sequences and pad + X = [x.T for x, _ in data] + padder = Padder(pad_length=length) + X_pad = padder.fit_transform(X) + return X_pad[:, 0], data.y + + +def run(*, train_data: DataSplit, test_data: DataSplit, n_jobs: int) -> None: + """Fit and predict the classifier.""" + # initialize model + clf = KNeighborsClassifier( + n_neighbors=1, + n_jobs=n_jobs, + metric="dtw", + ) + + # fit model + X_train, y_train = train_data + clf.fit(X_train, y_train) + + # predict model + X_test, _ = test_data + clf.predict(X_test) + + +if __name__ == "__main__": + import argparse + + parser: argparse.ArgumentParser = argparse.ArgumentParser() + parser.add_argument("--n-jobs", type=int, default=1) + parser.add_argument("--number", type=int, default=10) + args: argparse.Namespace = parser.parse_args() + + train_data, test_data = load_dataset(multivariate=False) + length = max(train_data.lengths.max(), test_data.lengths.max()) + train_data, test_data = ( + prepare(train_data, length=length), + prepare(test_data, length=length), + ) + + benchmark = timeit.timeit( + "run(train_data=train_data, test_data=test_data, n_jobs=args.n_jobs)", + globals=locals(), + number=args.number, + ) + + print(args) # noqa: T201 + print(f"{benchmark:.3f}s") # noqa: T201 diff --git a/benchmarks/test_sequentia.py b/benchmarks/test_sequentia.py new file mode 100644 index 0000000..521d222 --- /dev/null +++ b/benchmarks/test_sequentia.py @@ -0,0 +1,61 @@ +# Copyright (c) 2019 Sequentia Developers. +# Distributed under the terms of the MIT License (see the LICENSE file). +# SPDX-License-Identifier: MIT +# This source code is part of the Sequentia project (https://github.com/eonu/sequentia). + +"""Runtime benchmarks for sequentia's dynamic time warping +k-nearest neighbors algorithm. 
+""" + +from __future__ import annotations + +import timeit + +import numpy as np +from utils import load_dataset + +import sequentia +from sequentia.datasets.base import SequentialDataset + +np.random.seed(0) +random_state: np.random.RandomState = np.random.RandomState(0) + + +def run( + *, train_data: SequentialDataset, test_data: SequentialDataset, n_jobs: int +) -> None: + """Fit and predict the classifier.""" + # initialize model + clf = sequentia.models.KNNClassifier( + k=1, + use_c=True, + n_jobs=n_jobs, + random_state=random_state, + classes=train_data.classes, + ) + + # fit model + clf.fit(X=train_data.X, y=train_data.y, lengths=train_data.lengths) + + # predict model + clf.predict(X=test_data.X, lengths=test_data.lengths) + + +if __name__ == "__main__": + import argparse + + parser: argparse.ArgumentParser = argparse.ArgumentParser() + parser.add_argument("--n-jobs", type=int, default=1) + parser.add_argument("--number", type=int, default=10) + args: argparse.Namespace = parser.parse_args() + + train_data, test_data = load_dataset(multivariate=False) + + benchmark = timeit.timeit( + "run(train_data=train_data, test_data=test_data, n_jobs=args.n_jobs)", + globals=locals(), + number=args.number, + ) + + print(args) # noqa: T201 + print(f"{benchmark:.3f}s") # noqa: T201 diff --git a/benchmarks/test_sktime.py b/benchmarks/test_sktime.py new file mode 100644 index 0000000..7fc5297 --- /dev/null +++ b/benchmarks/test_sktime.py @@ -0,0 +1,96 @@ +# Copyright (c) 2019 Sequentia Developers. +# Distributed under the terms of the MIT License (see the LICENSE file). +# SPDX-License-Identifier: MIT +# This source code is part of the Sequentia project (https://github.com/eonu/sequentia). + +"""Runtime benchmarks for sktime's dynamic time warping +k-nearest neighbors algorithm. +""" + +from __future__ import annotations + +import timeit +import typing as t + +import numpy as np +import pandas as pd +from dtaidistance import dtw_ndim +from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier +from utils import load_dataset + +from sequentia.datasets.base import SequentialDataset + +np.random.seed(0) +random_state: np.random.RandomState = np.random.RandomState(0) + +DataSplit: t.TypeAlias = tuple[pd.Series, np.ndarray] + + +def distance(s1: pd.Series, s2: pd.Series) -> np.ndarray: + """DTAIDistance DTW measure - not used.""" + s1, s2 = s1.droplevel(1), s2.droplevel(1) + m = s1.index.max() + 1 + n = s2.index.max() + 1 + matrix = np.zeros((m, n)) + for i in range(m): + a = np.trim_zeros(s1.loc[i].to_numpy(dtype=np.float64)) + for j in range(n): + b = np.trim_zeros(s2.loc[j].to_numpy(dtype=np.float64)) + matrix[i][j] = dtw_ndim.distance(a, b, use_c=True) + return matrix + + +def pad(x: np.ndarray, length: int) -> np.ndarray: + """Pad a sequence with zeros.""" + return np.concat((x, np.zeros((length - len(x), x.shape[-1])))) + + +def prepare(data: SequentialDataset) -> DataSplit: + """Prepare the dataset - pad and convert to multi-indexed + Pandas DataFrame. 
+ """ + # convert to padded pandas multi-index + length = data.lengths.max() + X = [pd.DataFrame(pad(x, length=length)) for x, _ in data] + X_pd = pd.concat(X, keys=range(len(X)), axis=0) + return X_pd, data.y + + +def run(*, train_data: DataSplit, test_data: DataSplit, n_jobs: int) -> None: + """Fit and predict the classifier.""" + # initialize model + clf = KNeighborsTimeSeriesClassifier( + n_neighbors=1, + n_jobs=n_jobs, + distance="dtw", + # distance=distance, + ) + + # fit model + X_train, y_train = train_data + clf.fit(X_train, y_train) + + # predict model + X_test, _ = test_data + clf.predict(X_test) + + +if __name__ == "__main__": + import argparse + + parser: argparse.ArgumentParser = argparse.ArgumentParser() + parser.add_argument("--n-jobs", type=int, default=1) + parser.add_argument("--number", type=int, default=10) + args: argparse.Namespace = parser.parse_args() + + train_data, test_data = load_dataset(multivariate=False) + train_data, test_data = prepare(train_data), prepare(test_data) + + benchmark = timeit.timeit( + "run(train_data=train_data, test_data=test_data, n_jobs=args.n_jobs)", + globals=locals(), + number=args.number, + ) + + print(args) # noqa: T201 + print(f"{benchmark:.3f}s") # noqa: T201 diff --git a/benchmarks/test_tslearn.py b/benchmarks/test_tslearn.py new file mode 100644 index 0000000..b8e0306 --- /dev/null +++ b/benchmarks/test_tslearn.py @@ -0,0 +1,83 @@ +# Copyright (c) 2019 Sequentia Developers. +# Distributed under the terms of the MIT License (see the LICENSE file). +# SPDX-License-Identifier: MIT +# This source code is part of the Sequentia project (https://github.com/eonu/sequentia). + +"""Runtime benchmarks for tslearn's dynamic time warping +k-nearest neighbors algorithm. +""" + +from __future__ import annotations + +import timeit +import typing as t + +import numpy as np +from aeon.transformations.collection import Padder +from dtaidistance import dtw_ndim +from tslearn.neighbors import KNeighborsTimeSeriesClassifier +from utils import load_dataset + +from sequentia.datasets.base import SequentialDataset + +np.random.seed(0) +random_state: np.random.RandomState = np.random.RandomState(0) + +DataSplit: t.TypeAlias = tuple[np.ndarray, np.ndarray] + + +def distance(s1: np.ndarray, s2: np.ndarray) -> float: + """DTAIDistance DTW measure - not used.""" + return dtw_ndim.distance(s1, s2, use_c=True) + + +def prepare(data: SequentialDataset, length: int) -> DataSplit: + """Prepare the dataset - padding.""" + # pad sequences - zeros/nans are not ignored (!!!) 
+ X = [x.T for x, _ in data] + padder = Padder(pad_length=length) + X_pad = padder.fit_transform(X) + # X_pad[(X_pad == 0).all(axis=1, keepdims=True)] = np.nan + return X_pad, data.y + + +def run(*, train_data: DataSplit, test_data: DataSplit, n_jobs: int) -> None: + """Fit and predict the classifier.""" + # initialize model + clf = KNeighborsTimeSeriesClassifier( + n_neighbors=1, + n_jobs=n_jobs, + ) + + # fit model + X_train, y_train = train_data + clf.fit(X_train, y_train) + + # predict model + X_test, _ = test_data + clf.predict(X_test) + + +if __name__ == "__main__": + import argparse + + parser: argparse.ArgumentParser = argparse.ArgumentParser() + parser.add_argument("--n-jobs", type=int, default=1) + parser.add_argument("--number", type=int, default=10) + args: argparse.Namespace = parser.parse_args() + + train_data, test_data = load_dataset(multivariate=False) + length = max(train_data.lengths.max(), test_data.lengths.max()) + train_data, test_data = ( + prepare(train_data, length=length), + prepare(test_data, length=length), + ) + + benchmark = timeit.timeit( + "run(train_data=train_data, test_data=test_data, n_jobs=args.n_jobs)", + globals=locals(), + number=args.number, + ) + + print(args) # noqa: T201 + print(f"{benchmark:.3f}s") # noqa: T201 diff --git a/benchmarks/utils.py b/benchmarks/utils.py new file mode 100644 index 0000000..7a52713 --- /dev/null +++ b/benchmarks/utils.py @@ -0,0 +1,60 @@ +# Copyright (c) 2019 Sequentia Developers. +# Distributed under the terms of the MIT License (see the LICENSE file). +# SPDX-License-Identifier: MIT +# This source code is part of the Sequentia project (https://github.com/eonu/sequentia). + +"""Utilities for benchmarking.""" + +from __future__ import annotations + +import numpy as np + +from sequentia.datasets.base import SequentialDataset +from sequentia.datasets.digits import load_digits + +__all__ = ["load_dataset"] + +np.random.seed(0) +random_state: np.random.RandomState = np.random.RandomState(0) + + +def load_dataset( + *, multivariate: bool +) -> tuple[SequentialDataset, SequentialDataset]: + """Loads the Free Spoken Digit Dataset.""" + # load data + data: SequentialDataset = load_digits() + + # split dataset + train_data, test_data = data.split( + test_size=0.5, + random_state=random_state, + shuffle=True, + stratify=True, + ) + + if multivariate: + # return untransformed data + return train_data, test_data + + # retrieve features + X_train, X_test = train_data.X, test_data.X + + # reduce to one dimension + X_train = X_train.mean(axis=-1, keepdims=True) + X_test = X_test.mean(axis=-1, keepdims=True) + + # return splits + train_split: SequentialDataset = SequentialDataset( + X=X_train, + y=train_data.y, + lengths=train_data.lengths, + classes=train_data.classes, + ) + test_split: SequentialDataset = SequentialDataset( + X=X_test, + y=test_data.y, + lengths=test_data.lengths, + classes=test_data.classes, + ) + return train_split, test_split diff --git a/docs/source/__init__.py b/docs/source/__init__.py index 1c08549..b49759a 100644 --- a/docs/source/__init__.py +++ b/docs/source/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). 
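For orientation, the benchmark pieces above compose in a straightforward way. The sketch below is illustrative only (it is not part of the patch) and assumes it is executed from the `benchmarks/` directory so that `utils` and `test_sequentia` resolve as local modules; `run.sh` simply repeats the equivalent command-line invocation for each library with `--n-jobs 16 --number 10`::

    import timeit

    from test_sequentia import run  # fit/predict routine defined above
    from utils import load_dataset  # FSDD loader defined above

    # Univariate split: the MFCC features are averaged down to one channel.
    train_data, test_data = load_dataset(multivariate=False)

    # Time 10 fit/predict rounds with 16 workers, mirroring run.sh.
    elapsed = timeit.timeit(
        lambda: run(train_data=train_data, test_data=test_data, n_jobs=16),
        number=10,
    )
    print(f"{elapsed:.3f}s")
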
diff --git a/docs/source/_static/css/toc.css b/docs/source/_static/css/toc.css index 3a8238c..d08fe3f 100644 --- a/docs/source/_static/css/toc.css +++ b/docs/source/_static/css/toc.css @@ -1,9 +1,7 @@ -/* Adds overflow to the Table of Contents on the side bar */ -div[aria-label="main navigation"] div.sphinxsidebarwrapper div:first-child { +div.sphinxsidebarwrapper { overflow-x: auto; } -/* Hides any API reference lists in the Table of Contents */ -div[aria-label="main navigation"] div.sphinxsidebarwrapper div:first-child a[href="#api-reference"] + ul { +div.sphinxsidebarwrapper a[href="#definitions"] + ul > li > ul { display: none; -} \ No newline at end of file +} diff --git a/docs/source/conf.py b/docs/source/conf.py index deb2e21..1e0f753 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). @@ -21,9 +21,9 @@ # -- Project information ----------------------------------------------------- project = "sequentia" -copyright = "2019-2025, Sequentia Developers" # noqa: A001 +copyright = "2019, Sequentia Developers" # noqa: A001 author = "Edwin Onuonga (eonu)" -release = "2.0.2" +release = "2.5.0" # -- General configuration --------------------------------------------------- diff --git a/docs/source/index.rst b/docs/source/index.rst index 961fcaf..d8fc7b2 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -42,6 +42,7 @@ Features sections/models/index sections/preprocessing/index + sections/model_selection/index sections/datasets/index sections/configuration diff --git a/docs/source/sections/configuration.rst b/docs/source/sections/configuration.rst index 62d1e9c..755269e 100644 --- a/docs/source/sections/configuration.rst +++ b/docs/source/sections/configuration.rst @@ -13,7 +13,10 @@ API Reference ~sequentia.enums.TopologyMode ~sequentia.enums.TransitionMode -| +.. _definitions: + +Definitions +^^^^^^^^^^^ .. automodule:: sequentia.enums :members: diff --git a/docs/source/sections/datasets/digits.rst b/docs/source/sections/datasets/digits.rst index 9206723..dc56611 100644 --- a/docs/source/sections/datasets/digits.rst +++ b/docs/source/sections/datasets/digits.rst @@ -4,4 +4,9 @@ Digits API reference ------------- +.. _definitions: + +Definitions +^^^^^^^^^^^ + .. autofunction:: sequentia.datasets.load_digits diff --git a/docs/source/sections/datasets/gene_families.rst b/docs/source/sections/datasets/gene_families.rst index 77add39..87c4979 100644 --- a/docs/source/sections/datasets/gene_families.rst +++ b/docs/source/sections/datasets/gene_families.rst @@ -4,4 +4,9 @@ Gene Families API reference ------------- +.. _definitions: + +Definitions +^^^^^^^^^^^ + .. autofunction:: sequentia.datasets.load_gene_families diff --git a/docs/source/sections/datasets/index.rst b/docs/source/sections/datasets/index.rst index 29cf5cd..90c17ad 100644 --- a/docs/source/sections/datasets/index.rst +++ b/docs/source/sections/datasets/index.rst @@ -49,7 +49,10 @@ Properties ~sequentia.datasets.base.SequentialDataset.lengths ~sequentia.datasets.base.SequentialDataset.y -| +.. _definitions: + +Definitions +^^^^^^^^^^^ .. 
autoclass:: sequentia.datasets.base.SequentialDataset :members: diff --git a/docs/source/sections/model_selection/index.rst b/docs/source/sections/model_selection/index.rst new file mode 100644 index 0000000..e61aeb5 --- /dev/null +++ b/docs/source/sections/model_selection/index.rst @@ -0,0 +1,20 @@ +Model Selection +=============== + +.. toctree:: + :titlesonly: + + searching.rst + splitting.rst + +---- + +For validating models and performing hyper-parameter selection, it is common +to use cross-validation methods such as those in :mod:`sklearn.model_selection`. + +Although :mod:`sklearn.model_selection` is partially compatible with Sequentia, +we define our own wrapped versions of certain classes and functions to allow +support for sequences. + +- :ref:`searching` defines methods for searching hyper-parameter spaces in different ways, such as :class:`sequentia.model_selection.GridSearchCV`. +- :ref:`splitting` defines methods for partitioning data into training/validation splits for cross-validation, such as :class:`sequentia.model_selection.KFold`. diff --git a/docs/source/sections/model_selection/searching.rst b/docs/source/sections/model_selection/searching.rst new file mode 100644 index 0000000..e5b6635 --- /dev/null +++ b/docs/source/sections/model_selection/searching.rst @@ -0,0 +1,101 @@ +.. _searching: + +Hyper-parameter search methods +============================== + +In order to optimize the hyper-parameters for a specific model, +hyper-parameter search methods are used (often in conjunction with +:ref:`cross-validation methods `) to evaluate the performance of a model +with different configurations and find the optimal settings. + +:mod:`sklearn.model_selection` provides such hyper-parameter search methods, +but does not support sequence data. Sequentia provides modified +versions of these methods to support sequence data. + +API reference +------------- + +Classes/Methods +^^^^^^^^^^^^^^^ + +.. autosummary:: + + ~sequentia.model_selection.param_grid + ~sequentia.model_selection.GridSearchCV + ~sequentia.model_selection.RandomizedSearchCV + ~sequentia.model_selection.HalvingGridSearchCV + ~sequentia.model_selection.HalvingRandomSearchCV + +Example +^^^^^^^ + +Using :class:`.GridSearchCV` with :class:`.StratifiedKFold` to +cross-validate a :class:`.KNNClassifier` training pipeline. 
:: + + import numpy as np + + from sklearn.pipeline import Pipeline + from sklearn.preprocessing import minmax_scale + + from sequentia.datasets import load_digits + from sequentia.models import KNNClassifier + from sequentia.preprocessing import IndependentFunctionTransformer + from sequentia.model_selection import StratifiedKFold, GridSearchCV + + EPS: np.float32 = np.finfo(np.float32).eps + + # Define model and hyper-parameter search space + search = GridSearchCV( + # Create a basic pipeline with a KNNClassifier to be optimized + estimator=Pipeline( + [ + ("scale", IndependentFunctionTransformer(minmax_scale)), + ("clf", KNNClassifier(use_c=True, n_jobs=-1)) + ] + ), + # Optimize over k, weighting function and window size + param_grid={ + "clf__k": [1, 2, 3, 4, 5], + "clf__weighting": [ + None, lambda x: 1 / (x + EPS), lambda x: np.exp(-x) + ], + "clf__window": [1.0, 0.75, 0.5, 0.25, 0.1], + }, + # Use StratifiedKFold cross-validation + cv=StratifiedKFold(), + n_jobs=-1, + ) + + # Load the spoken digit dataset with a train/test set split + data = load_digits() + train_data, test_data = data.split(test_size=0.2, stratify=True) + + # Perform cross-validation over accuracy and retrieve the best model + search.fit(train_data.X, train_data.y, lengths=train_data.lengths) + clf = search.best_estimator_ + + # Calculate accuracy on the test set split + acc = clf.score(test_data.X, test_data.y, lengths=test_data.lengths) + +.. _definitions: + +Definitions +^^^^^^^^^^^ + +.. autofunction:: sequentia.model_selection.param_grid + +.. autoclass:: sequentia.model_selection.GridSearchCV + :members: __init__ + :exclude-members: __new__ + +.. autoclass:: sequentia.model_selection.RandomizedSearchCV + :members: __init__ + :exclude-members: __new__ + +.. autoclass:: sequentia.model_selection.HalvingGridSearchCV + :members: __init__ + :exclude-members: __new__ + +.. autoclass:: sequentia.model_selection.HalvingRandomSearchCV + :members: __init__ + :exclude-members: __new__ \ No newline at end of file diff --git a/docs/source/sections/model_selection/splitting.rst b/docs/source/sections/model_selection/splitting.rst new file mode 100644 index 0000000..f2a8d9d --- /dev/null +++ b/docs/source/sections/model_selection/splitting.rst @@ -0,0 +1,114 @@ +.. _splitting: + +Cross-validation splitting methods +================================== + +During cross-validation, a dataset is divided into splits for training and validation. + +This can be either be done using a single basic split, or alternatively via successive +*folds* which re-use parts of the dataset for different splits. + +:mod:`sklearn.model_selection` provides such cross-validation splitting methods, +but does not support sequence data. Sequentia provides modified +versions of these methods to support sequence data. + +API reference +------------- + +Classes +^^^^^^^ + +.. autosummary:: + + ~sequentia.model_selection.KFold + ~sequentia.model_selection.StratifiedKFold + ~sequentia.model_selection.ShuffleSplit + ~sequentia.model_selection.StratifiedShuffleSplit + ~sequentia.model_selection.RepeatedKFold + ~sequentia.model_selection.RepeatedStratifiedKFold + +Example +^^^^^^^ + +Using :class:`.GridSearchCV` with :class:`.StratifiedKFold` to +cross-validate a :class:`.KNNClassifier` training pipeline. 
:: + + import numpy as np + + from sklearn.pipeline import Pipeline + from sklearn.preprocessing import minmax_scale + + from sequentia.datasets import load_digits + from sequentia.models import KNNClassifier + from sequentia.preprocessing import IndependentFunctionTransformer + from sequentia.model_selection import StratifiedKFold, GridSearchCV + + EPS: np.float32 = np.finfo(np.float32).eps + + # Define model and hyper-parameter search space + search = GridSearchCV( + # Create a basic pipeline with a KNNClassifier to be optimized + estimator=Pipeline( + [ + ("scale", IndependentFunctionTransformer(minmax_scale)), + ("clf", KNNClassifier(use_c=True, n_jobs=-1)) + ] + ), + # Optimize over k, weighting function and window size + param_grid={ + "clf__k": [1, 2, 3, 4, 5], + "clf__weighting": [ + None, lambda x: 1 / (x + EPS), lambda x: np.exp(-x) + ], + "clf__window": [1.0, 0.75, 0.5, 0.25, 0.1], + }, + # Use StratifiedKFold cross-validation + cv=StratifiedKFold(), + n_jobs=-1, + ) + + # Load the spoken digit dataset with a train/test set split + data = load_digits() + train_data, test_data = data.split(test_size=0.2, stratify=True) + + # Perform cross-validation over accuracy and retrieve the best model + search.fit(train_data.X, train_data.y, lengths=train_data.lengths) + clf = search.best_estimator_ + + # Calculate accuracy on the test set split + acc = clf.score(test_data.X, test_data.y, lengths=test_data.lengths) + +.. _definitions: + +Definitions +^^^^^^^^^^^ + +.. autoclass:: sequentia.model_selection.KFold + :members: + :inherited-members: + :exclude-members: get_metadata_routing, get_n_splits, split + +.. autoclass:: sequentia.model_selection.StratifiedKFold + :members: + :inherited-members: + :exclude-members: get_metadata_routing, get_n_splits, split + +.. autoclass:: sequentia.model_selection.ShuffleSplit + :members: + :inherited-members: + :exclude-members: get_metadata_routing, get_n_splits, split + +.. autoclass:: sequentia.model_selection.StratifiedShuffleSplit + :members: + :inherited-members: + :exclude-members: get_metadata_routing, get_n_splits, split + +.. autoclass:: sequentia.model_selection.RepeatedKFold + :members: + :inherited-members: + :exclude-members: get_metadata_routing, get_n_splits, split + +.. autoclass:: sequentia.model_selection.RepeatedStratifiedKFold + :members: + :inherited-members: + :exclude-members: get_metadata_routing, get_n_splits, split diff --git a/docs/source/sections/models/hmm/classifier.rst b/docs/source/sections/models/hmm/classifier.rst index a94a087..bc3d2ee 100644 --- a/docs/source/sections/models/hmm/classifier.rst +++ b/docs/source/sections/models/hmm/classifier.rst @@ -62,7 +62,10 @@ Methods ~sequentia.models.hmm.classifier.HMMClassifier.save ~sequentia.models.hmm.classifier.HMMClassifier.score -| +.. _definitions: + +Definitions +^^^^^^^^^^^ .. autoclass:: sequentia.models.hmm.classifier.HMMClassifier :members: diff --git a/docs/source/sections/models/hmm/variants/categorical.rst b/docs/source/sections/models/hmm/variants/categorical.rst index e746af8..a028ce1 100644 --- a/docs/source/sections/models/hmm/variants/categorical.rst +++ b/docs/source/sections/models/hmm/variants/categorical.rst @@ -62,7 +62,10 @@ Methods ~sequentia.models.hmm.variants.CategoricalHMM.unfreeze ~sequentia.models.hmm.variants.CategoricalHMM.n_params -| +.. _definitions: + +Definitions +^^^^^^^^^^^ .. 
autoclass:: sequentia.models.hmm.variants.CategoricalHMM :members: diff --git a/docs/source/sections/models/hmm/variants/gaussian_mixture.rst b/docs/source/sections/models/hmm/variants/gaussian_mixture.rst index bc322e6..36b9be5 100644 --- a/docs/source/sections/models/hmm/variants/gaussian_mixture.rst +++ b/docs/source/sections/models/hmm/variants/gaussian_mixture.rst @@ -73,7 +73,10 @@ Methods ~sequentia.models.hmm.variants.GaussianMixtureHMM.unfreeze ~sequentia.models.hmm.variants.GaussianMixtureHMM.n_params -| +.. _definitions: + +Definitions +^^^^^^^^^^^ .. autoclass:: sequentia.models.hmm.variants.GaussianMixtureHMM :members: diff --git a/docs/source/sections/models/index.rst b/docs/source/sections/models/index.rst index 2b9708e..ba03888 100644 --- a/docs/source/sections/models/index.rst +++ b/docs/source/sections/models/index.rst @@ -16,9 +16,9 @@ The following models provided by Sequentia all support variable length sequences | | | | +----------+------------+ | | | | | Training | Prediction | +=========================+==============================+================+===============+==============+==========+============+ -| :class:`.HMMClassifier` | :class:`.GaussianMixtureHMM` | Classification | Real | ✔ | ✗ | ✔ | +| :class:`.HMMClassifier` | :class:`.GaussianMixtureHMM` | Classification | Real | ✔ | ✔ | ✔ | | +------------------------------+----------------+---------------+--------------+----------+------------+ -| | :class:`.CategoricalHMM` | Classification | Categorical | ✗ | ✗ | ✔ | +| | :class:`.CategoricalHMM` | Classification | Categorical | ✗ | ✔ | ✔ | +-------------------------+------------------------------+----------------+---------------+--------------+----------+------------+ | :class:`.KNNRegressor` | Regression | Real | ✔ | N/A | ✔ | +--------------------------------------------------------+----------------+---------------+--------------+----------+------------+ diff --git a/docs/source/sections/models/knn/classifier.rst b/docs/source/sections/models/knn/classifier.rst index 906fa3b..42fdeff 100644 --- a/docs/source/sections/models/knn/classifier.rst +++ b/docs/source/sections/models/knn/classifier.rst @@ -47,7 +47,10 @@ Methods ~sequentia.models.knn.classifier.KNNClassifier.save ~sequentia.models.knn.classifier.KNNClassifier.score -| +.. _definitions: + +Definitions +^^^^^^^^^^^ .. autoclass:: sequentia.models.knn.classifier.KNNClassifier :members: diff --git a/docs/source/sections/models/knn/regressor.rst b/docs/source/sections/models/knn/regressor.rst index 2e1926f..f5aa9d5 100644 --- a/docs/source/sections/models/knn/regressor.rst +++ b/docs/source/sections/models/knn/regressor.rst @@ -48,7 +48,10 @@ Methods ~sequentia.models.knn.regressor.KNNRegressor.save ~sequentia.models.knn.regressor.KNNRegressor.score -| +.. _definitions: + +Definitions +^^^^^^^^^^^ .. autoclass:: sequentia.models.knn.regressor.KNNRegressor :members: diff --git a/docs/source/sections/preprocessing/transforms/filters.rst b/docs/source/sections/preprocessing/transforms/filters.rst index ccb6a27..75459f7 100644 --- a/docs/source/sections/preprocessing/transforms/filters.rst +++ b/docs/source/sections/preprocessing/transforms/filters.rst @@ -21,7 +21,10 @@ Methods ~sequentia.preprocessing.transforms.mean_filter ~sequentia.preprocessing.transforms.median_filter -| +.. _definitions: + +Definitions +^^^^^^^^^^^ .. autofunction:: sequentia.preprocessing.transforms.mean_filter .. 
autofunction:: sequentia.preprocessing.transforms.median_filter diff --git a/docs/source/sections/preprocessing/transforms/function_transformer.rst b/docs/source/sections/preprocessing/transforms/function_transformer.rst index 0fe8954..1b23691 100644 --- a/docs/source/sections/preprocessing/transforms/function_transformer.rst +++ b/docs/source/sections/preprocessing/transforms/function_transformer.rst @@ -29,7 +29,10 @@ Methods ~sequentia.preprocessing.transforms.IndependentFunctionTransformer.inverse_transform ~sequentia.preprocessing.transforms.IndependentFunctionTransformer.transform -| +.. _definitions: + +Definitions +^^^^^^^^^^^ .. autoclass:: sequentia.preprocessing.transforms.IndependentFunctionTransformer :members: diff --git a/make/__init__.py b/make/__init__.py index f25c976..e818b59 100644 --- a/make/__init__.py +++ b/make/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). diff --git a/make/cov.py b/make/cov.py index bd10475..4169231 100644 --- a/make/cov.py +++ b/make/cov.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). diff --git a/make/docs.py b/make/docs.py index 592e69c..9fdbb7a 100644 --- a/make/docs.py +++ b/make/docs.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). diff --git a/make/lint.py b/make/lint.py index 0bd9dd7..17151ef 100644 --- a/make/lint.py +++ b/make/lint.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). @@ -33,7 +33,7 @@ def check(c: Config) -> None: def format_(c: Config) -> None: """Format Python files.""" commands: list[str] = [ - "poetry run ruff --fix .", + "poetry run ruff check --fix .", "poetry run ruff format .", ] for command in commands: diff --git a/make/release.py b/make/release.py index 9175905..8f2cf19 100644 --- a/make/release.py +++ b/make/release.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). diff --git a/make/tests.py b/make/tests.py index 84111ee..12fb507 100644 --- a/make/tests.py +++ b/make/tests.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). 
@@ -23,6 +23,8 @@ def unit(c: Config, *, cov: bool = False) -> None: command: str = "poetry run pytest tests/" if cov: - command = f"{command} --cov sequentia --cov-report xml" - + command = ( + f"{command} --cov-config .coveragerc " + "--cov sequentia --cov-report xml" + ) c.run(command) diff --git a/mise.toml b/mise.toml new file mode 100644 index 0000000..0fe0947 --- /dev/null +++ b/mise.toml @@ -0,0 +1,6 @@ +[tools] +poetry = { version = 'latest', pyproject = 'pyproject.toml' } +python = '3.13' + +[env] +_.python.venv = ".venv" diff --git a/notice.py b/notice.py index 564a3a9..7151b83 100644 --- a/notice.py +++ b/notice.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). @@ -12,7 +12,7 @@ from pathlib import Path notice = """ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). diff --git a/pyproject.toml b/pyproject.toml index e20d3db..55f9311 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "sequentia" -version = "2.0.2" +version = "2.5.0" license = "MIT" authors = ["Edwin Onuonga "] maintainers = ["Edwin Onuonga "] @@ -23,6 +23,7 @@ classifiers = [ "Programming Language :: Python :: 3 :: Only", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Programming Language :: Python :: Implementation :: CPython", "Topic :: Scientific/Engineering", "Topic :: Scientific/Engineering :: Artificial Intelligence", @@ -65,8 +66,11 @@ build-backend = 'poetry.core.masonry.api' [tool.poetry.dependencies] python = "^3.11" -numba = ">=0.56,<1" -numpy = "^1.19.5" +numba = [ + { version = ">=0.56,<1", python = "^3.11,<3.13" }, + { version = ">=0.61.0rc2", python = ">=3.13" } +] +numpy = ">=1.19.5,<3" hmmlearn = ">=0.2.8,<1" dtaidistance = "^2.3.10" scikit-learn = "^1.4" @@ -82,7 +86,7 @@ tox = "4.11.3" pre-commit = ">=3" [tool.poetry.group.lint.dependencies] -ruff = "0.1.3" +ruff = "0.8.4" pydoclint = "0.3.8" [tool.poetry.group.docs.dependencies] @@ -96,8 +100,8 @@ pytest = { version = "^7.4.0" } pytest-cov = { version = "^4.1.0" } [tool.ruff] -required-version = "0.1.3" -select = [ +required-version = "0.8.4" +lint.select = [ "F", # pyflakes: https://pypi.org/project/pyflakes/ "E", # pycodestyle (error): https://pypi.org/project/pycodestyle/ "W", # pycodestyle (warning): https://pypi.org/project/pycodestyle/ @@ -140,7 +144,7 @@ select = [ "PERF", # perflint: https://pypi.org/project/perflint/ "RUF", # ruff ] -ignore = [ +lint.ignore = [ "ANN401", # https://beta.ruff.rs/docs/rules/any-type/ "B905", # https://beta.ruff.rs/docs/rules/zip-without-explicit-strict/ "TD003", # https://beta.ruff.rs/docs/rules/missing-todo-link/ @@ -158,16 +162,15 @@ ignore = [ "C408", # Unnecessary `dict` call (rewrite as a literal) "D401", # First line of docstring should be in imperative mood ] -ignore-init-module-imports = true # allow unused imports in __init__.py line-length = 79 -[tool.ruff.pydocstyle] +[tool.ruff.lint.pydocstyle] convention = "numpy" -[tool.ruff.flake8-annotations] +[tool.ruff.lint.flake8-annotations] 
allow-star-arg-any = true -[tool.ruff.extend-per-file-ignores] +[tool.ruff.lint.extend-per-file-ignores] "__init__.py" = ["PLC0414", "F403", "F401", "F405"] "sequentia/datasets/*.py" = ["B006"] "sequentia/enums.py" = ["E501"] @@ -181,6 +184,21 @@ allow-star-arg-any = true "SLF", "ARG", ] +"sequentia/model_selection/*.py" = [ + "D", + "E", + "ANN", + "PLR", + "TRY", + "EM", + "T", + "BLE", + "RET", + "SLF", + "UP", + "ARG", + "FA" +] "tests/**/*.py" = ["D", "E", "S101"] # "tests/**/test_*.py" = ["ARG001", "S101", "D", "FA100", "FA102", "PLR0915"] "tests/**/test_*.py" = [ diff --git a/sequentia/__init__.py b/sequentia/__init__.py index ee898a0..f15f1aa 100644 --- a/sequentia/__init__.py +++ b/sequentia/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). @@ -9,8 +9,22 @@ import sklearn -from sequentia import datasets, enums, models, preprocessing, version +from sequentia import ( + datasets, + enums, + model_selection, + models, + preprocessing, + version, +) -__all__ = ["datasets", "models", "preprocessing", "enums", "version"] +__all__ = [ + "datasets", + "enums", + "model_selection", + "models", + "preprocessing", + "version", +] sklearn.set_config(enable_metadata_routing=True) diff --git a/sequentia/_internal/__init__.py b/sequentia/_internal/__init__.py index cd11e40..b4cba4c 100644 --- a/sequentia/_internal/__init__.py +++ b/sequentia/_internal/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). diff --git a/sequentia/_internal/_data.py b/sequentia/_internal/_data.py index 9d57786..96ccc72 100644 --- a/sequentia/_internal/_data.py +++ b/sequentia/_internal/_data.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). diff --git a/sequentia/_internal/_hmm/__init__.py b/sequentia/_internal/_hmm/__init__.py index 017d051..9391a8b 100644 --- a/sequentia/_internal/_hmm/__init__.py +++ b/sequentia/_internal/_hmm/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). diff --git a/sequentia/_internal/_hmm/topologies.py b/sequentia/_internal/_hmm/topologies.py index 6b605bb..c74278f 100644 --- a/sequentia/_internal/_hmm/topologies.py +++ b/sequentia/_internal/_hmm/topologies.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). 
@@ -13,10 +13,10 @@ from sequentia.enums import TopologyMode __all__ = [ + "TOPOLOGY_MAP", "ErgodicTopology", "LeftRightTopology", "LinearTopology", - "TOPOLOGY_MAP", ] @@ -36,15 +36,15 @@ class BaseTopology: mode: TopologyMode def __init__( - self: BaseTopology, + self, *, n_states: int, random_state: np.random.RandomState, - ) -> BaseTopology: + ) -> None: self.n_states = n_states self.random_state = random_state - def uniform_start_probs(self: BaseTopology) -> FloatArray: + def uniform_start_probs(self) -> FloatArray: """Set the initial state distribution as a discrete uniform distribution. @@ -55,7 +55,7 @@ def uniform_start_probs(self: BaseTopology) -> FloatArray: """ return np.ones(self.n_states) / self.n_states - def random_start_probs(self: BaseTopology) -> FloatArray: + def random_start_probs(self) -> FloatArray: """Set the initial state distribution by randomly sampling probabilities generated by a Dirichlet distribution. @@ -69,7 +69,7 @@ def random_start_probs(self: BaseTopology) -> FloatArray: size=1, ).flatten() - def uniform_transition_probs(self: BaseTopology) -> FloatArray: + def uniform_transition_probs(self) -> FloatArray: """Set the transition matrix as uniform (equal probability of transitioning to all other possible states from each state) corresponding to the topology. @@ -81,7 +81,7 @@ def uniform_transition_probs(self: BaseTopology) -> FloatArray: """ raise NotImplementedError - def random_transition_probs(self: BaseTopology) -> FloatArray: + def random_transition_probs(self) -> FloatArray: """Set the transition matrix as random (random probability of transitioning to all other possible states from each state) by sampling probabilitiesfrom a Dirichlet distribution - according @@ -94,7 +94,7 @@ def random_transition_probs(self: BaseTopology) -> FloatArray: """ raise NotImplementedError - def check_start_probs(self: BaseTopology, initial: FloatArray, /) -> None: + def check_start_probs(self, initial: FloatArray, /) -> None: """Validate an initial state distribution according to the topology's restrictions. @@ -114,9 +114,7 @@ def check_start_probs(self: BaseTopology, initial: FloatArray, /) -> None: raise ValueError(msg) return initial - def check_transition_probs( - self: BaseTopology, transitions: FloatArray, / - ) -> FloatArray: + def check_transition_probs(self, transitions: FloatArray, /) -> FloatArray: """Validate a transition matrix according to the topology's restrictions. @@ -152,7 +150,7 @@ class ErgodicTopology(BaseTopology): mode: TopologyMode = TopologyMode.ERGODIC - def uniform_transition_probs(self: ErgodicTopology) -> FloatArray: + def uniform_transition_probs(self) -> FloatArray: """Set the transition matrix as uniform (equal probability of transitioning to all other possible states from each state) corresponding to the topology. 
@@ -164,7 +162,7 @@ def uniform_transition_probs(self: ErgodicTopology) -> FloatArray: """ return np.ones((self.n_states, self.n_states)) / self.n_states - def random_transition_probs(self: ErgodicTopology) -> FloatArray: + def random_transition_probs(self) -> FloatArray: """Set the transition matrix as random (random probability of transitioning to all other possible states from each state) by sampling probabilities from a Dirichlet distribution - according @@ -180,9 +178,7 @@ def random_transition_probs(self: ErgodicTopology) -> FloatArray: size=self.n_states, ) - def check_transition_probs( - self: ErgodicTopology, transitions: FloatArray, / - ) -> FloatArray: + def check_transition_probs(self, transitions: FloatArray, /) -> FloatArray: """Validate a transition matrix according to the topology's restrictions. @@ -216,7 +212,7 @@ class LeftRightTopology(BaseTopology): mode: TopologyMode = TopologyMode.LEFT_RIGHT - def uniform_transition_probs(self: LeftRightTopology) -> FloatArray: + def uniform_transition_probs(self) -> FloatArray: """Set the transition matrix as uniform (equal probability of transitioning to all other possible states from each state) corresponding to the topology. @@ -233,7 +229,7 @@ def uniform_transition_probs(self: LeftRightTopology) -> FloatArray: lower_ones = np.tril(np.ones(self.n_states), k=-1) return upper_ones / (upper_divisors + lower_ones) - def random_transition_probs(self: LeftRightTopology) -> FloatArray: + def random_transition_probs(self) -> FloatArray: """Set the transition matrix as random (random probability of transitioning to all other possible states from each state) by sampling probabilities from a Dirichlet distribution, according @@ -249,9 +245,7 @@ def random_transition_probs(self: LeftRightTopology) -> FloatArray: row[i:] = self.random_state.dirichlet(np.ones(self.n_states - i)) return transitions - def check_transition_probs( - self: LeftRightTopology, transitions: FloatArray, / - ) -> FloatArray: + def check_transition_probs(self, transitions: FloatArray, /) -> FloatArray: """Validate a transition matrix according to the topology's restrictions. @@ -281,7 +275,7 @@ class LinearTopology(LeftRightTopology): mode: TopologyMode = TopologyMode.LINEAR - def uniform_transition_probs(self: LinearTopology) -> FloatArray: + def uniform_transition_probs(self) -> FloatArray: """Set the transition matrix as uniform (equal probability of transitioning to all other possible states from each state) corresponding to the topology. @@ -297,7 +291,7 @@ def uniform_transition_probs(self: LinearTopology) -> FloatArray: row[i : (i + size)] = np.ones(size) / size return transitions - def random_transition_probs(self: LinearTopology) -> FloatArray: + def random_transition_probs(self) -> FloatArray: """Set the transition matrix as random (random probability of transitioning to all other possible states from each state) by sampling probabilities from a Dirichlet distribution, according to the @@ -314,9 +308,7 @@ def random_transition_probs(self: LinearTopology) -> FloatArray: row[i : (i + size)] = self.random_state.dirichlet(np.ones(size)) return transitions - def check_transition_probs( - self: LinearTopology, transitions: FloatArray, / - ) -> FloatArray: + def check_transition_probs(self, transitions: FloatArray, /) -> FloatArray: """Validate a transition matrix according to the topology's restrictions. 
diff --git a/sequentia/_internal/_multiprocessing.py b/sequentia/_internal/_multiprocessing.py index b0260d0..2134c64 100644 --- a/sequentia/_internal/_multiprocessing.py +++ b/sequentia/_internal/_multiprocessing.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). diff --git a/sequentia/_internal/_sklearn.py b/sequentia/_internal/_sklearn.py new file mode 100644 index 0000000..d364f57 --- /dev/null +++ b/sequentia/_internal/_sklearn.py @@ -0,0 +1,12 @@ +# Copyright (c) 2019 Sequentia Developers. +# Distributed under the terms of the MIT License (see the LICENSE file). +# SPDX-License-Identifier: MIT +# This source code is part of the Sequentia project (https://github.com/eonu/sequentia). + +import sklearn + +__all__ = ["routing_enabled"] + + +def routing_enabled() -> bool: + return sklearn.get_config()["enable_metadata_routing"] diff --git a/sequentia/_internal/_typing.py b/sequentia/_internal/_typing.py index d9db94b..2e37a98 100644 --- a/sequentia/_internal/_typing.py +++ b/sequentia/_internal/_typing.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). @@ -6,8 +6,8 @@ import numpy as np import numpy.typing as npt -__all__ = ["FloatArray", "IntArray", "Array"] +__all__ = ["Array", "FloatArray", "IntArray"] -FloatArray = npt.NDArray[np.float_] -IntArray = npt.NDArray[np.int_] +FloatArray = npt.NDArray[np.float64] +IntArray = npt.NDArray[np.int64] Array = FloatArray | IntArray diff --git a/sequentia/_internal/_validation.py b/sequentia/_internal/_validation.py index 04c8122..89f26cb 100644 --- a/sequentia/_internal/_validation.py +++ b/sequentia/_internal/_validation.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). 
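The `routing_enabled` helper added in `sequentia/_internal/_sklearn.py` above simply reads scikit-learn's global configuration. A minimal illustrative sketch (not part of the patch) of what it reports, using only the public `sklearn.set_config`/`sklearn.get_config` API::

    import sklearn

    from sequentia._internal._sklearn import routing_enabled

    # Sequentia enables metadata routing when imported (sequentia/__init__.py
    # calls sklearn.set_config(enable_metadata_routing=True)), so this holds:
    sklearn.set_config(enable_metadata_routing=True)
    assert routing_enabled()

    # ...and the helper tracks the setting if a caller opts back out.
    sklearn.set_config(enable_metadata_routing=False)
    assert not routing_enabled()
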
@@ -20,15 +20,15 @@ from sequentia._internal._typing import Array, FloatArray, IntArray __all__ = [ - "check_random_state", - "check_is_fitted", - "requires_fit", - "check_classes", "check_X", "check_X_lengths", - "check_y", - "check_weighting", + "check_classes", + "check_is_fitted", + "check_random_state", "check_use_c", + "check_weighting", + "check_y", + "requires_fit", ] @@ -60,7 +60,7 @@ def check_is_fitted( def requires_fit(function: t.Callable) -> t.Callable: @functools.wraps(function) - def wrapper(self: t.Self, *args: t.Any, **kwargs: t.Any) -> t.Any: + def wrapper(self, *args: t.Any, **kwargs: t.Any) -> t.Any: # noqa: ANN001 check_is_fitted(self) return function(self, *args, **kwargs) @@ -100,20 +100,20 @@ def check_X( X: t.Iterable[int] | t.Iterable[float], /, *, - dtype: np.float_ | np.int_, + dtype: np.float64 | np.int64, univariate: bool = False, ) -> Array: if not isinstance(X, np.ndarray): try: X = np.array(X).astype(dtype) - except Exception as e: # noqa: BLE001 + except Exception as e: type_ = type(X).__name__ msg = f"Expected value to be a numpy.ndarray, got {type_!r}" raise TypeError(msg) from e if (dtype_ := X.dtype) != dtype: try: X = X.astype(dtype) - except Exception as e: # noqa: BLE001 + except Exception as e: msg = f"Expected array to have dtype {dtype}, got {dtype_}" raise TypeError(msg) from e if (ndim_ := X.ndim) != 2: @@ -133,7 +133,7 @@ def check_X_lengths( /, *, lengths: t.Iterable[int] | None, - dtype: np.float_ | np.int_, + dtype: np.float64 | np.int64, univariate: bool = False, ) -> tuple[Array, IntArray]: # validate observations @@ -172,7 +172,7 @@ def check_y( /, *, lengths: IntArray, - dtype: np.float_ | np.int_ | None = None, + dtype: np.float64 | np.int64 | None = None, ) -> Array: if y is None: msg = "No output values `y` provided" @@ -214,7 +214,7 @@ def check_weighting( if x.shape != weights.shape: msg = "Weights should have the same shape as inputs" raise ValueError(msg) # noqa: TRY301 - except Exception as e: # noqa: BLE001 + except Exception as e: msg = "Invalid weighting function" raise ValueError(msg) from e diff --git a/sequentia/datasets/__init__.py b/sequentia/datasets/__init__.py index 66efacc..817b0a2 100644 --- a/sequentia/datasets/__init__.py +++ b/sequentia/datasets/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). @@ -10,4 +10,4 @@ from sequentia.datasets.digits import load_digits from sequentia.datasets.gene_families import load_gene_families -__all__ = ["data", "load_digits", "load_gene_families", "SequentialDataset"] +__all__ = ["SequentialDataset", "data", "load_digits", "load_gene_families"] diff --git a/sequentia/datasets/base.py b/sequentia/datasets/base.py index 18984f2..4bc096b 100644 --- a/sequentia/datasets/base.py +++ b/sequentia/datasets/base.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). 
@@ -26,19 +26,17 @@ class SequentialDataset: """Utility wrapper for a generic sequential dataset.""" def __init__( - self: SequentialDataset, + self, X: Array, y: Array | None = None, *, lengths: IntArray | None = None, classes: list[int] | None = None, - ) -> SequentialDataset: + ) -> None: """Initialize a :class:`.SequentialDataset`. Parameters ---------- - self: SequentialDataset - X: Sequence(s). @@ -80,17 +78,21 @@ def __init__( self._idxs = _data.get_idxs(self.lengths) def split( - self: SequentialDataset, + self, *, test_size: ( - pyd.NonNegativeInt | pyd.confloat(ge=0, le=1) | None - ) = None, # placeholder + pyd.NonNegativeInt + | t.Annotated[float, pyd.Field(ge=0, le=1)] + | None + ) = None, train_size: ( - pyd.NonNegativeInt | pyd.confloat(ge=0, le=1) | None - ) = None, # placeholder + pyd.NonNegativeInt + | t.Annotated[float, pyd.Field(ge=0, le=1)] + | None + ) = None, random_state: ( pyd.NonNegativeInt | np.random.RandomState | None - ) = None, # placeholder + ) = None, shuffle: bool = True, stratify: bool = False, ) -> tuple[SequentialDataset, SequentialDataset]: @@ -100,8 +102,6 @@ def split( Parameters ---------- - self: SequentialDataset - test_size: Size of the test partition. @@ -171,9 +171,7 @@ def split( return data_train, data_test - def iter_by_class( - self: SequentialDataset, - ) -> t.Generator[tuple[Array, Array, int]]: + def iter_by_class(self) -> t.Generator[tuple[Array, Array, int]]: """Subset the observation sequences by class. Returns @@ -207,24 +205,18 @@ def iter_by_class( lengths = self._lengths[ind] yield np.vstack(X), lengths, c - def __len__(self: SequentialDataset) -> int: + def __len__(self) -> int: """Return the number of sequences in the dataset.""" return len(self._lengths) - def __getitem__( - self: SequentialDataset, - /, - i: int, - ) -> Array | tuple[Array, Array]: + def __getitem__(self, /, i: int) -> Array | tuple[Array, Array]: """Slice observation sequences and corresponding outputs.""" idxs = np.atleast_2d(self._idxs[i]) X = list(_data.iter_X(self._X, idxs=idxs)) X = X[0] if isinstance(i, int) and len(X) == 1 else X return X if self._y is None else (X, self._y[i]) - def __iter__( - self: SequentialDataset, - ) -> t.Generator[Array | tuple[Array, Array]]: + def __iter__(self) -> t.Generator[Array | tuple[Array, Array]]: """Create a generator over sequences and their corresponding outputs. """ @@ -232,7 +224,7 @@ def __iter__( yield self[i] @property - def X(self: SequentialDataset) -> Array: + def X(self) -> Array: """Observation sequences. Returns @@ -243,7 +235,7 @@ def X(self: SequentialDataset) -> Array: return self._X @property - def y(self: SequentialDataset) -> Array: + def y(self) -> Array: """Outputs corresponding to ``X``. Returns @@ -262,7 +254,7 @@ def y(self: SequentialDataset) -> Array: return self._y @property - def lengths(self: SequentialDataset) -> IntArray: + def lengths(self) -> IntArray: """Lengths corresponding to ``X``. Returns @@ -273,7 +265,7 @@ def lengths(self: SequentialDataset) -> IntArray: return self._lengths @property - def classes(self: SequentialDataset) -> IntArray | None: + def classes(self) -> IntArray | None: """Set of unique classes in ``y``. Returns @@ -284,7 +276,7 @@ def classes(self: SequentialDataset) -> IntArray | None: return self._classes @property - def idxs(self: SequentialDataset) -> IntArray: + def idxs(self) -> IntArray: """Observation sequence start and end indices. 
Returns @@ -295,7 +287,7 @@ def idxs(self: SequentialDataset) -> IntArray: return self._idxs @property - def X_y(self: SequentialDataset) -> dict[str, Array]: + def X_y(self) -> dict[str, Array]: """Observation sequences and corresponding outputs. Returns @@ -317,7 +309,7 @@ def X_y(self: SequentialDataset) -> dict[str, Array]: return {"X": self._X, "y": self._y} @property - def X_lengths(self: SequentialDataset) -> dict[str, Array]: + def X_lengths(self) -> dict[str, Array]: """Observation sequences and corresponding lengths. Returns @@ -331,7 +323,7 @@ def X_lengths(self: SequentialDataset) -> dict[str, Array]: return {"X": self._X, "lengths": self._lengths} @property - def X_y_lengths(self: SequentialDataset) -> dict[str, Array]: + def X_y_lengths(self) -> dict[str, Array]: """Observation sequences and corresponding outputs and lengths. Returns @@ -354,7 +346,7 @@ def X_y_lengths(self: SequentialDataset) -> dict[str, Array]: return {"X": self._X, "y": self._y, "lengths": self._lengths} def save( - self: SequentialDataset, + self, path: str | pathlib.Path | t.IO, /, *, @@ -389,9 +381,7 @@ def save( save_fun(path, **arrs) @classmethod - def load( - cls: type[SequentialDataset], path: str | pathlib.Path | t.IO, / - ) -> SequentialDataset: + def load(cls, path: str | pathlib.Path | t.IO, /) -> SequentialDataset: """Load a stored dataset in ``.npz`` format. See :func:`numpy:numpy.load`. @@ -413,7 +403,7 @@ def load( """ return cls(**np.load(path)) - def copy(self: SequentialDataset) -> SequentialDataset: + def copy(self) -> SequentialDataset: """Create a copy of the dataset. Returns diff --git a/sequentia/datasets/data/__init__.py b/sequentia/datasets/data/__init__.py index e206863..3f2a533 100644 --- a/sequentia/datasets/data/__init__.py +++ b/sequentia/datasets/data/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). diff --git a/sequentia/datasets/digits.py b/sequentia/datasets/digits.py index defe491..7fbca04 100644 --- a/sequentia/datasets/digits.py +++ b/sequentia/datasets/digits.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). @@ -9,6 +9,7 @@ import importlib.resources import operator +import typing as t import numpy as np import pydantic as pyd @@ -22,7 +23,19 @@ @pyd.validate_call def load_digits( - *, digits: set[pyd.conint(ge=0, le=9)] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9} + *, + digits: set[t.Annotated[int, pyd.Field(ge=0, le=9)]] = { + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + }, ) -> SequentialDataset: """Load a dataset of MFCC features of spoken digit audio samples from the Free Spoken Digit Dataset. diff --git a/sequentia/datasets/gene_families.py b/sequentia/datasets/gene_families.py index 2151568..54559ff 100644 --- a/sequentia/datasets/gene_families.py +++ b/sequentia/datasets/gene_families.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). 
# SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). @@ -9,6 +9,7 @@ import importlib.resources import operator +import typing as t import numpy as np import pydantic as pyd @@ -23,7 +24,16 @@ @pyd.validate_call def load_gene_families( - *, families: set[pyd.conint(ge=0, le=6)] = {0, 1, 2, 3, 4, 5, 6} + *, + families: set[t.Annotated[int, pyd.Field(ge=0, le=6)]] = { + 0, + 1, + 2, + 3, + 4, + 5, + 6, + }, ) -> tuple[SequentialDataset, LabelEncoder]: """Load a dataset of human DNA sequences grouped by gene family. diff --git a/sequentia/enums.py b/sequentia/enums.py index 4d3128f..3b70919 100644 --- a/sequentia/enums.py +++ b/sequentia/enums.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). @@ -7,7 +7,7 @@ import enum -__all__ = ["TopologyMode", "CovarianceMode", "TransitionMode", "PriorMode"] +__all__ = ["CovarianceMode", "PriorMode", "TopologyMode", "TransitionMode"] class TopologyMode(enum.StrEnum): diff --git a/sequentia/model_selection/__init__.py b/sequentia/model_selection/__init__.py new file mode 100644 index 0000000..07e3c40 --- /dev/null +++ b/sequentia/model_selection/__init__.py @@ -0,0 +1,38 @@ +# Copyright (c) 2019 Sequentia Developers. +# Distributed under the terms of the MIT License (see the LICENSE file). +# SPDX-License-Identifier: MIT +# This source code is part of the Sequentia project (https://github.com/eonu/sequentia). + +"""Hyper-parameter search and dataset splitting utilities.""" + +from sequentia.model_selection._search import ( + GridSearchCV, + RandomizedSearchCV, + param_grid, +) +from sequentia.model_selection._search_successive_halving import ( + HalvingGridSearchCV, + HalvingRandomSearchCV, +) +from sequentia.model_selection._split import ( + KFold, + RepeatedKFold, + RepeatedStratifiedKFold, + ShuffleSplit, + StratifiedKFold, + StratifiedShuffleSplit, +) + +__all__ = [ + "GridSearchCV", + "HalvingGridSearchCV", + "HalvingRandomSearchCV", + "KFold", + "RandomizedSearchCV", + "RepeatedKFold", + "RepeatedStratifiedKFold", + "ShuffleSplit", + "StratifiedKFold", + "StratifiedShuffleSplit", + "param_grid", +] diff --git a/sequentia/model_selection/_search.py b/sequentia/model_selection/_search.py new file mode 100644 index 0000000..d0b5ef5 --- /dev/null +++ b/sequentia/model_selection/_search.py @@ -0,0 +1,352 @@ +# Copyright (c) 2019 Sequentia Developers. +# Distributed under the terms of the MIT License (see the LICENSE file). +# SPDX-License-Identifier: MIT +# This source code is part of the Sequentia project (https://github.com/eonu/sequentia). + +"""This file is an adapted version of the same file from the +sklearn.model_selection sub-package. + +Below is the original license from Scikit-Learn, copied on 27th December 2024 +from https://github.com/scikit-learn/scikit-learn/blob/main/COPYING. + +--- + +BSD 3-Clause License + +Copyright (c) 2007-2024 The scikit-learn developers. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. 
+ +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +""" + +# Author: Alexandre Gramfort , +# Gael Varoquaux +# Andreas Mueller +# Olivier Grisel +# Raghav RV +# License: BSD 3 clause + +import time +import typing as t +from collections import defaultdict +from itertools import product + +from sklearn.base import _fit_context, clone, is_classifier +from sklearn.metrics._scorer import _MultimetricScorer +from sklearn.model_selection import _search +from sklearn.model_selection._split import check_cv +from sklearn.model_selection._validation import ( + _insert_error_scores, + _warn_or_raise_about_fit_failures, +) +from sklearn.utils.parallel import Parallel, delayed +from sklearn.utils.validation import _check_method_params + +from sequentia.model_selection._validation import _fit_and_score + +__all__ = ["BaseSearchCV", "GridSearchCV", "RandomizedSearchCV", "param_grid"] + + +def param_grid(**kwargs: list[t.Any]) -> list[dict[str, t.Any]]: + """Generates a hyper-parameter grid for a nested object. + + Examples + -------- + Using :func:`.param_grid` in a grid search to cross-validate over + settings for :class:`.GaussianMixtureHMM`, which is a nested model + specified in the constructor of a :class:`.HMMClassifier`. :: + + from sklearn.pipeline import Pipeline + from sklearn.preprocessing import minmax_scale + + from sequentia.enums import PriorMode, CovarianceMode, TopologyMode + from sequentia.models import HMMClassifier, GaussianMixtureHMM + from sequentia.preprocessing import IndependentFunctionTransformer + from sequentia.model_selection import GridSearchCV, StratifiedKFold + + GridSearchCV( + estimator=Pipeline( + [ + ("scale", IndependentFunctionTransformer(minmax_scale)), + ("clf", HMMClassifier(variant=GaussianMixtureHMM)), + ] + ), + param_grid={ + "clf__prior": [PriorMode.UNIFORM, PriorMode.FREQUENCY], + "clf__model_kwargs": param_grid( + n_states=[3, 5, 7], + n_components=[2, 3, 4], + covariance=[ + CovarianceMode.DIAGONAL, CovarianceMode.SPHERICAL + ], + topology=[ + TopologyMode.LEFT_RIGHT, TopologyMode.LINEAR + ], + ) + }, + cv=StratifiedKFold(), + ) + + Parameters + ---------- + **kwargs: + Hyper-parameter name and corresponding values. + + Returns + ------- + Hyper-parameter grid for a nested object. 
+ """ + return [ + dict(zip(kwargs.keys(), values)) + for values in product(*kwargs.values()) + ] + + +class BaseSearchCV(_search.BaseSearchCV): + @_fit_context( + # *SearchCV.estimator is not validated yet + prefer_skip_nested_validation=False + ) + def fit(self, X, y=None, **params): + """Run fit with all sets of parameters. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) or (n_samples, n_samples) + Training vectors, where `n_samples` is the number of samples and + `n_features` is the number of features. For precomputed kernel or + distance matrix, the expected shape of X is (n_samples, n_samples). + + y : array-like of shape (n_samples, n_output) \ + or (n_samples,), default=None + Target relative to X for classification or regression; + None for unsupervised learning. + + **params : dict of str -> object + Parameters passed to the ``fit`` method of the estimator, the scorer, + and the CV splitter. + + If a fit parameter is an array-like whose length is equal to + `num_samples` then it will be split across CV groups along with `X` + and `y`. For example, the :term:`sample_weight` parameter is split + because `len(sample_weights) = len(X)`. + + Returns + ------- + self : object + Instance of fitted estimator. + """ + estimator = self.estimator + scorers, refit_metric = self._get_scorers() + + # X, y = indexable(X, y) # NOTE @eonu: removed + params = _check_method_params(X, params=params) + + routed_params = self._get_routed_params_for_fit(params) + + cv_orig = check_cv(self.cv, y, classifier=is_classifier(estimator)) + n_splits = cv_orig.get_n_splits(X, y, **routed_params.splitter.split) + + base_estimator = clone(self.estimator) + + parallel = Parallel(n_jobs=self.n_jobs, pre_dispatch=self.pre_dispatch) + + fit_and_score_kwargs = dict( + scorer=scorers, + fit_params=routed_params.estimator.fit, + score_params=routed_params.scorer.score, + return_train_score=self.return_train_score, + return_n_test_samples=True, + return_times=True, + return_parameters=False, + error_score=self.error_score, + verbose=self.verbose, + ) + results = {} + with parallel: + all_candidate_params = [] + all_out = [] + all_more_results = defaultdict(list) + + def evaluate_candidates( + candidate_params, cv=None, more_results=None + ): + cv = cv or cv_orig + candidate_params = list(candidate_params) + n_candidates = len(candidate_params) + + if self.verbose > 0: + print( + "Fitting {0} folds for each of {1} candidates," + " totalling {2} fits".format( + n_splits, n_candidates, n_candidates * n_splits + ) + ) + + out = parallel( + delayed(_fit_and_score)( + clone(base_estimator), + X, + y, + train=train, + test=test, + parameters=parameters, + split_progress=(split_idx, n_splits), + candidate_progress=(cand_idx, n_candidates), + **fit_and_score_kwargs, + ) + for (cand_idx, parameters), ( + split_idx, + (train, test), + ) in product( + enumerate(candidate_params), + enumerate( + cv.split(X, y, **routed_params.splitter.split) + ), + ) + ) + + if len(out) < 1: + raise ValueError( + "No fits were performed. " + "Was the CV iterator empty? " + "Were there no candidates?" + ) + elif len(out) != n_candidates * n_splits: + raise ValueError( + "cv.split and cv.get_n_splits returned " + f"inconsistent results. Expected {n_splits} " + f"splits, got {len(out) // n_candidates}" + ) + + _warn_or_raise_about_fit_failures(out, self.error_score) + + # For callable self.scoring, the return type is only know after + # calling. 
If the return type is a dictionary, the error scores + # can now be inserted with the correct key. The type checking + # of out will be done in `_insert_error_scores`. + if callable(self.scoring): + _insert_error_scores(out, self.error_score) + + all_candidate_params.extend(candidate_params) + all_out.extend(out) + + if more_results is not None: + for key, value in more_results.items(): + all_more_results[key].extend(value) + + nonlocal results + results = self._format_results( + all_candidate_params, n_splits, all_out, all_more_results + ) + + return results + + self._run_search(evaluate_candidates) + + # multimetric is determined here because in the case of a callable + # self.scoring the return type is only known after calling + first_test_score = all_out[0]["test_scores"] + self.multimetric_ = isinstance(first_test_score, dict) + + # check refit_metric now for a callabe scorer that is multimetric + if callable(self.scoring) and self.multimetric_: + self._check_refit_for_multimetric(first_test_score) + refit_metric = self.refit + + # For multi-metric evaluation, store the best_index_, best_params_ and + # best_score_ iff refit is one of the scorer names + # In single metric evaluation, refit_metric is "score" + if self.refit or not self.multimetric_: + self.best_index_ = self._select_best_index( + self.refit, refit_metric, results + ) + if not callable(self.refit): + # With a non-custom callable, we can select the best score + # based on the best index + self.best_score_ = results[f"mean_test_{refit_metric}"][ + self.best_index_ + ] + self.best_params_ = results["params"][self.best_index_] + + if self.refit: + # here we clone the estimator as well as the parameters, since + # sometimes the parameters themselves might be estimators, e.g. + # when we search over different estimators in a pipeline. + # ref: https://github.com/scikit-learn/scikit-learn/pull/26786 + self.best_estimator_ = clone(base_estimator).set_params( + **clone(self.best_params_, safe=False) + ) + + refit_start_time = time.time() + if y is not None: + self.best_estimator_.fit(X, y, **routed_params.estimator.fit) + else: + self.best_estimator_.fit(X, **routed_params.estimator.fit) + refit_end_time = time.time() + self.refit_time_ = refit_end_time - refit_start_time + + if hasattr(self.best_estimator_, "feature_names_in_"): + self.feature_names_in_ = self.best_estimator_.feature_names_in_ + + # Store the only scorer not as a dict for single metric evaluation + if isinstance(scorers, _MultimetricScorer): + self.scorer_ = scorers._scorers + else: + self.scorer_ = scorers + + self.cv_results_ = results + self.n_splits_ = n_splits + + return self + + +class GridSearchCV(_search.GridSearchCV, BaseSearchCV): + """Exhaustive search over specified parameter values for an estimator. + + ``cv`` must be a valid splitting method from + :mod:`sequentia.model_selection`. + + See Also + -------- + :class:`sklearn.model_selection.GridSearchCV` + :class:`.GridSearchCV` is a modified version + of this class that supports sequences. + """ + + +class RandomizedSearchCV(_search.RandomizedSearchCV, BaseSearchCV): + """Randomized search on hyper parameters. + + ``cv`` must be a valid splitting method from + :mod:`sequentia.model_selection`. + + See Also + -------- + :class:`sklearn.model_selection.RandomizedSearchCV` + :class:`.RandomizedSearchCV` is a modified version + of this class that supports sequences. 
+ """ diff --git a/sequentia/model_selection/_search_successive_halving.py b/sequentia/model_selection/_search_successive_halving.py new file mode 100644 index 0000000..499e5b1 --- /dev/null +++ b/sequentia/model_selection/_search_successive_halving.py @@ -0,0 +1,77 @@ +# Copyright (c) 2019 Sequentia Developers. +# Distributed under the terms of the MIT License (see the LICENSE file). +# SPDX-License-Identifier: MIT +# This source code is part of the Sequentia project (https://github.com/eonu/sequentia). + +"""This file is an adapted version of the same file from the +sklearn.model_selection sub-package. + +Below is the original license from Scikit-Learn, copied on 27th December 2024 +from https://github.com/scikit-learn/scikit-learn/blob/main/COPYING. + +--- + +BSD 3-Clause License + +Copyright (c) 2007-2024 The scikit-learn developers. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +""" + +from sklearn.model_selection import _search_successive_halving as _search + +from sequentia.model_selection._search import BaseSearchCV + +__all__ = ["HalvingGridSearchCV", "HalvingRandomSearchCV"] + + +class HalvingGridSearchCV(_search.HalvingGridSearchCV, BaseSearchCV): + """Search over specified parameter values with successive halving. + + ``cv`` must be a valid splitting method from + :mod:`sequentia.model_selection`. + + See Also + -------- + :class:`sklearn.model_selection.HalvingGridSearchCV` + :class:`.HalvingGridSearchCV` is a modified version + of this class that supports sequences. + """ + + +class HalvingRandomSearchCV(_search.HalvingRandomSearchCV, BaseSearchCV): + """Randomized search on hyper parameters with successive halving. + + ``cv`` must be a valid splitting method from + :mod:`sequentia.model_selection`. + + See Also + -------- + :class:`sklearn.model_selection.HalvingRandomSearchCV` + :class:`.HalvingRandomSearchCV` is a modified version + of this class that supports sequences. 
+ """ diff --git a/sequentia/model_selection/_split.py b/sequentia/model_selection/_split.py new file mode 100644 index 0000000..d710f2f --- /dev/null +++ b/sequentia/model_selection/_split.py @@ -0,0 +1,196 @@ +# Copyright (c) 2019 Sequentia Developers. +# Distributed under the terms of the MIT License (see the LICENSE file). +# SPDX-License-Identifier: MIT +# This source code is part of the Sequentia project (https://github.com/eonu/sequentia). + +"""This file is an adapted version of the same file from the +sklearn.model_selection sub-package. + +Below is the original license from Scikit-Learn, copied on 27th December 2024 +from https://github.com/scikit-learn/scikit-learn/blob/main/COPYING. + +--- + +BSD 3-Clause License + +Copyright (c) 2007-2024 The scikit-learn developers. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +""" + +import typing as t + +import numpy as np +from sklearn.model_selection import _split + +__all__ = [ + "KFold", + "RepeatedKFold", + "RepeatedStratifiedKFold", + "ShuffleSplit", + "StratifiedKFold", + "StratifiedShuffleSplit", +] + + +class KFold(_split.KFold): + """K-Fold cross-validator. + + Provides train/test indices to split data in train/test sets. + Split dataset into k consecutive folds (without shuffling by default). + + Each fold is then used once as a validation while the + k - 1 remaining folds form the training set. + + See Also + -------- + :class:`sklearn.model_selection.KFold` + :class:`.KFold` is a modified version + of this class that supports sequences. + """ + + def split( + self, X: np.ndarray, y: np.ndarray, groups: t.Any = None + ) -> None: + return super().split(y, y, groups) + + +class StratifiedKFold(_split.StratifiedKFold): + """Stratified K-Fold cross-validator. + + Provides train/test indices to split data in train/test sets. + + This cross-validation object is a variation of + KFold that returns stratified folds. + + The folds are made by preserving the percentage of samples for each class. 
+ + See Also + -------- + :class:`sklearn.model_selection.StratifiedKFold` + :class:`.StratifiedKFold` is a modified version + of this class that supports sequences. + """ + + def split( + self, X: np.ndarray, y: np.ndarray, groups: t.Any = None + ) -> None: + return super().split(y, y, groups) + + +class ShuffleSplit(_split.ShuffleSplit): + """Random permutation cross-validator. + + Yields indices to split data into training and test sets. + + Note: contrary to other cross-validation strategies, random splits do not + guarantee that test sets across all folds will be mutually exclusive, + and might include overlapping samples. However, this is still very likely + for sizeable datasets. + + See Also + -------- + :class:`sklearn.model_selection.ShuffleSplit` + :class:`.ShuffleSplit` is a modified version + of this class that supports sequences. + """ + + def split( + self, + X: np.ndarray, + y: np.ndarray | None = None, + groups: t.Any = None, + ) -> None: + return super().split(y, y, groups) + + +class StratifiedShuffleSplit(_split.StratifiedShuffleSplit): + """Stratified :class:`.ShuffleSplit` cross-validator. + + Provides train/test indices to split data in train/test sets. + + This cross-validation object is a merge of :class:`.StratifiedKFold` + and :class:`.ShuffleSplit`, which returns stratified randomized folds. + The folds are made by preserving the percentage of samples for each class. + + See Also + -------- + :class:`sklearn.model_selection.StratifiedShuffleSplit` + :class:`.StratifiedShuffleSplit` is a modified version + of this class that supports sequences. + """ + + def split( + self, + X: np.ndarray, + y: np.ndarray | None = None, + groups: t.Any = None, + ) -> None: + return super().split(y, y, groups) + + +class RepeatedKFold(_split.RepeatedKFold): + """Repeated :class:`.KFold` cross validator. + + Repeats :class:`.KFold` n times with different randomization in each repetition. + + See Also + -------- + :class:`sklearn.model_selection.RepeatedKFold` + :class:`.RepeatedKFold` is a modified version + of this class that supports sequences. + """ + + def split( + self, + X: np.ndarray, + y: np.ndarray | None = None, + groups: t.Any = None, + ) -> None: + return super().split(y, y, groups) + + +class RepeatedStratifiedKFold(_split.RepeatedStratifiedKFold): + """Repeated :class:`.StratifiedKFold` cross validator. + + Repeats :class:`.StratifiedKFold` n times with different randomization + in each repetition. + + See Also + -------- + :class:`sklearn.model_selection.RepeatedStratifiedKFold` + :class:`.RepeatedStratifiedKFold` is a modified version + of this class that supports sequences. + """ + + def split( + self, + X: np.ndarray, + y: np.ndarray | None = None, + groups: t.Any = None, + ) -> None: + return super().split(y, y, groups) diff --git a/sequentia/model_selection/_validation.py b/sequentia/model_selection/_validation.py new file mode 100644 index 0000000..6cd0674 --- /dev/null +++ b/sequentia/model_selection/_validation.py @@ -0,0 +1,234 @@ +# Copyright (c) 2019 Sequentia Developers. +# Distributed under the terms of the MIT License (see the LICENSE file). +# SPDX-License-Identifier: MIT +# This source code is part of the Sequentia project (https://github.com/eonu/sequentia). + +"""This file is an adapted version of the same file from the +sklearn.model_selection sub-package. + +Below is the original license from Scikit-Learn, copied on 27th December 2024 +from https://github.com/scikit-learn/scikit-learn/blob/main/COPYING. 
+ +--- + +BSD 3-Clause License + +Copyright (c) 2007-2024 The scikit-learn developers. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +""" + +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + +import numbers +import time +from traceback import format_exc + +import numpy as np +from joblib import logger +from sklearn.base import clone +from sklearn.metrics._scorer import _MultimetricScorer +from sklearn.model_selection._validation import _score +from sklearn.utils._array_api import device, get_namespace +from sklearn.utils.validation import _check_method_params, _num_samples + +from sequentia._internal import _data + +__all__ = ["_fit_and_score"] + + +def _fit_and_score( + estimator, + X, + y, + *, + scorer, + train, + test, + verbose, + parameters, + fit_params, + score_params, + return_train_score=False, + return_parameters=False, + return_n_test_samples=False, + return_times=False, + return_estimator=False, + split_progress=None, + candidate_progress=None, + error_score=np.nan, +): + xp, _ = get_namespace(X) + X_device = device(X) + + # Make sure that we can fancy index X even if train and test are provided + # as NumPy arrays by NumPy only cross-validation splitters. + train, test = ( + xp.asarray(train, device=X_device), + xp.asarray(test, device=X_device), + ) + + if not isinstance(error_score, numbers.Number) and error_score != "raise": + raise ValueError( + "error_score must be the string 'raise' or a numeric value. 
" + "(Hint: if using 'raise', please make sure that it has been " + "spelled correctly.)" + ) + + progress_msg = "" + if verbose > 2: + if split_progress is not None: + progress_msg = f" {split_progress[0]+1}/{split_progress[1]}" + if candidate_progress and verbose > 9: + progress_msg += ( + f"; {candidate_progress[0]+1}/{candidate_progress[1]}" + ) + + if verbose > 1: + if parameters is None: + params_msg = "" + else: + sorted_keys = sorted(parameters) # Ensure deterministic o/p + params_msg = ", ".join(f"{k}={parameters[k]}" for k in sorted_keys) + if verbose > 9: + start_msg = f"[CV{progress_msg}] START {params_msg}" + print(f"{start_msg}{(80 - len(start_msg)) * '.'}") + + # Adjust length of sample weights + lengths = fit_params["lengths"] # NOTE @eonu: added this + fit_params = fit_params if fit_params is not None else {} + fit_params = _check_method_params(X, params=fit_params, indices=train) + score_params = score_params if score_params is not None else {} + score_params_train = _check_method_params( + X, params=score_params, indices=train + ) + score_params_test = _check_method_params( + X, params=score_params, indices=test + ) + + if parameters is not None: + # here we clone the parameters, since sometimes the parameters + # themselves might be estimators, e.g. when we search over different + # estimators in a pipeline. + # ref: https://github.com/scikit-learn/scikit-learn/pull/26786 + estimator = estimator.set_params(**clone(parameters, safe=False)) + + start_time = time.time() + + # NOTE @eonu: modified this block + idxs = _data.get_idxs(lengths) + idxs_train, idxs_test = idxs[train], idxs[test] + y_train, y_test = y[train], y[test] + lengths_train, lengths_test = lengths[train], lengths[test] + X_train = np.concatenate(list(_data.iter_X(X, idxs=idxs_train))) + X_test = np.concatenate(list(_data.iter_X(X, idxs=idxs_test))) + fit_params["lengths"] = lengths_train + score_params_train["lengths"] = lengths_train + score_params_test["lengths"] = lengths_test + + result = {} + try: + if y_train is None: + estimator.fit(X_train, **fit_params) + else: + estimator.fit(X_train, y_train, **fit_params) + + except Exception: + # Note fit time as time until error + fit_time = time.time() - start_time + score_time = 0.0 + if error_score == "raise": + raise + elif isinstance(error_score, numbers.Number): + if isinstance(scorer, _MultimetricScorer): + test_scores = {name: error_score for name in scorer._scorers} + if return_train_score: + train_scores = test_scores.copy() + else: + test_scores = error_score + if return_train_score: + train_scores = error_score + result["fit_error"] = format_exc() + else: + result["fit_error"] = None + + fit_time = time.time() - start_time + test_scores = _score( + estimator, X_test, y_test, scorer, score_params_test, error_score + ) + score_time = time.time() - start_time - fit_time + if return_train_score: + train_scores = _score( + estimator, + X_train, + y_train, + scorer, + score_params_train, + error_score, + ) + + if verbose > 1: + total_time = score_time + fit_time + end_msg = f"[CV{progress_msg}] END " + result_msg = params_msg + (";" if params_msg else "") + if verbose > 2: + if isinstance(test_scores, dict): + for scorer_name in sorted(test_scores): + result_msg += f" {scorer_name}: (" + if return_train_score: + scorer_scores = train_scores[scorer_name] + result_msg += f"train={scorer_scores:.3f}, " + result_msg += f"test={test_scores[scorer_name]:.3f})" + else: + result_msg += ", score=" + if return_train_score: + result_msg += ( + 
f"(train={train_scores:.3f}, test={test_scores:.3f})" + ) + else: + result_msg += f"{test_scores:.3f}" + result_msg += f" total time={logger.short_format_time(total_time)}" + + # Right align the result_msg + end_msg += "." * (80 - len(end_msg) - len(result_msg)) + end_msg += result_msg + print(end_msg) + + result["test_scores"] = test_scores + if return_train_score: + result["train_scores"] = train_scores + if return_n_test_samples: + result["n_test_samples"] = _num_samples(X_test) + if return_times: + result["fit_time"] = fit_time + result["score_time"] = score_time + if return_parameters: + result["parameters"] = parameters + if return_estimator: + result["estimator"] = estimator + return result diff --git a/sequentia/models/__init__.py b/sequentia/models/__init__.py index bbff6eb..e9b3bbc 100644 --- a/sequentia/models/__init__.py +++ b/sequentia/models/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). diff --git a/sequentia/models/base.py b/sequentia/models/base.py index 7a412d5..55874be 100644 --- a/sequentia/models/base.py +++ b/sequentia/models/base.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). @@ -8,6 +8,7 @@ from __future__ import annotations import abc +import typing as t import numpy as np import sklearn.base @@ -28,18 +29,18 @@ class ClassifierMixin( @abc.abstractmethod def fit( - self: ClassifierMixin, + self, X: Array, y: IntArray, *, lengths: IntArray | None = None, - ) -> ClassifierMixin: + ) -> t.Self: """Fit the classifier with the provided sequences and outputs.""" raise NotImplementedError @abc.abstractmethod def predict( - self: ClassifierMixin, + self, X: Array, *, lengths: IntArray | None = None, @@ -48,7 +49,7 @@ def predict( raise NotImplementedError def fit_predict( - self: ClassifierMixin, + self, X: Array, y: IntArray, *, @@ -59,8 +60,6 @@ def fit_predict( Parameters ---------- - self: ClassifierMixin - X: Sequence(s). @@ -82,7 +81,7 @@ def fit_predict( @abc.abstractmethod def predict_proba( - self: ClassifierMixin, + self, X: Array, *, lengths: IntArray | None = None, @@ -92,7 +91,7 @@ def predict_proba( @abc.abstractmethod def predict_scores( - self: ClassifierMixin, + self, X: Array, *, lengths: IntArray | None = None, @@ -102,7 +101,7 @@ def predict_scores( @_validation.requires_fit def score( - self: ClassifierMixin, + self, X: Array, y: IntArray, *, @@ -114,8 +113,6 @@ def score( Parameters ---------- - self: ClassifierMixin - X: Sequence(s). 
@@ -155,24 +152,24 @@ class RegressorMixin(sklearn.base.BaseEstimator, sklearn.base.RegressorMixin): @abc.abstractmethod def fit( - self: RegressorMixin, + self, X: FloatArray, y: FloatArray, *, lengths: IntArray | None = None, - ) -> RegressorMixin: + ) -> t.Self: """Fit the regressor with the provided sequences and outputs.""" raise NotImplementedError @abc.abstractmethod def predict( - self: RegressorMixin, X: FloatArray, lengths: IntArray | None = None + self, X: FloatArray, lengths: IntArray | None = None ) -> FloatArray: """Predict outputs for the provided sequences.""" raise NotImplementedError def fit_predict( - self: RegressorMixin, + self, X: FloatArray, y: FloatArray, *, @@ -183,8 +180,6 @@ def fit_predict( Parameters ---------- - self: RegressorMixin - X: Sequence(s). @@ -206,7 +201,7 @@ def fit_predict( @_validation.requires_fit def score( - self: RegressorMixin, + self, X: FloatArray, y: FloatArray, *, @@ -218,8 +213,6 @@ def score( Parameters ---------- - self: RegressorMixin - X: Sequence(s). diff --git a/sequentia/models/hmm/__init__.py b/sequentia/models/hmm/__init__.py index 480da35..ea7f699 100644 --- a/sequentia/models/hmm/__init__.py +++ b/sequentia/models/hmm/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). diff --git a/sequentia/models/hmm/classifier.py b/sequentia/models/hmm/classifier.py index be0f31e..2da8a38 100644 --- a/sequentia/models/hmm/classifier.py +++ b/sequentia/models/hmm/classifier.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). @@ -17,12 +17,12 @@ import pydantic as pyd from sklearn.utils.validation import NotFittedError -from sequentia._internal import _data, _multiprocessing, _validation +from sequentia._internal import _data, _multiprocessing, _sklearn, _validation from sequentia._internal._typing import Array, FloatArray, IntArray from sequentia.datasets.base import SequentialDataset from sequentia.enums import PriorMode from sequentia.models.base import ClassifierMixin -from sequentia.models.hmm.variants.base import BaseHMM +from sequentia.models.hmm import variants class HMMClassifier(ClassifierMixin): @@ -35,8 +35,9 @@ class HMMClassifier(ClassifierMixin): Examples -------- - Using a :class:`.HMMClassifier` (with :class:`.GaussianMixtureHMM` - models) to classify spoken digits. :: + Using a :class:`.HMMClassifier` with :class:`.GaussianMixtureHMM` + models for each class (all with identical settings), + to classify spoken digits. 
::

        import numpy as np
        from sequentia.datasets import load_digits
@@ -47,7 +48,29 @@ class HMMClassifier(ClassifierMixin):
         # Fetch MFCCs of spoken digits
         data = load_digits()
-        train_data, test_data = data.split(test_size=0.2, random_state=random_state)
+        train_data, test_data = data.split(
+            test_size=0.2, random_state=random_state
+        )
+
+        # Create a HMMClassifier using:
+        # - a separate GaussianMixtureHMM for each class (with 3 states)
+        # - a class frequency prior
+        clf = HMMClassifier(
+            variant=GaussianMixtureHMM,
+            model_kwargs=dict(n_states=3, random_state=random_state),
+            prior='frequency',
+        )
+
+        # Fit the HMMs by providing observation sequences for all classes
+        clf.fit(train_data.X, train_data.y, lengths=train_data.lengths)
+
+        # Predict classes for the test observation sequences
+        y_pred = clf.predict(test_data.X, lengths=test_data.lengths)
+
+    For more complex problems, it might be necessary to specify different
+    hyper-parameters for each individual class HMM. This can be done by
+    using :func:`add_model` or :func:`add_models` to add HMM objects
+    after the :class:`HMMClassifier` has been initialized. ::

         # Create a HMMClassifier using a class frequency prior
         clf = HMMClassifier(prior='frequency')
@@ -57,24 +80,18 @@ class HMMClassifier(ClassifierMixin):
         model = GaussianMixtureHMM(random_state=random_state)
         clf.add_model(model, label=label)

-        # Fit the HMMs by providing training observation sequences for all classes
+        # Fit the HMMs by providing observation sequences for all classes
         clf.fit(train_data.X, train_data.y, lengths=train_data.lengths)

-        # Predict classes for the test observation sequences
-        y_pred = clf.predict(test_data.X, lengths=test_data.lengths)
-
-    As done in the above example, we can provide unfitted HMMs using
-    :func:`add_model` or :func:`add_models`, then provide training
-    observation sequences for all classes to :func:`fit`, which will
-    automatically train each HMM on the appropriate subset of data.
-
-    Alternatively, we may provide pre-fitted HMMs and call :func:`fit` with
-    no arguments. ::
+    Alternatively, we might want to pre-fit the HMMs individually,
+    then add these fitted HMMs to the :class:`.HMMClassifier`. In this case,
+    :func:`fit` on the :class:`.HMMClassifier` is called without providing any
+    data as arguments, since the HMMs are already fitted. ::

         # Create a HMMClassifier using a class frequency prior
         clf = HMMClassifier(prior='frequency')

-        # Manually fit each HMM on its own subset of data
+        # Manually fit each HMM on its own subset of data
         for X_train, lengths_train, label in train_data.iter_by_class():
             model = GaussianMixtureHMM(random_state=random_state)
             model.fit(X_train, lengths=lengths_train)
@@ -82,28 +99,41 @@
         # Fit the classifier
         clf.fit()
-    """  # noqa: E501
+    """

    @pyd.validate_call(config=dict(arbitrary_types_allowed=True))
    def __init__(
-        self: pyd.SkipValidation,
+        self,
        *,
+        variant: type[variants.CategoricalHMM]
+        | type[variants.GaussianMixtureHMM]
+        | None = None,
+        model_kwargs: dict[str, t.Any] | None = None,
        prior: (
-            PriorMode | dict[int, pyd.confloat(ge=0, le=1)]
-        ) = PriorMode.UNIFORM,  # placeholder
+            PriorMode | dict[int, t.Annotated[float, pyd.Field(ge=0, le=1)]]
+        ) = PriorMode.UNIFORM,
        classes: list[int] | None = None,
        n_jobs: pyd.PositiveInt | pyd.NegativeInt = 1,
-    ) -> pyd.SkipValidation:
+    ) -> None:
        """Initialize a :class:`.HMMClassifier`.

        Parameters
        ----------
-        self: HMMClassifier
+        variant:
+            Variant of HMM to use for modelling each class.
If not specified, + models must instead be added using the :func:`add_model` or + :func:`add_models` methods after the :class:`.HMMClassifier` has + been initialized. + + model_kwargs: + If ``variant`` is specified, these parameters are used to + initialize the created HMM object(s). Note that all HMMs + will be created with identical settings. prior: Type of prior probability to assign to each HMM. - - If ``None``, a uniform prior will be used, making each HMM + - If ``"uniform"``, a uniform prior will be used, making each HMM equally likely. - If ``"frequency"``, the prior probability of each HMM is equal to the fraction of total observation sequences that the HMM was @@ -134,39 +164,43 @@ class labels provided here. ------- HMMClassifier """ + #: Type of HMM to use for each class. + self.variant: ( + type[variants.CategoricalHMM] + | type[variants.GaussianMixtureHMM] + | None + ) = variant + #: Model parameters for initializing HMMs. + self.model_kwargs: dict[str, t.Any] | None = model_kwargs #: Type of prior probability to assign to each HMM. - self.prior: PriorMode | dict[int, pyd.confloat(ge=0, le=1)] = prior + self.prior: ( + PriorMode | dict[int, t.Annotated[float, pyd.Field(ge=0, le=1)]] + ) = prior #: Set of possible class labels. self.classes: list[int] | None = classes #: Maximum number of concurrently running workers. self.n_jobs: pyd.PositiveInt | pyd.NegativeInt = n_jobs #: HMMs constituting the :class:`.HMMClassifier`. - self.models: dict[int, BaseHMM] = {} + self.models: dict[int, variants.BaseHMM] = {} + # Allow metadata routing for lengths - self.set_fit_request(lengths=True) - self.set_predict_request(lengths=True) - self.set_predict_proba_request(lengths=True) - self.set_predict_log_proba_request(lengths=True) - self.set_score_request( - lengths=True, - normalize=True, - sample_weight=True, - ) + if _sklearn.routing_enabled(): + self.set_fit_request(lengths=True) + self.set_predict_request(lengths=True) + self.set_predict_proba_request(lengths=True) + self.set_predict_log_proba_request(lengths=True) + self.set_score_request( + lengths=True, + normalize=True, + sample_weight=True, + ) @pyd.validate_call(config=dict(arbitrary_types_allowed=True)) - def add_model( - self: pyd.SkipValidation, - model: BaseHMM, - /, - *, - label: int, - ) -> pyd.SkipValidation: + def add_model(self, model: variants.BaseHMM, /, *, label: int) -> t.Self: """Add a single HMM to the classifier. Parameters ---------- - self: HMMClassifier - model: HMM to add to the classifier. @@ -196,17 +230,11 @@ def add_model( return self @pyd.validate_call(config=dict(arbitrary_types_allowed=True)) - def add_models( - self: pyd.SkipValidation, - models: dict[int, BaseHMM], - /, - ) -> pyd.SkipValidation: + def add_models(self, models: dict[int, variants.BaseHMM], /) -> t.Self: """Add HMMs to the classifier. Parameters ---------- - self: HMMClassifier - models: HMMs to add to the classifier. The key for each HMM should be the label of the class represented by the HMM. @@ -226,24 +254,23 @@ def add_models( return self def fit( - self: HMMClassifier, + self, X: Array | None = None, y: IntArray | None = None, *, lengths: IntArray | None = None, - ) -> HMMClassifier: + ) -> t.Self: """Fit the HMMs to the sequence(s) in ``X``. - If fitted models were provided with :func:`add_model` or :func:`add_models`, no arguments should be passed to :func:`fit`. - If unfitted models were provided with :func:`add_model` or - :func:`add_models`, training data ``X``, ``y`` and ``lengths`` - must be provided to :func:`fit`. 
+ :func:`add_models`, or a ``variant`` was specified in + :func:`HMMClassifier.__init__`, training data ``X``, ``y`` and + ``lengths`` must be provided to :func:`fit`. Parameters ---------- - self: HMMClassifier - X: Sequence(s). @@ -289,6 +316,13 @@ def fit( y = _validation.check_y(y, lengths=lengths, dtype=np.int8) self.classes_ = _validation.check_classes(y, classes=self.classes) + # Initialize models based on instructor spec if provided + if self.variant: + model_kwargs = self.model_kwargs or {} + self.models = { + label: self.variant(**model_kwargs) for label in self.classes_ + } + # Check that each label has a HMM (and vice versa) if set(self.models.keys()) != set(self.classes_): msg = ( @@ -306,11 +340,27 @@ def fit( lengths=lengths, classes=self.classes_, ) - for X_c, lengths_c, c in dataset.iter_by_class(): - self.models[c].fit(X_c, lengths=lengths_c) + + # get number of jobs + n_jobs = _multiprocessing.effective_n_jobs( + self.n_jobs, x=self.classes_ + ) + + # fit models in parallel + self.models = dict( + zip( + self.classes_, + joblib.Parallel(n_jobs=n_jobs, mmap_mode="r+")( + joblib.delayed(self.models[c].fit)( + X_c, lengths=lengths_c + ) + for X_c, lengths_c, c in dataset.iter_by_class() + ), + ) + ) # Set class priors - models: t.Iterator[int, BaseHMM] = self.models.items() + models: t.Iterable[int, variants.BaseHMM] = self.models.items() if self.prior == PriorMode.UNIFORM: self.prior_ = {c: 1 / len(self.classes_) for c, _ in models} elif self.prior == PriorMode.FREQUENCY: @@ -330,7 +380,7 @@ def fit( @_validation.requires_fit def predict( - self: HMMClassifier, + self, X: Array, *, lengths: IntArray | None = None, @@ -339,8 +389,6 @@ def predict( Parameters ---------- - self: HMMClassifier - X: Sequence(s). @@ -365,15 +413,13 @@ def predict( @_validation.requires_fit def predict_log_proba( - self: HMMClassifier, X: Array, *, lengths: IntArray | None = None + self, X: Array, *, lengths: IntArray | None = None ) -> FloatArray: """Predict log un-normalized posterior probabilities for the sequences in ``X``. Parameters ---------- - self: HMMClassifier - X: Sequence(s). @@ -396,7 +442,7 @@ def predict_log_proba( @_validation.requires_fit def predict_proba( - self: HMMClassifier, X: Array, *, lengths: IntArray | None = None + self, X: Array, *, lengths: IntArray | None = None ) -> FloatArray: """Predict class probabilities for the sequence(s) in ``X``. @@ -405,8 +451,6 @@ def predict_proba( Parameters ---------- - self: HMMClassifier - X: Sequence(s). @@ -433,7 +477,7 @@ def predict_proba( @_validation.requires_fit def predict_scores( - self: HMMClassifier, X: Array, *, lengths: IntArray | None = None + self, X: Array, *, lengths: IntArray | None = None ) -> FloatArray: """Predict class scores for the sequence(s) in ``X``. @@ -442,8 +486,6 @@ def predict_scores( Parameters ---------- - self: HMMClassifier - X: Sequence(s). @@ -462,7 +504,7 @@ def predict_scores( ----- This method requires a trained classifier — see :func:`fit`. 
""" - model: BaseHMM = next(iter(self.models.values())) + model: variants.BaseHMM = next(iter(self.models.values())) X, lengths = _validation.check_X_lengths( X, lengths=lengths, @@ -471,20 +513,18 @@ def predict_scores( n_jobs = _multiprocessing.effective_n_jobs(self.n_jobs, x=lengths) chunk_idxs = np.array_split(_data.get_idxs(lengths), n_jobs) return np.concatenate( - joblib.Parallel(n_jobs=n_jobs, max_nbytes=None)( + joblib.Parallel(n_jobs=n_jobs, mmap_mode="r+")( joblib.delayed(self._compute_scores_chunk)(X, idxs=idxs) for idxs in chunk_idxs ) ) @_validation.requires_fit - def save(self: HMMClassifier, path: str | pathlib.Path | t.IO, /) -> None: + def save(self, path: str | pathlib.Path | t.IO, /) -> None: """Serialize and save a fitted HMM classifier. Parameters ---------- - self: HMMClassifier - path: Location to save the serialized classifier. @@ -509,17 +549,11 @@ def save(self: HMMClassifier, path: str | pathlib.Path | t.IO, /) -> None: joblib.dump(state, path) @classmethod - def load( - cls: type[HMMClassifier], - path: str | pathlib.Path | t.IO, - /, - ) -> HMMClassifier: + def load(cls, path: str | pathlib.Path | t.IO, /) -> HMMClassifier: """Load and deserialize a fitted HMM classifier. Parameters ---------- - cls: type[HMMClassifier] - path: Location to load the serialized classifier from. @@ -547,7 +581,7 @@ def load( return model def _compute_scores_chunk( - self: HMMClassifier, X: Array, /, *, idxs: IntArray + self, X: Array, /, *, idxs: IntArray ) -> FloatArray: """Compute log posterior probabilities for a chunk of sequences.""" scores = np.zeros((len(idxs), len(self.classes_))) @@ -556,7 +590,7 @@ def _compute_scores_chunk( return scores def _compute_log_posterior( - self: HMMClassifier, + self, x: Array, /, ) -> FloatArray: diff --git a/sequentia/models/hmm/variants/__init__.py b/sequentia/models/hmm/variants/__init__.py index b40b57f..ea339bb 100644 --- a/sequentia/models/hmm/variants/__init__.py +++ b/sequentia/models/hmm/variants/__init__.py @@ -1,11 +1,12 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). """Supported hidden Markov Model variants.""" +from sequentia.models.hmm.variants.base import BaseHMM from sequentia.models.hmm.variants.categorical import CategoricalHMM from sequentia.models.hmm.variants.gaussian_mixture import GaussianMixtureHMM -__all__ = ["CategoricalHMM", "GaussianMixtureHMM"] +__all__ = ["BaseHMM", "CategoricalHMM", "GaussianMixtureHMM"] diff --git a/sequentia/models/hmm/variants/base.py b/sequentia/models/hmm/variants/base.py index d80d4e7..3e0b864 100644 --- a/sequentia/models/hmm/variants/base.py +++ b/sequentia/models/hmm/variants/base.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). 
@@ -34,13 +34,13 @@ class BaseHMM(BaseEstimator, metaclass=abc.ABCMeta): @abc.abstractmethod def __init__( - self: BaseHMM, + self, *, n_states: pyd.PositiveInt, topology: enums.TopologyMode | None, random_state: pyd.NonNegativeInt | np.random.RandomState | None, hmmlearn_kwargs: dict[str, t.Any] | None, - ) -> BaseHMM: + ) -> None: self.n_states: int = n_states """Number of states in the Markov chain.""" @@ -66,19 +66,12 @@ def __init__( self._skip_init_params = set() self._skip_params = set() - def fit( - self: BaseHMM, - X: Array, - *, - lengths: IntArray | None = None, - ) -> BaseHMM: + def fit(self, X: Array, *, lengths: IntArray | None = None) -> t.Self: """Fit the HMM to the sequences in ``X``, using the Baum—Welch algorithm. Parameters ---------- - self: BaseHMM - X: Sequence(s). @@ -123,14 +116,12 @@ def fit( return self @_validation.requires_fit - def score(self: BaseHMM, x: Array, /) -> float: + def score(self, x: Array, /) -> float: """Calculate the log-likelihood of the HMM generating a single observation sequence. Parameters ---------- - self: BaseHMM - x: Sequence. @@ -152,7 +143,7 @@ def score(self: BaseHMM, x: Array, /) -> float: @abc.abstractproperty @_validation.requires_fit - def n_params(self: BaseHMM) -> int: + def n_params(self) -> int: """Number of trainable parameters — requires :func:`fit`.""" n_params = 0 if "s" not in self._skip_params: @@ -163,7 +154,7 @@ def n_params(self: BaseHMM) -> int: @_validation.requires_fit def bic( - self: BaseHMM, + self, X: Array, *, lengths: IntArray | None = None, @@ -173,8 +164,6 @@ def bic( Parameters ---------- - self: BaseHMM - X: Sequence(s). @@ -200,7 +189,7 @@ def bic( @_validation.requires_fit def aic( - self: BaseHMM, + self, X: Array, *, lengths: IntArray | None = None, @@ -210,8 +199,6 @@ def aic( Parameters ---------- - self: BaseHMM - X: Sequence(s). @@ -236,10 +223,10 @@ def aic( @pyd.validate_call(config=dict(arbitrary_types_allowed=True)) def set_state_start_probs( - self: pyd.SkipValidation, + self, probs: ( FloatArray | enums.TransitionMode - ) = enums.TransitionMode.RANDOM, # placeholder + ) = enums.TransitionMode.RANDOM, /, ) -> None: """Set the initial state probabilities. @@ -258,8 +245,6 @@ def set_state_start_probs( Parameters ---------- - self: BaseHMM - probs: Probabilities or probability type to assign as initial state probabilities. @@ -285,10 +270,10 @@ def set_state_start_probs( @pyd.validate_call(config=dict(arbitrary_types_allowed=True)) def set_state_transition_probs( - self: pyd.SkipValidation, + self, probs: ( FloatArray | enums.TransitionMode - ) = enums.TransitionMode.RANDOM, # placeholder + ) = enums.TransitionMode.RANDOM, /, ) -> None: """Set the transition probability matrix. @@ -307,8 +292,6 @@ def set_state_transition_probs( Parameters ---------- - self: BaseHMM - probs: Probabilities or probability type to assign as state transition probabilities. @@ -335,7 +318,7 @@ def set_state_transition_probs( self._skip_init_params |= set("t") @abc.abstractmethod - def freeze(self: BaseHMM, params: str | None, /) -> None: + def freeze(self, params: str | None, /) -> None: """Freeze the trainable parameters of the HMM, preventing them from be updated during the Baum—Welch algorithm. 
""" @@ -343,19 +326,19 @@ def freeze(self: BaseHMM, params: str | None, /) -> None: self._skip_params |= set(self._modify_params(params or defaults)) @abc.abstractmethod - def unfreeze(self: BaseHMM, params: str | None, /) -> None: + def unfreeze(self, params: str | None, /) -> None: """Unfreeze the trainable parameters of the HMM, allowing them to be updated during the Baum—Welch algorithm. """ defaults = self._hmmlearn_kwargs_defaults()["params"] self._skip_params -= set(self._modify_params(params or defaults)) - def _modify_params(self: BaseHMM, params: str) -> str: + def _modify_params(self, params: str) -> str: """Validate parameters to be frozen/unfrozen.""" defaults = self._hmmlearn_kwargs_defaults()["params"] msg = ( "Expected a string consisting of any combination of " - f"{defaults!r}" # + f"{defaults!r}" ) if isinstance(params, str): if bool(re.compile(rf"[^{defaults}]").search(params)): @@ -364,7 +347,7 @@ def _modify_params(self: BaseHMM, params: str) -> str: raise TypeError(msg) return params - def _check_init_params(self: BaseHMM) -> None: + def _check_init_params(self) -> None: """Validate hmmlearn init_params argument.""" topology = self.topology_ or _hmm.topologies.ErgodicTopology( n_states=self.n_states, @@ -401,7 +384,7 @@ def _check_init_params(self: BaseHMM) -> None: @classmethod def _check_hmmlearn_kwargs( - cls: type[BaseHMM], kwargs: dict[str, t.Any] | None + cls, kwargs: dict[str, t.Any] | None ) -> dict[str, t.Any]: """Check hmmlearn forwarded key-word arguments.""" defaults: dict[str, t.Any] = cls._hmmlearn_kwargs_defaults() @@ -456,7 +439,7 @@ def _check_hmmlearn_kwargs( return kwargs @abc.abstractmethod - def _init_hmm(self: BaseHMM, **kwargs: t.Any) -> hmmlearn.base.BaseHMM: + def _init_hmm(self, **kwargs: t.Any) -> hmmlearn.base.BaseHMM: """Initialize the hmmlearn model.""" raise NotImplementedError diff --git a/sequentia/models/hmm/variants/categorical.py b/sequentia/models/hmm/variants/categorical.py index fdc9404..3c1ddc6 100644 --- a/sequentia/models/hmm/variants/categorical.py +++ b/sequentia/models/hmm/variants/categorical.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). @@ -59,19 +59,17 @@ class CategoricalHMM(BaseHMM): @pyd.validate_call(config=dict(arbitrary_types_allowed=True)) def __init__( - self: pyd.SkipValidation, + self, *, n_states: pyd.PositiveInt = 5, topology: enums.TopologyMode | None = enums.TopologyMode.LEFT_RIGHT, random_state: pyd.NonNegativeInt | np.random.RandomState | None = None, hmmlearn_kwargs: dict[str, t.Any] | None = None, - ) -> pyd.SkipValidation: + ) -> None: """Initializes the :class:`.CategoricalHMM`. Parameters ---------- - self: CategoricalHMM - n_states: Number of states in the Markov chain. @@ -104,18 +102,14 @@ def __init__( @property @_validation.requires_fit - def n_params(self: CategoricalHMM) -> int: + def n_params(self) -> int: """Number of trainable parameters — requires :func:`fit`.""" n_params = super().n_params if "e" not in self._skip_params: n_params += self.model.emissionprob_.size return n_params - def set_state_emission_probs( - self: CategoricalHMM, - probs: FloatArray, - /, - ) -> None: + def set_state_emission_probs(self, probs: FloatArray, /) -> None: """Set the state emission distribution of the HMM's emission model. 
If this method is **not** called, emission probabilities will be @@ -124,8 +118,6 @@ def set_state_emission_probs( Parameters ---------- - self: CategoricalHMM - probs: Array of emission probabilities. @@ -136,14 +128,12 @@ def set_state_emission_probs( self._emissionprob = np.array(probs, dtype=np.float64) self._skip_init_params |= set("e") - def freeze(self: CategoricalHMM, params: str | None = None, /) -> None: + def freeze(self, params: str | None = None, /) -> None: """Freeze the trainable parameters of the HMM, preventing them from being updated during the Baum—Welch algorithm. Parameters ---------- - self: CategoricalHMM - params: A string specifying which parameters to freeze. Can contain a combination of: @@ -164,14 +154,12 @@ def freeze(self: CategoricalHMM, params: str | None = None, /) -> None: """ super().freeze(params) - def unfreeze(self: CategoricalHMM, params: str | None = None, /) -> None: + def unfreeze(self, params: str | None = None, /) -> None: """Unfreeze the trainable parameters of the HMM, allowing them to be updated during the Baum—Welch algorithm. Parameters ---------- - self: CategoricalHMM - params: A string specifying which parameters to unfreeze. Can contain a combination of: @@ -189,10 +177,7 @@ def unfreeze(self: CategoricalHMM, params: str | None = None, /) -> None: """ super().unfreeze(params) - def _init_hmm( - self: CategoricalHMM, - **kwargs: t.Any, - ) -> hmmlearn.hmm.CategoricalHMM: + def _init_hmm(self, **kwargs: t.Any) -> hmmlearn.hmm.CategoricalHMM: """Initialize the hmmlearn model.""" return hmmlearn.hmm.CategoricalHMM( n_components=self.n_states, diff --git a/sequentia/models/hmm/variants/gaussian_mixture.py b/sequentia/models/hmm/variants/gaussian_mixture.py index 87bfcf8..58042a8 100644 --- a/sequentia/models/hmm/variants/gaussian_mixture.py +++ b/sequentia/models/hmm/variants/gaussian_mixture.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). @@ -60,7 +60,7 @@ class GaussianMixtureHMM(BaseHMM): @pyd.validate_call(config=dict(arbitrary_types_allowed=True)) def __init__( - self: pyd.SkipValidation, + self, *, n_states: pyd.PositiveInt = 5, n_components: pyd.PositiveInt = 3, @@ -68,13 +68,11 @@ def __init__( topology: enums.TopologyMode | None = enums.TopologyMode.LEFT_RIGHT, random_state: pyd.NonNegativeInt | np.random.RandomState | None = None, hmmlearn_kwargs: dict[str, t.Any] | None = None, - ) -> pyd.SkipValidation: + ) -> None: """Initializes the :class:`.GaussianMixtureHMM`. Parameters ---------- - self: GaussianMixtureHMM - n_states: Number of states in the Markov chain. @@ -122,7 +120,7 @@ def __init__( @property @_validation.requires_fit - def n_params(self: GaussianMixtureHMM) -> int: + def n_params(self) -> int: """Number of trainable parameters — requires :func:`fit`.""" n_params = super().n_params() if "m" not in self._skip_params: @@ -133,11 +131,7 @@ def n_params(self: GaussianMixtureHMM) -> int: n_params += self.model.weights_.size return n_params - def set_state_means( - self: GaussianMixtureHMM, - means: FloatArray, - /, - ) -> None: + def set_state_means(self, means: FloatArray, /) -> None: """Set the mean vectors of the state emission distributions. 
If this method is **not** called, mean vectors will be @@ -146,8 +140,6 @@ def set_state_means( Parameters ---------- - self: GaussianMixtureHMM - means: Array of mean values. @@ -158,11 +150,7 @@ def set_state_means( self._means = np.array(means, dtype=np.float64) self._skip_init_params |= set("m") - def set_state_covars( - self: GaussianMixtureHMM, - covars: FloatArray, - /, - ) -> None: + def set_state_covars(self, covars: FloatArray, /) -> None: """Set the covariance matrices of the state emission distributions. If this method is **not** called, covariance matrices will be @@ -171,8 +159,6 @@ def set_state_covars( Parameters ---------- - self: GaussianMixtureHMM - covars: Array of covariance values. @@ -183,11 +169,7 @@ def set_state_covars( self._covars = np.array(covars, dtype=np.float64) self._skip_init_params |= set("c") - def set_state_weights( - self: GaussianMixtureHMM, - weights: FloatArray, - /, - ) -> None: + def set_state_weights(self, weights: FloatArray, /) -> None: """Set the component mixture weights of the state emission distributions. @@ -197,8 +179,6 @@ def set_state_weights( Parameters ---------- - self: GaussianMixtureHMM - weights: Array of component mixture weights. @@ -209,18 +189,12 @@ def set_state_weights( self._weights = np.array(weights, dtype=np.float64) self._skip_init_params |= set("w") - def freeze( - self: GaussianMixtureHMM, - params: str | None = None, - /, - ) -> None: + def freeze(self, params: str | None = None, /) -> None: """Freeze the trainable parameters of the HMM, preventing them from be updated during the Baum—Welch algorithm. Parameters ---------- - self: GaussianMixtureHMM - params: A string specifying which parameters to freeze. Can contain a combination of: @@ -239,18 +213,12 @@ def freeze( """ super().freeze(params) - def unfreeze( - self: GaussianMixtureHMM, - params: str | None = None, - /, - ) -> None: + def unfreeze(self, params: str | None = None, /) -> None: """Unfreeze the trainable parameters of the HMM, allowing them to be updated during the Baum—Welch algorithm. Parameters ---------- - self: GaussianMixtureHMM - params: A string specifying which parameters to unfreeze. Can contain a combination of: @@ -270,7 +238,7 @@ def unfreeze( super().unfreeze(params) def _init_hmm( - self: GaussianMixtureHMM, + self, **kwargs: t.Any, ) -> hmmlearn.hmm.GMMHMM: """Initialize the hmmlearn model.""" diff --git a/sequentia/models/knn/__init__.py b/sequentia/models/knn/__init__.py index 8e88274..abb405c 100644 --- a/sequentia/models/knn/__init__.py +++ b/sequentia/models/knn/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). diff --git a/sequentia/models/knn/base.py b/sequentia/models/knn/base.py index d2d91e9..0d25325 100644 --- a/sequentia/models/knn/base.py +++ b/sequentia/models/knn/base.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). 
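Across these hunks, both the HMM variants above and the KNN estimators below drop the explicit `self: <Class>` annotations and have `fit` return `t.Self` rather than naming the concrete class, so subclasses keep their own type when chaining. A minimal sketch of the pattern with a hypothetical estimator pair (requires Python 3.11+, matching the supported versions):

import typing as t

import numpy as np


class Base:
    def fit(self, X: np.ndarray) -> t.Self:
        """Mark the estimator as fitted and return it for chaining."""
        self.fitted_ = True
        return self


class MyEstimator(Base):
    def predict(self, X: np.ndarray) -> np.ndarray:
        return np.zeros(len(X))


# `fit` is typed as returning MyEstimator here (not Base), so chaining type-checks:
y_pred = MyEstimator().fit(np.ones((3, 2))).predict(np.ones((3, 2)))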
@@ -30,7 +30,7 @@ class KNNMixin: @_validation.requires_fit def query_neighbors( - self: KNNMixin, + self, X: FloatArray, *, lengths: IntArray | None = None, @@ -41,8 +41,6 @@ def query_neighbors( Parameters ---------- - self: KNNMixin - X: Sequence(s). @@ -94,7 +92,7 @@ def query_neighbors( @_validation.requires_fit def compute_distance_matrix( - self: KNNMixin, + self, X: FloatArray, *, lengths: IntArray | None = None, @@ -104,8 +102,6 @@ def compute_distance_matrix( Parameters ---------- - self: KNNMixin - X: Sequence(s). @@ -143,7 +139,7 @@ def compute_distance_matrix( # multiprocessed DTW calculation return np.vstack( - joblib.Parallel(n_jobs=n_jobs, max_nbytes=None)( + joblib.Parallel(n_jobs=n_jobs, mmap_mode="r+")( joblib.delayed(self._distance_matrix_row_chunk)( row_idxs, col_chunk_idxs, X, n_jobs, dtw ) @@ -152,13 +148,11 @@ def compute_distance_matrix( ) @_validation.requires_fit - def dtw(self: KNNMixin, A: FloatArray, B: FloatArray) -> float: + def dtw(self, A: FloatArray, B: FloatArray) -> float: """Calculate the DTW distance between two observation sequences. Parameters ---------- - self: KNNMixin - A: The first sequence. @@ -179,7 +173,7 @@ def dtw(self: KNNMixin, A: FloatArray, B: FloatArray) -> float: return self._dtw()(A, B) def _dtw1d( - self: KNNMixin, + self, a: FloatArray, b: FloatArray, *, @@ -193,11 +187,11 @@ def _dtw1d( window=window, ) - def _window(self: KNNMixin, A: FloatArray, B: FloatArray) -> int: + def _window(self, A: FloatArray, B: FloatArray) -> int: """Calculate the absolute DTW window size.""" return int(self.window * min(len(A), len(B))) - def _dtwi(self: KNNMixin, A: FloatArray, B: FloatArray) -> float: + def _dtwi(self, A: FloatArray, B: FloatArray) -> float: """Compute the multivariate DTW distance as the sum of the pairwise per-feature DTW distances, allowing each feature to be warped independently. @@ -210,7 +204,7 @@ def dtw(a: FloatArray, b: FloatArray) -> float: return np.sum([dtw(A[:, i], B[:, i]) for i in range(A.shape[1])]) - def _dtwd(self: KNNMixin, A: FloatArray, B: FloatArray) -> float: + def _dtwd(self, A: FloatArray, B: FloatArray) -> float: """Compute the multivariate DTW distance so that the warping of the features depends on each other, by modifying the local distance measure. @@ -223,18 +217,18 @@ def _dtwd(self: KNNMixin, A: FloatArray, B: FloatArray) -> float: window=window, ) - def _dtw(self: KNNMixin) -> t.Callable[[FloatArray], float]: + def _dtw(self) -> t.Callable[[FloatArray], float]: """Conditional DTW callable.""" return self._dtwi if self.independent else self._dtwd - def _weighting(self: KNNMixin) -> t.Callable[[FloatArray], FloatArray]: + def _weighting(self) -> t.Callable[[FloatArray], FloatArray]: """Weighting function - use equal weighting if not provided.""" if callable(self.weighting): return self.weighting return np.ones_like def _distance_matrix_row_chunk( - self: KNNMixin, + self, row_idxs: IntArray, col_chunk_idxs: list[IntArray], X: FloatArray, @@ -245,7 +239,7 @@ def _distance_matrix_row_chunk( columns. 
""" return np.hstack( - joblib.Parallel(n_jobs=n_jobs, max_nbytes=None)( + joblib.Parallel(n_jobs=n_jobs, mmap_mode="r+")( joblib.delayed(self._distance_matrix_row_col_chunk)( col_idxs, row_idxs, X, dtw ) @@ -254,7 +248,7 @@ def _distance_matrix_row_chunk( ) def _distance_matrix_row_col_chunk( - self: KNNMixin, + self, col_idxs: IntArray, row_idxs: IntArray, X: FloatArray, @@ -270,17 +264,11 @@ def _distance_matrix_row_col_chunk( return distances @_validation.requires_fit - def save( - self: KNNMixin, - path: str | pathlib.Path | t.IO, - /, - ) -> None: + def save(self, path: str | pathlib.Path | t.IO, /) -> None: """Serialize and save a fitted KNN estimator. Parameters ---------- - self: KNNMixin - path: Location to save the serialized estimator. @@ -312,17 +300,11 @@ def save( joblib.dump(state, path) @classmethod - def load( - cls: type[KNNMixin], - path: str | pathlib.Path | t.IO, - /, - ) -> KNNMixin: + def load(cls, path: str | pathlib.Path | t.IO, /) -> KNNMixin: """Load and deserialize a fitted KNN estimator. Parameters ---------- - cls: type[KNNMixin] - path: Location to load the serialized estimator from. diff --git a/sequentia/models/knn/classifier.py b/sequentia/models/knn/classifier.py index 51af161..76b5240 100644 --- a/sequentia/models/knn/classifier.py +++ b/sequentia/models/knn/classifier.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). @@ -16,7 +16,7 @@ import numpy as np import pydantic as pyd -from sequentia._internal import _data, _multiprocessing, _validation +from sequentia._internal import _data, _multiprocessing, _sklearn, _validation from sequentia._internal._typing import Array, FloatArray, IntArray from sequentia.models.base import ClassifierMixin from sequentia.models.knn.base import KNNMixin @@ -59,23 +59,21 @@ class KNNClassifier(KNNMixin, ClassifierMixin): @pyd.validate_call(config=dict(arbitrary_types_allowed=True)) def __init__( - self: pyd.SkipValidation, + self, *, k: pyd.PositiveInt = 1, weighting: t.Callable[[FloatArray], FloatArray] | None = None, - window: pyd.confloat(ge=0.0, le=1.0) = 1.0, + window: t.Annotated[float, pyd.Field(ge=0, le=1)] = 1.0, independent: bool = False, - use_c: bool = False, + use_c: bool = True, n_jobs: pyd.PositiveInt | pyd.NegativeInt = 1, random_state: pyd.NonNegativeInt | np.random.RandomState | None = None, classes: list[int] | None = None, - ) -> pyd.SkipValidation: + ) -> None: """Initializes the :class:`.KNNClassifier`. Parameters ---------- - self: KNNClassifier - k: Number of neighbors. 
@@ -142,9 +140,7 @@ def __init__( self.k: int = k """Number of neighbors.""" - self.weighting: t.Callable[[np.ndarray], np.ndarray] | None = ( - weighting # placeholder - ) + self.weighting: t.Callable[[np.ndarray], np.ndarray] | None = weighting """A callable that specifies how distance weighting should be performed.""" @@ -172,29 +168,28 @@ def __init__( """Set of possible class labels.""" # Allow metadata routing for lengths - self.set_fit_request(lengths=True) - self.set_predict_request(lengths=True) - self.set_predict_log_proba_request(lengths=True) - self.set_predict_proba_request(lengths=True) - self.set_score_request( - lengths=True, - normalize=True, - sample_weight=True, - ) + if _sklearn.routing_enabled(): + self.set_fit_request(lengths=True) + self.set_predict_request(lengths=True) + self.set_predict_log_proba_request(lengths=True) + self.set_predict_proba_request(lengths=True) + self.set_score_request( + lengths=True, + normalize=True, + sample_weight=True, + ) def fit( - self: KNNClassifier, + self, X: FloatArray, y: IntArray, *, lengths: IntArray | None = None, - ) -> KNNClassifier: + ) -> t.Self: """Fit the classifier to the sequence(s) in ``X``. Parameters ---------- - self: KNNClassifier - X: Sequence(s). @@ -232,7 +227,7 @@ def fit( @_validation.requires_fit def predict( - self: KNNClassifier, + self, X: FloatArray, *, lengths: IntArray | None = None, @@ -241,8 +236,6 @@ def predict( Parameters ---------- - self: KNNClassifier - X: Sequence(s). @@ -266,7 +259,7 @@ def predict( @_validation.requires_fit def predict_log_proba( - self: KNNClassifier, + self, X: FloatArray, *, lengths: IntArray | None = None, @@ -277,8 +270,6 @@ def predict_log_proba( Parameters ---------- - self: KNNClassifier - X: Sequence(s). @@ -301,7 +292,7 @@ def predict_log_proba( @_validation.requires_fit def predict_proba( - self: KNNClassifier, + self, X: FloatArray, *, lengths: IntArray | None = None, @@ -312,8 +303,6 @@ def predict_proba( Parameters ---------- - self: KNNClassifier - X: Sequence(s). @@ -337,7 +326,7 @@ def predict_proba( @_validation.requires_fit def predict_scores( - self: KNNClassifier, + self, X: FloatArray, *, lengths: IntArray | None = None, @@ -349,8 +338,6 @@ def predict_scores( Parameters ---------- - self: KNNClassifier - X: Sequence(s). @@ -378,7 +365,7 @@ def predict_scores( return self._compute_scores(k_labels, k_weightings) def _compute_scores( - self: KNNClassifier, labels: IntArray, weightings: FloatArray + self, labels: IntArray, weightings: FloatArray ) -> FloatArray: """Calculate the sum of the weightings for each label group.""" scores = np.zeros((len(labels), len(self.classes_))) @@ -387,7 +374,7 @@ def _compute_scores( return scores def _find_max_labels( - self: KNNClassifier, + self, scores: FloatArray, /, ) -> IntArray: @@ -397,15 +384,13 @@ def _find_max_labels( n_jobs = _multiprocessing.effective_n_jobs(self.n_jobs, x=scores) score_chunks = np.array_split(scores, n_jobs) return np.concatenate( - joblib.Parallel(n_jobs=n_jobs, max_nbytes=None)( + joblib.Parallel(n_jobs=n_jobs, mmap_mode="r+")( joblib.delayed(self._find_max_labels_chunk)(score_chunk) for score_chunk in score_chunks ) ) - def _find_max_labels_chunk( - self: KNNClassifier, score_chunk: FloatArray, / - ) -> IntArray: + def _find_max_labels_chunk(self, score_chunk: FloatArray, /) -> IntArray: """Return the label with the highest score for each item in the chunk. 
""" diff --git a/sequentia/models/knn/regressor.py b/sequentia/models/knn/regressor.py index 88ed9ba..8e9e83c 100644 --- a/sequentia/models/knn/regressor.py +++ b/sequentia/models/knn/regressor.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). @@ -14,7 +14,7 @@ import numpy as np import pydantic as pyd -from sequentia._internal import _data, _validation +from sequentia._internal import _data, _sklearn, _validation from sequentia._internal._typing import FloatArray, IntArray from sequentia.models.base import RegressorMixin from sequentia.models.knn.base import KNNMixin @@ -33,22 +33,20 @@ class KNNRegressor(KNNMixin, RegressorMixin): @pyd.validate_call(config=dict(arbitrary_types_allowed=True)) def __init__( - self: pyd.SkipValidation, + self, *, k: pyd.PositiveInt = 1, weighting: t.Callable[[FloatArray], FloatArray] | None = None, - window: pyd.confloat(ge=0.0, le=1.0) = 1.0, + window: t.Annotated[float, pyd.Field(ge=0, le=1)] = 1.0, independent: bool = False, - use_c: bool = False, + use_c: bool = True, n_jobs: pyd.PositiveInt | pyd.NegativeInt = 1, random_state: pyd.NonNegativeInt | np.random.RandomState | None = None, - ) -> pyd.SkipValidation: + ) -> None: """Initializes the :class:`.KNNRegressor`. Parameters ---------- - self: KNNRegressor - k: Number of neighbors. @@ -64,7 +62,8 @@ def __init__( If ``None``, then a uniform weighting of 1 will be applied to all distances. - window: The size of the Sakoe—Chiba band global constrant as a + window: + The size of the Sakoe—Chiba band global constrant as a fraction of the length of the shortest of the two sequences being compared. @@ -106,9 +105,7 @@ def __init__( self.k: int = k """Number of neighbors.""" - self.weighting: t.Callable[[np.ndarray], np.ndarray] | None = ( - weighting # placeholder - ) + self.weighting: t.Callable[[np.ndarray], np.ndarray] | None = weighting """A callable that specifies how distance weighting should be performed.""" @@ -131,23 +128,22 @@ def __init__( reproducible pseudo-randomness.""" # Allow metadata routing for lengths - self.set_fit_request(lengths=True) - self.set_predict_request(lengths=True) - self.set_score_request(lengths=True, sample_weight=True) + if _sklearn.routing_enabled(): + self.set_fit_request(lengths=True) + self.set_predict_request(lengths=True) + self.set_score_request(lengths=True, sample_weight=True) def fit( - self: KNNRegressor, + self, X: FloatArray, y: FloatArray, *, lengths: IntArray | None = None, - ) -> KNNRegressor: + ) -> t.Self: """Fits the regressor to the sequence(s) in ``X``. Parameters ---------- - self: KNNRegressor - X: Sequence(s). @@ -181,7 +177,7 @@ def fit( @_validation.requires_fit def predict( - self: KNNRegressor, + self, X: FloatArray, *, lengths: IntArray | None = None, @@ -190,8 +186,6 @@ def predict( Parameters ---------- - self: KNNRegressor - X: Sequence(s). diff --git a/sequentia/preprocessing/__init__.py b/sequentia/preprocessing/__init__.py index 236a880..ad77bd4 100644 --- a/sequentia/preprocessing/__init__.py +++ b/sequentia/preprocessing/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). 
# SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). diff --git a/sequentia/preprocessing/transforms.py b/sequentia/preprocessing/transforms.py index d609d60..52c7509 100644 --- a/sequentia/preprocessing/transforms.py +++ b/sequentia/preprocessing/transforms.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). @@ -45,15 +45,17 @@ from __future__ import annotations +import typing as t import warnings import numpy as np import scipy.signal +import sklearn import sklearn.base from sklearn.preprocessing import FunctionTransformer from sklearn.utils.validation import _allclose_dense_sparse, check_array -from sequentia._internal import _data, _validation +from sequentia._internal import _data, _sklearn, _validation from sequentia._internal._typing import Array, FloatArray, IntArray __all__ = ["IndependentFunctionTransformer", "mean_filter", "median_filter"] @@ -122,10 +124,12 @@ def __init__( self.feature_names_out = feature_names_out self.kw_args = kw_args self.inv_kw_args = inv_kw_args + # Allow metadata routing for lengths - self.set_fit_request(lengths=True) - self.set_transform_request(lengths=True) - self.set_inverse_transform_request(lengths=True) + if _sklearn.routing_enabled(): + self.set_fit_request(lengths=True) + self.set_transform_request(lengths=True) + self.set_inverse_transform_request(lengths=True) def _check_input(self, X, *, lengths, reset): if self.validate: @@ -173,18 +177,16 @@ def _check_inverse_transform(self, X, *, lengths): @sklearn.base._fit_context(prefer_skip_nested_validation=True) def fit( - self: IndependentFunctionTransformer, + self, X: Array, y: Array | None = None, *, lengths: IntArray | None = None, - ) -> IndependentFunctionTransformer: + ) -> t.Self: """Fits the transformer to ``X``. Parameters ---------- - self: IndependentFunctionTransformer - X: Sequence(s). @@ -210,7 +212,7 @@ def fit( return self def transform( - self: IndependentFunctionTransformer, + self, X: Array, *, lengths: IntArray | None = None, @@ -220,8 +222,6 @@ def transform( Parameters ---------- - self: IndependentFunctionTransformer - X: Sequence(s). @@ -242,7 +242,7 @@ def transform( ) def inverse_transform( - self: IndependentFunctionTransformer, + self, X: Array, *, lengths: IntArray | None = None, @@ -251,8 +251,6 @@ def inverse_transform( Parameters ---------- - self: IndependentFunctionTransformer - X: Sequence(s). @@ -280,7 +278,7 @@ def inverse_transform( ) def fit_transform( - self: IndependentFunctionTransformer, + self, X: Array, y: Array | None = None, *, @@ -291,8 +289,6 @@ def fit_transform( Parameters ---------- - self: IndependentFunctionTransformer - X: Sequence(s). diff --git a/sequentia/version.py b/sequentia/version.py index bcec5ca..91e71f8 100644 --- a/sequentia/version.py +++ b/sequentia/version.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). 
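The `set_*_request` calls in the KNN estimators and in `IndependentFunctionTransformer` are now guarded by `_sklearn.routing_enabled()`, since scikit-learn only permits those calls while metadata routing is switched on and raises otherwise. The helper itself is not part of this diff; presumably it boils down to a check of the global scikit-learn config, roughly as sketched below.

import sklearn


def routing_enabled() -> bool:
    """Assumed equivalent of sequentia._internal._sklearn.routing_enabled."""
    return bool(sklearn.get_config().get("enable_metadata_routing", False))


# Users opt in explicitly before the `set_*_request` calls become legal:
sklearn.set_config(enable_metadata_routing=True)
assert routing_enabled()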
@@ -33,7 +33,7 @@ __all__ = ["VERSION", "version_info"] -VERSION = "2.0.2" +VERSION = "2.5.0" def version_info() -> str: @@ -79,6 +79,6 @@ def version_info() -> str: "related packages": ", ".join(related_packages), } return "\n".join( - "{:>30} {}".format(k + ":", str(v).replace("\n", " ")) # + "{:>30} {}".format(k + ":", str(v).replace("\n", " ")) for k, v in info.items() ) diff --git a/tasks.py b/tasks.py index b075595..65de3f7 100644 --- a/tasks.py +++ b/tasks.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). diff --git a/tests/__init__.py b/tests/__init__.py index 8d3537b..244ba10 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). diff --git a/tests/conftest.py b/tests/conftest.py index db78dcc..a007cf6 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). @@ -25,7 +25,7 @@ def combinations(string: str, /) -> t.Iterable[str]: return map( # noqa: C417 lambda params: "".join(params), itertools.chain.from_iterable( - itertools.combinations(string, i) # placeholder + itertools.combinations(string, i) for i in range(1, len(string)) ), ) @@ -39,17 +39,12 @@ def assert_not_equal(a: Array, b: Array, /) -> None: assert not np.allclose(a, b, rtol=1e-3) @classmethod - def assert_all_equal(cls: type[Helpers], A: Array, B: Array, /) -> None: + def assert_all_equal(cls, A: Array, B: Array, /) -> None: for a, b in zip(A, B): cls.assert_equal(a, b) @classmethod - def assert_all_not_equal( - cls: type[Helpers], - A: Array, - B: Array, - /, - ) -> None: + def assert_all_not_equal(cls, A: Array, B: Array, /) -> None: for a, b in zip(A, B): cls.assert_not_equal(a, b) @@ -61,6 +56,6 @@ def assert_distribution(x: Array, /) -> None: assert_almost_equal(x.sum(axis=1), np.ones(len(x))) -@pytest.fixture() +@pytest.fixture def helpers() -> type[Helpers]: return Helpers diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py index cd11e40..b4cba4c 100644 --- a/tests/unit/__init__.py +++ b/tests/unit/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). diff --git a/tests/unit/test_datasets/__init__.py b/tests/unit/test_datasets/__init__.py index cd11e40..b4cba4c 100644 --- a/tests/unit/test_datasets/__init__.py +++ b/tests/unit/test_datasets/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). 
# SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). diff --git a/tests/unit/test_datasets/test_base.py b/tests/unit/test_datasets/test_base.py index 17bbe1b..0059b88 100644 --- a/tests/unit/test_datasets/test_base.py +++ b/tests/unit/test_datasets/test_base.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). diff --git a/tests/unit/test_datasets/test_digits.py b/tests/unit/test_datasets/test_digits.py index 6aab914..de8e9a8 100644 --- a/tests/unit/test_datasets/test_digits.py +++ b/tests/unit/test_datasets/test_digits.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). diff --git a/tests/unit/test_datasets/test_gene_families.py b/tests/unit/test_datasets/test_gene_families.py index 2baae10..05b5d0b 100644 --- a/tests/unit/test_datasets/test_gene_families.py +++ b/tests/unit/test_datasets/test_gene_families.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). diff --git a/tests/unit/test_internal/__init__.py b/tests/unit/test_internal/__init__.py index cd11e40..b4cba4c 100644 --- a/tests/unit/test_internal/__init__.py +++ b/tests/unit/test_internal/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). diff --git a/tests/unit/test_internal/test_data.py b/tests/unit/test_internal/test_data.py index b0421f9..323e9d9 100644 --- a/tests/unit/test_internal/test_data.py +++ b/tests/unit/test_internal/test_data.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). diff --git a/tests/unit/test_internal/test_hmm/__init__.py b/tests/unit/test_internal/test_hmm/__init__.py index cd11e40..b4cba4c 100644 --- a/tests/unit/test_internal/test_hmm/__init__.py +++ b/tests/unit/test_internal/test_hmm/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). 
diff --git a/tests/unit/test_internal/test_hmm/test_topologies.py b/tests/unit/test_internal/test_hmm/test_topologies.py index 9b7e7ce..cea4f22 100644 --- a/tests/unit/test_internal/test_hmm/test_topologies.py +++ b/tests/unit/test_internal/test_hmm/test_topologies.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). @@ -328,7 +328,7 @@ def test_ergodic_random_transitions_many( def test_ergodic_check_transitions_invalid( - random_state: np.random.RandomState + random_state: np.random.RandomState, ) -> None: """Validate an invalid ergodic transition matrix""" topology = topologies.ErgodicTopology( @@ -342,7 +342,7 @@ def test_ergodic_check_transitions_invalid( def test_ergodic_check_transitions_valid( - random_state: np.random.RandomState + random_state: np.random.RandomState, ) -> None: """Validate a valid ergodic transition matrix""" topology = topologies.ErgodicTopology( @@ -451,7 +451,7 @@ def test_linear_random_transitions_many( def test_linear_check_transitions_invalid( - random_state: np.random.RandomState + random_state: np.random.RandomState, ) -> None: """Validate an invalid linear transition matrix""" topology = topologies.LinearTopology(n_states=5, random_state=random_state) @@ -466,7 +466,7 @@ def test_linear_check_transitions_invalid( def test_linear_check_transitions_valid( - random_state: np.random.RandomState + random_state: np.random.RandomState, ) -> None: """Validate a valid linear transition matrix""" topology = topologies.LinearTopology(n_states=5, random_state=random_state) diff --git a/tests/unit/test_model_selection.py b/tests/unit/test_model_selection.py new file mode 100644 index 0000000..1b88d1f --- /dev/null +++ b/tests/unit/test_model_selection.py @@ -0,0 +1,228 @@ +# Copyright (c) 2019 Sequentia Developers. +# Distributed under the terms of the MIT License (see the LICENSE file). +# SPDX-License-Identifier: MIT +# This source code is part of the Sequentia project (https://github.com/eonu/sequentia). 
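The new `tests/unit/test_model_selection.py` module that begins here (and continues below) drives the `sequentia.model_selection` search and splitter wrappers, and uses a `param_grid` helper to build candidate `model_kwargs` dictionaries for `HMMClassifier`. Its implementation is not part of this diff; judging from its use inside a `GridSearchCV` parameter grid, it presumably expands keyword lists into their cartesian product, roughly like scikit-learn's `ParameterGrid`:

import itertools
import typing as t


def param_grid(**params: list[t.Any]) -> list[dict[str, t.Any]]:
    """Expand keyword lists into every combination, one dict per candidate."""
    keys = list(params)
    return [
        dict(zip(keys, combo))
        for combo in itertools.product(*(params[key] for key in keys))
    ]


# param_grid(n_states=[3, 4], topology=["left-right", "linear"])
# -> four candidate `model_kwargs` dicts for HMMClassifier.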
+ +from __future__ import annotations + +import numpy as np +import numpy.testing as npt +import pytest +from sklearn.model_selection._split import ( + BaseCrossValidator, + BaseShuffleSplit, +) +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import minmax_scale + +from sequentia.datasets import SequentialDataset, load_digits +from sequentia.enums import CovarianceMode, PriorMode, TopologyMode +from sequentia.model_selection import ( + GridSearchCV, + HalvingGridSearchCV, + KFold, + RandomizedSearchCV, + RepeatedKFold, + RepeatedStratifiedKFold, + ShuffleSplit, + StratifiedKFold, + StratifiedShuffleSplit, + param_grid, +) +from sequentia.model_selection._search import BaseSearchCV +from sequentia.models import ( + GaussianMixtureHMM, + HMMClassifier, + KNNClassifier, + KNNRegressor, +) +from sequentia.preprocessing import IndependentFunctionTransformer + +EPS: np.float32 = np.finfo(np.float32).eps +random_state: np.random.RandomState = np.random.RandomState(0) + + +def exp_weight(x: np.ndarray) -> np.ndarray: + return np.exp(-x) + + +def inv_weight(x: np.ndarray) -> np.ndarray: + return 1 / (x + EPS) + + +@pytest.fixture(scope="module") +def data() -> SequentialDataset: + """Small subset of the spoken digits dataset.""" + digits = load_digits(digits={0, 1}) + _, digits = digits.split( + test_size=0.1, + random_state=random_state, + shuffle=True, + stratify=True, + ) + return digits + + +@pytest.mark.parametrize( + "cv", + [ + KFold, + StratifiedKFold, + ShuffleSplit, + StratifiedShuffleSplit, + RepeatedKFold, + RepeatedStratifiedKFold, + ], +) +@pytest.mark.parametrize( + "search", [GridSearchCV, RandomizedSearchCV, HalvingGridSearchCV] +) +def test_knn_classifier( + data: SequentialDataset, + search: type[BaseSearchCV], + cv: type[BaseCrossValidator] | type[BaseShuffleSplit], +) -> None: + # Specify cross-validator parameters + cv_kwargs = {"random_state": 0, "n_splits": 2} + if cv in (KFold, StratifiedKFold): + cv_kwargs["shuffle"] = True + + # Initialize search, splitter and parameter + optimizer = search( + Pipeline( + [ + ("scale", IndependentFunctionTransformer(minmax_scale)), + ("knn", KNNClassifier(use_c=True, n_jobs=-1)), + ] + ), + { + "knn__k": [1, 5], + "knn__weighting": [exp_weight, inv_weight], + }, + cv=cv(**cv_kwargs), + n_jobs=-1, + ) + + # Perform the hyper-parameter search and retrieve the best model + optimizer.fit(data.X, data.y, lengths=data.lengths) + assert optimizer.best_score_ > 0.8 + clf = optimizer.best_estimator_ + + # Predict labels + y_pred = clf.predict(data.X, lengths=data.lengths) + assert np.isin(y_pred, (0, 1)).all() + + # Predict probabilities + y_probs = clf.predict_proba(data.X, lengths=data.lengths) + assert ((y_probs >= 0) & (y_probs <= 1)).all() + npt.assert_almost_equal(y_probs.sum(axis=1), 1.0) + + # Predict log probabilities + y_log_probs = clf.predict_log_proba(data.X, lengths=data.lengths) + assert (y_log_probs <= 0).all() + npt.assert_almost_equal(y_log_probs, np.log(y_probs)) + + # Calculate accuracy + acc = clf.score(data.X, data.y, lengths=data.lengths) + assert acc > 0.8 + + +@pytest.mark.parametrize( + "cv", + [ + KFold, + StratifiedKFold, + ShuffleSplit, + StratifiedShuffleSplit, + RepeatedKFold, + RepeatedStratifiedKFold, + ], +) +@pytest.mark.parametrize( + "search", [GridSearchCV, RandomizedSearchCV, HalvingGridSearchCV] +) +def test_knn_regressor( + data: SequentialDataset, + search: type[BaseSearchCV], + cv: type[BaseCrossValidator] | type[BaseShuffleSplit], +) -> None: + # Specify cross-validator parameters + 
cv_kwargs = {"random_state": 0, "n_splits": 2} + if cv in (KFold, StratifiedKFold): + cv_kwargs["shuffle"] = True + + # Initialize search, splitter and parameter + optimizer = search( + Pipeline( + [ + ("scale", IndependentFunctionTransformer(minmax_scale)), + ("knn", KNNRegressor(use_c=True, n_jobs=-1)), + ] + ), + { + "knn__k": [3, 5], + "knn__weighting": [exp_weight, inv_weight], + }, + cv=cv(**cv_kwargs), + n_jobs=-1, + ) + + # Convert labels to float + y = data.y.astype(np.float64) + + # Perform the hyper-parameter search and retrieve the best model + optimizer.fit(data.X, y, lengths=data.lengths) + assert optimizer.best_score_ > 0.8 + model = optimizer.best_estimator_ + + # Predict labels + y_pred = model.predict(data.X, lengths=data.lengths) + assert ((y_pred >= 0) & (y_pred <= 1)).all() + + # Calculate R^2 + r2 = model.score(data.X, y, lengths=data.lengths) + assert r2 > 0.8 + + +def test_hmm_classifier(data: SequentialDataset) -> None: + # Initialize search, splitter and parameter + optimizer = GridSearchCV( + estimator=Pipeline( + [ + ("scale", IndependentFunctionTransformer(minmax_scale)), + ("clf", HMMClassifier(variant=GaussianMixtureHMM, n_jobs=-1)), + ] + ), + param_grid={ + "clf__prior": [PriorMode.UNIFORM, PriorMode.FREQUENCY], + "clf__model_kwargs": param_grid( + n_states=[3, 4, 5], + n_components=[2, 3, 4], + covariance=[CovarianceMode.DIAGONAL, CovarianceMode.SPHERICAL], + topology=[TopologyMode.LEFT_RIGHT, TopologyMode.LINEAR], + ), + }, + cv=StratifiedKFold(), + n_jobs=-1, + ) + + # Perform the hyper-parameter search and retrieve the best model + optimizer.fit(data.X, data.y, lengths=data.lengths) + assert optimizer.best_score_ > 0.8 + clf = optimizer.best_estimator_ + + # Predict labels + y_pred = clf.predict(data.X, lengths=data.lengths) + assert np.isin(y_pred, (0, 1)).all() + + # Predict probabilities + y_probs = clf.predict_proba(data.X, lengths=data.lengths) + assert ((y_probs >= 0) & (y_probs <= 1)).all() + npt.assert_almost_equal(y_probs.sum(axis=1), 1.0) + + # Predict log probabilities + clf.predict_log_proba(data.X, lengths=data.lengths) + + # Calculate accuracy + acc = clf.score(data.X, data.y, lengths=data.lengths) + assert acc > 0.8 diff --git a/tests/unit/test_models/__init__.py b/tests/unit/test_models/__init__.py index cd11e40..b4cba4c 100644 --- a/tests/unit/test_models/__init__.py +++ b/tests/unit/test_models/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). diff --git a/tests/unit/test_models/hmm/__init__.py b/tests/unit/test_models/hmm/__init__.py index cd11e40..b4cba4c 100644 --- a/tests/unit/test_models/hmm/__init__.py +++ b/tests/unit/test_models/hmm/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). diff --git a/tests/unit/test_models/hmm/test_classifier.py b/tests/unit/test_models/hmm/test_classifier.py index 0ca4dbd..e7d180d 100644 --- a/tests/unit/test_models/hmm/test_classifier.py +++ b/tests/unit/test_models/hmm/test_classifier.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. 
+# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). @@ -6,6 +6,7 @@ from __future__ import annotations import copy +import enum import os import tempfile import typing as t @@ -37,6 +38,12 @@ n_classes = 7 +class FitMode(enum.StrEnum): + PREFIT = "prefit" + POSTFIT_IDENTICAL = "postfit_identical" + POSTFIT_FLEXIBLE = "postfit_flexible" + + @pytest.fixture(scope="module") def random_state(request: SubRequest) -> np.random.RandomState: return np.random.RandomState(1) @@ -113,18 +120,19 @@ def assert_fit(clf: BaseHMM): }, ], ) -@pytest.mark.parametrize("prefit", [True, False]) +@pytest.mark.parametrize("fit_mode", list(FitMode)) +@pytest.mark.parametrize("n_jobs", [1, -1]) def test_classifier_e2e( request: SubRequest, helpers: t.Any, model: BaseHMM, dataset: SequentialDataset, prior: enums.PriorMode | dict[int, float], + fit_mode: FitMode, + n_jobs: int, random_state: np.random.RandomState, - *, - prefit: bool, ) -> None: - clf = HMMClassifier(prior=prior) + clf = HMMClassifier(prior=prior, n_jobs=n_jobs) clf.add_models({i: copy.deepcopy(model) for i in range(n_classes)}) assert clf.prior == prior @@ -139,12 +147,20 @@ def test_classifier_e2e( test_size=0.2, random_state=random_state, stratify=True ) - if prefit: + if fit_mode == FitMode.PREFIT: for X, lengths, c in train.iter_by_class(): clf.models[c].fit(X, lengths=lengths) assert_fit(clf.fit()) - else: + elif fit_mode == FitMode.POSTFIT_FLEXIBLE: assert_fit(clf.fit(**train.X_y_lengths)) + elif fit_mode == FitMode.POSTFIT_IDENTICAL: + clf = HMMClassifier( + variant=type(model), + model_kwargs=model.get_params(), + prior=prior, + n_jobs=n_jobs, + ) + clf.fit(**train.X_y_lengths) scores_pred = clf.predict_scores(**test.X_lengths) assert scores_pred.shape == (len(test), n_classes) diff --git a/tests/unit/test_models/hmm/variants/__init__.py b/tests/unit/test_models/hmm/variants/__init__.py index cd11e40..b4cba4c 100644 --- a/tests/unit/test_models/hmm/variants/__init__.py +++ b/tests/unit/test_models/hmm/variants/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). diff --git a/tests/unit/test_models/hmm/variants/test_categorical.py b/tests/unit/test_models/hmm/variants/test_categorical.py index ba35326..92f53d5 100644 --- a/tests/unit/test_models/hmm/variants/test_categorical.py +++ b/tests/unit/test_models/hmm/variants/test_categorical.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). diff --git a/tests/unit/test_models/hmm/variants/test_gaussian_mixture.py b/tests/unit/test_models/hmm/variants/test_gaussian_mixture.py index d091099..f677fc0 100644 --- a/tests/unit/test_models/hmm/variants/test_gaussian_mixture.py +++ b/tests/unit/test_models/hmm/variants/test_gaussian_mixture.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. 
# Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). diff --git a/tests/unit/test_models/knn/__init__.py b/tests/unit/test_models/knn/__init__.py index cd11e40..b4cba4c 100644 --- a/tests/unit/test_models/knn/__init__.py +++ b/tests/unit/test_models/knn/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). diff --git a/tests/unit/test_models/knn/test_classifier.py b/tests/unit/test_models/knn/test_classifier.py index 15f4544..de05694 100644 --- a/tests/unit/test_models/knn/test_classifier.py +++ b/tests/unit/test_models/knn/test_classifier.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). diff --git a/tests/unit/test_models/knn/test_regressor.py b/tests/unit/test_models/knn/test_regressor.py index 715cd16..fffc3a2 100644 --- a/tests/unit/test_models/knn/test_regressor.py +++ b/tests/unit/test_models/knn/test_regressor.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). diff --git a/tests/unit/test_pipeline.py b/tests/unit/test_pipeline.py index dfdbff0..38909cb 100644 --- a/tests/unit/test_pipeline.py +++ b/tests/unit/test_pipeline.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). @@ -51,7 +51,7 @@ def test_pipeline_with_transforms( ) # check that transforming without fitting doesn't work - with pytest.raises(NotFittedError): + with pytest.raises((NotFittedError, AttributeError)): pipeline.transform(**data.X_lengths) # check that fitting without y works diff --git a/tests/unit/test_preprocessing/__init__.py b/tests/unit/test_preprocessing/__init__.py index cd11e40..b4cba4c 100644 --- a/tests/unit/test_preprocessing/__init__.py +++ b/tests/unit/test_preprocessing/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). # SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia). diff --git a/tests/unit/test_preprocessing/test_transforms.py b/tests/unit/test_preprocessing/test_transforms.py index 229ad05..d202f47 100644 --- a/tests/unit/test_preprocessing/test_transforms.py +++ b/tests/unit/test_preprocessing/test_transforms.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025 Sequentia Developers. +# Copyright (c) 2019 Sequentia Developers. # Distributed under the terms of the MIT License (see the LICENSE file). 
# SPDX-License-Identifier: MIT # This source code is part of the Sequentia project (https://github.com/eonu/sequentia).
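The classifier tests above (`FitMode.POSTFIT_IDENTICAL` and `test_hmm_classifier`) exercise the model-spec constructor added in this release: instead of registering per-class models with `add_models`, a variant class and shared keyword arguments are given up front and `fit` builds one HMM per class, optionally in parallel. A usage sketch grounded in those tests; the `X`, `y` and `lengths` variables are placeholders for a real sequential dataset.

from sequentia.models import GaussianMixtureHMM, HMMClassifier

clf = HMMClassifier(
    variant=GaussianMixtureHMM,              # one HMM of this variant per class
    model_kwargs=dict(n_states=4, n_components=2),
    n_jobs=-1,                               # per-class fits run in parallel
)
# clf.fit(X, y, lengths=lengths)
# clf.predict(X, lengths=lengths)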