Skip to content

make default params available to serde #167

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Sep 22, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions src/algorithm/neighbour/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,12 @@ pub enum KNNAlgorithmName {
CoverTree,
}

impl Default for KNNAlgorithmName {
fn default() -> Self {
KNNAlgorithmName::CoverTree
}
}

#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug)]
pub(crate) enum KNNAlgorithm<T: RealNumber, D: Distance<Vec<T>, T>> {
Expand Down
11 changes: 10 additions & 1 deletion src/cluster/dbscan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,17 +65,22 @@ pub struct DBSCAN<T: RealNumber, D: Distance<Vec<T>, T>> {
eps: T,
}

#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
/// DBSCAN clustering algorithm parameters
pub struct DBSCANParameters<T: RealNumber, D: Distance<Vec<T>, T>> {
#[cfg_attr(feature = "serde", serde(default))]
/// a function that defines a distance between each pair of point in training data.
/// This function should extend [`Distance`](../../math/distance/trait.Distance.html) trait.
/// See [`Distances`](../../math/distance/struct.Distances.html) for a list of available functions.
pub distance: D,
#[cfg_attr(feature = "serde", serde(default))]
/// The number of samples (or total weight) in a neighborhood for a point to be considered as a core point.
pub min_samples: usize,
#[cfg_attr(feature = "serde", serde(default))]
/// The maximum distance between two samples for one to be considered as in the neighborhood of the other.
pub eps: T,
#[cfg_attr(feature = "serde", serde(default))]
/// KNN algorithm to use.
pub algorithm: KNNAlgorithmName,
}
Expand Down Expand Up @@ -113,14 +118,18 @@ impl<T: RealNumber, D: Distance<Vec<T>, T>> DBSCANParameters<T, D> {
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct DBSCANSearchParameters<T: RealNumber, D: Distance<Vec<T>, T>> {
#[cfg_attr(feature = "serde", serde(default))]
/// a function that defines a distance between each pair of point in training data.
/// This function should extend [`Distance`](../../math/distance/trait.Distance.html) trait.
/// See [`Distances`](../../math/distance/struct.Distances.html) for a list of available functions.
pub distance: Vec<D>,
#[cfg_attr(feature = "serde", serde(default))]
/// The number of samples (or total weight) in a neighborhood for a point to be considered as a core point.
pub min_samples: Vec<usize>,
#[cfg_attr(feature = "serde", serde(default))]
/// The maximum distance between two samples for one to be considered as in the neighborhood of the other.
pub eps: Vec<T>,
#[cfg_attr(feature = "serde", serde(default))]
/// KNN algorithm to use.
pub algorithm: Vec<KNNAlgorithmName>,
}
Expand Down Expand Up @@ -221,7 +230,7 @@ impl<T: RealNumber> Default for DBSCANParameters<T, Euclidian> {
distance: Distances::euclidian(),
min_samples: 5,
eps: T::half(),
algorithm: KNNAlgorithmName::CoverTree,
algorithm: KNNAlgorithmName::default(),
}
}
}
Expand Down
7 changes: 7 additions & 0 deletions src/cluster/kmeans.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,13 +102,17 @@ impl<T: RealNumber> PartialEq for KMeans<T> {
}
}

#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
/// K-Means clustering algorithm parameters
pub struct KMeansParameters {
#[cfg_attr(feature = "serde", serde(default))]
/// Number of clusters.
pub k: usize,
#[cfg_attr(feature = "serde", serde(default))]
/// Maximum number of iterations of the k-means algorithm for a single run.
pub max_iter: usize,
#[cfg_attr(feature = "serde", serde(default))]
/// Determines random number generation for centroid initialization.
/// Use an int to make the randomness deterministic
pub seed: Option<u64>,
Expand Down Expand Up @@ -141,10 +145,13 @@ impl Default for KMeansParameters {
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct KMeansSearchParameters {
#[cfg_attr(feature = "serde", serde(default))]
/// Number of clusters.
pub k: Vec<usize>,
#[cfg_attr(feature = "serde", serde(default))]
/// Maximum number of iterations of the k-means algorithm for a single run.
pub max_iter: Vec<usize>,
#[cfg_attr(feature = "serde", serde(default))]
/// Determines random number generation for centroid initialization.
/// Use an int to make the randomness deterministic
pub seed: Vec<Option<u64>>,
Expand Down
5 changes: 5 additions & 0 deletions src/decomposition/pca.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,14 @@ impl<T: RealNumber, M: Matrix<T>> PartialEq for PCA<T, M> {
}
}

#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
/// PCA parameters
pub struct PCAParameters {
#[cfg_attr(feature = "serde", serde(default))]
/// Number of components to keep.
pub n_components: usize,
#[cfg_attr(feature = "serde", serde(default))]
/// By default, covariance matrix is used to compute principal components.
/// Enable this flag if you want to use correlation matrix instead.
pub use_correlation_matrix: bool,
Expand Down Expand Up @@ -120,8 +123,10 @@ impl Default for PCAParameters {
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct PCASearchParameters {
#[cfg_attr(feature = "serde", serde(default))]
/// Number of components to keep.
pub n_components: Vec<usize>,
#[cfg_attr(feature = "serde", serde(default))]
/// By default, covariance matrix is used to compute principal components.
/// Enable this flag if you want to use correlation matrix instead.
pub use_correlation_matrix: Vec<bool>,
Expand Down
3 changes: 3 additions & 0 deletions src/decomposition/svd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,11 @@ impl<T: RealNumber, M: Matrix<T>> PartialEq for SVD<T, M> {
}
}

#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
/// SVD parameters
pub struct SVDParameters {
#[cfg_attr(feature = "serde", serde(default))]
/// Number of components to keep.
pub n_components: usize,
}
Expand All @@ -94,6 +96,7 @@ impl SVDParameters {
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct SVDSearchParameters {
#[cfg_attr(feature = "serde", serde(default))]
/// Maximum number of iterations of the k-means algorithm for a single run.
pub n_components: Vec<usize>,
}
Expand Down
16 changes: 16 additions & 0 deletions src/ensemble/random_forest_classifier.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,20 +67,28 @@ use crate::tree::decision_tree_classifier::{
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct RandomForestClassifierParameters {
#[cfg_attr(feature = "serde", serde(default))]
/// Split criteria to use when building a tree. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
pub criterion: SplitCriterion,
#[cfg_attr(feature = "serde", serde(default))]
/// Tree max depth. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
pub max_depth: Option<u16>,
#[cfg_attr(feature = "serde", serde(default))]
/// The minimum number of samples required to be at a leaf node. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
pub min_samples_leaf: usize,
#[cfg_attr(feature = "serde", serde(default))]
/// The minimum number of samples required to split an internal node. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
pub min_samples_split: usize,
#[cfg_attr(feature = "serde", serde(default))]
/// The number of trees in the forest.
pub n_trees: u16,
#[cfg_attr(feature = "serde", serde(default))]
/// Number of random sample of predictors to use as split candidates.
pub m: Option<usize>,
#[cfg_attr(feature = "serde", serde(default))]
/// Whether to keep samples used for tree generation. This is required for OOB prediction.
pub keep_samples: bool,
#[cfg_attr(feature = "serde", serde(default))]
/// Seed used for bootstrap sampling and feature selection for each tree.
pub seed: u64,
}
Expand Down Expand Up @@ -198,20 +206,28 @@ impl<T: RealNumber, M: Matrix<T>> Predictor<M, M::RowVector> for RandomForestCla
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct RandomForestClassifierSearchParameters {
#[cfg_attr(feature = "serde", serde(default))]
/// Split criteria to use when building a tree. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
pub criterion: Vec<SplitCriterion>,
#[cfg_attr(feature = "serde", serde(default))]
/// Tree max depth. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
pub max_depth: Vec<Option<u16>>,
#[cfg_attr(feature = "serde", serde(default))]
/// The minimum number of samples required to be at a leaf node. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
pub min_samples_leaf: Vec<usize>,
#[cfg_attr(feature = "serde", serde(default))]
/// The minimum number of samples required to split an internal node. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
pub min_samples_split: Vec<usize>,
#[cfg_attr(feature = "serde", serde(default))]
/// The number of trees in the forest.
pub n_trees: Vec<u16>,
#[cfg_attr(feature = "serde", serde(default))]
/// Number of random sample of predictors to use as split candidates.
pub m: Vec<Option<usize>>,
#[cfg_attr(feature = "serde", serde(default))]
/// Whether to keep samples used for tree generation. This is required for OOB prediction.
pub keep_samples: Vec<bool>,
#[cfg_attr(feature = "serde", serde(default))]
/// Seed used for bootstrap sampling and feature selection for each tree.
pub seed: Vec<u64>,
}
Expand Down
14 changes: 14 additions & 0 deletions src/ensemble/random_forest_regressor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,18 +65,25 @@ use crate::tree::decision_tree_regressor::{
/// Parameters of the Random Forest Regressor
/// Some parameters here are passed directly into base estimator.
pub struct RandomForestRegressorParameters {
#[cfg_attr(feature = "serde", serde(default))]
/// Tree max depth. See [Decision Tree Regressor](../../tree/decision_tree_regressor/index.html)
pub max_depth: Option<u16>,
#[cfg_attr(feature = "serde", serde(default))]
/// The minimum number of samples required to be at a leaf node. See [Decision Tree Regressor](../../tree/decision_tree_regressor/index.html)
pub min_samples_leaf: usize,
#[cfg_attr(feature = "serde", serde(default))]
/// The minimum number of samples required to split an internal node. See [Decision Tree Regressor](../../tree/decision_tree_regressor/index.html)
pub min_samples_split: usize,
#[cfg_attr(feature = "serde", serde(default))]
/// The number of trees in the forest.
pub n_trees: usize,
#[cfg_attr(feature = "serde", serde(default))]
/// Number of random sample of predictors to use as split candidates.
pub m: Option<usize>,
#[cfg_attr(feature = "serde", serde(default))]
/// Whether to keep samples used for tree generation. This is required for OOB prediction.
pub keep_samples: bool,
#[cfg_attr(feature = "serde", serde(default))]
/// Seed used for bootstrap sampling and feature selection for each tree.
pub seed: u64,
}
Expand Down Expand Up @@ -181,18 +188,25 @@ impl<T: RealNumber, M: Matrix<T>> Predictor<M, M::RowVector> for RandomForestReg
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct RandomForestRegressorSearchParameters {
#[cfg_attr(feature = "serde", serde(default))]
/// Tree max depth. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
pub max_depth: Vec<Option<u16>>,
#[cfg_attr(feature = "serde", serde(default))]
/// The minimum number of samples required to be at a leaf node. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
pub min_samples_leaf: Vec<usize>,
#[cfg_attr(feature = "serde", serde(default))]
/// The minimum number of samples required to split an internal node. See [Decision Tree Classifier](../../tree/decision_tree_classifier/index.html)
pub min_samples_split: Vec<usize>,
#[cfg_attr(feature = "serde", serde(default))]
/// The number of trees in the forest.
pub n_trees: Vec<usize>,
#[cfg_attr(feature = "serde", serde(default))]
/// Number of random sample of predictors to use as split candidates.
pub m: Vec<Option<usize>>,
#[cfg_attr(feature = "serde", serde(default))]
/// Whether to keep samples used for tree generation. This is required for OOB prediction.
pub keep_samples: Vec<bool>,
#[cfg_attr(feature = "serde", serde(default))]
/// Seed used for bootstrap sampling and feature selection for each tree.
pub seed: Vec<u64>,
}
Expand Down
10 changes: 10 additions & 0 deletions src/linear/elastic_net.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,16 +71,21 @@ use crate::linear::lasso_optimizer::InteriorPointOptimizer;
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct ElasticNetParameters<T: RealNumber> {
#[cfg_attr(feature = "serde", serde(default))]
/// Regularization parameter.
pub alpha: T,
#[cfg_attr(feature = "serde", serde(default))]
/// The elastic net mixing parameter, with 0 <= l1_ratio <= 1.
/// For l1_ratio = 0 the penalty is an L2 penalty.
/// For l1_ratio = 1 it is an L1 penalty. For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.
pub l1_ratio: T,
#[cfg_attr(feature = "serde", serde(default))]
/// If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the standard deviation.
pub normalize: bool,
#[cfg_attr(feature = "serde", serde(default))]
/// The tolerance for the optimization
pub tol: T,
#[cfg_attr(feature = "serde", serde(default))]
/// The maximum number of iterations
pub max_iter: usize,
}
Expand Down Expand Up @@ -139,16 +144,21 @@ impl<T: RealNumber> Default for ElasticNetParameters<T> {
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct ElasticNetSearchParameters<T: RealNumber> {
#[cfg_attr(feature = "serde", serde(default))]
/// Regularization parameter.
pub alpha: Vec<T>,
#[cfg_attr(feature = "serde", serde(default))]
/// The elastic net mixing parameter, with 0 <= l1_ratio <= 1.
/// For l1_ratio = 0 the penalty is an L2 penalty.
/// For l1_ratio = 1 it is an L1 penalty. For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.
pub l1_ratio: Vec<T>,
#[cfg_attr(feature = "serde", serde(default))]
/// If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the standard deviation.
pub normalize: Vec<bool>,
#[cfg_attr(feature = "serde", serde(default))]
/// The tolerance for the optimization
pub tol: Vec<T>,
#[cfg_attr(feature = "serde", serde(default))]
/// The maximum number of iterations
pub max_iter: Vec<usize>,
}
Expand Down
8 changes: 8 additions & 0 deletions src/linear/lasso.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,17 @@ use crate::math::num::RealNumber;
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct LassoParameters<T: RealNumber> {
#[cfg_attr(feature = "serde", serde(default))]
/// Controls the strength of the penalty to the loss function.
pub alpha: T,
#[cfg_attr(feature = "serde", serde(default))]
/// If true the regressors X will be normalized before regression
/// by subtracting the mean and dividing by the standard deviation.
pub normalize: bool,
#[cfg_attr(feature = "serde", serde(default))]
/// The tolerance for the optimization
pub tol: T,
#[cfg_attr(feature = "serde", serde(default))]
/// The maximum number of iterations
pub max_iter: usize,
}
Expand Down Expand Up @@ -116,13 +120,17 @@ impl<T: RealNumber, M: Matrix<T>> Predictor<M, M::RowVector> for Lasso<T, M> {
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct LassoSearchParameters<T: RealNumber> {
#[cfg_attr(feature = "serde", serde(default))]
/// Controls the strength of the penalty to the loss function.
pub alpha: Vec<T>,
#[cfg_attr(feature = "serde", serde(default))]
/// If true the regressors X will be normalized before regression
/// by subtracting the mean and dividing by the standard deviation.
pub normalize: Vec<bool>,
#[cfg_attr(feature = "serde", serde(default))]
/// The tolerance for the optimization
pub tol: Vec<T>,
#[cfg_attr(feature = "serde", serde(default))]
/// The maximum number of iterations
pub max_iter: Vec<usize>,
}
Expand Down
19 changes: 9 additions & 10 deletions src/linear/linear_regression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,19 +71,21 @@ use crate::linalg::Matrix;
use crate::math::num::RealNumber;

#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, Eq, PartialEq)]
#[derive(Debug, Default, Clone, Eq, PartialEq)]
/// Approach to use for estimation of regression coefficients. QR is more efficient but SVD is more stable.
pub enum LinearRegressionSolverName {
/// QR decomposition, see [QR](../../linalg/qr/index.html)
QR,
#[default]
/// SVD decomposition, see [SVD](../../linalg/svd/index.html)
SVD,
}

/// Linear Regression parameters
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
#[derive(Debug, Default, Clone)]
pub struct LinearRegressionParameters {
#[cfg_attr(feature = "serde", serde(default))]
/// Solver to use for estimation of regression coefficients.
pub solver: LinearRegressionSolverName,
}
Expand All @@ -105,18 +107,11 @@ impl LinearRegressionParameters {
}
}

impl Default for LinearRegressionParameters {
fn default() -> Self {
LinearRegressionParameters {
solver: LinearRegressionSolverName::SVD,
}
}
}

/// Linear Regression grid search parameters
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct LinearRegressionSearchParameters {
#[cfg_attr(feature = "serde", serde(default))]
/// Solver to use for estimation of regression coefficients.
pub solver: Vec<LinearRegressionSolverName>,
}
Expand Down Expand Up @@ -353,5 +348,9 @@ mod tests {
serde_json::from_str(&serde_json::to_string(&lr).unwrap()).unwrap();

assert_eq!(lr, deserialized_lr);

let default = LinearRegressionParameters::default();
let parameters: LinearRegressionParameters = serde_json::from_str("{}").unwrap();
assert_eq!(parameters.solver, default.solver);
}
}
Loading