Skip to content

Singular Value Decomposition (SVD) #38

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Dec 17, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/decomposition/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@

/// PCA is a popular approach for deriving a low-dimensional set of features from a large set of variables.
pub mod pca;
pub mod svd;
28 changes: 28 additions & 0 deletions src/decomposition/pca.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,13 @@ impl<T: RealNumber, M: Matrix<T>> PCA<T, M> {
) -> Result<PCA<T, M>, Failed> {
let (m, n) = data.shape();

if n_components > n {
return Err(Failed::fit(&format!(
"Number of components, n_components should be <= number of attributes ({})",
n
)));
}

let mu = data.column_mean();

let mut x = data.clone();
Expand Down Expand Up @@ -224,6 +231,11 @@ impl<T: RealNumber, M: Matrix<T>> PCA<T, M> {
}
Ok(x_transformed)
}

/// Get a projection matrix
/// (borrowed reference; rows presumably correspond to original features and
/// columns to principal components — verify orientation against `fit`).
pub fn components(&self) -> &M {
    &self.projection
}
}

#[cfg(test)]
Expand Down Expand Up @@ -286,6 +298,22 @@ mod tests {
])
}

#[test]
fn pca_components() {
    let data = us_arrests_data();

    // Expected magnitudes of the projection matrix entries.
    let expected_abs = DenseMatrix::from_2d_array(&[
        &[0.0417, 0.0448],
        &[0.9952, 0.0588],
        &[0.0463, 0.9769],
        &[0.0752, 0.2007],
    ]);

    let model = PCA::fit(&data, 2, Default::default()).unwrap();

    // Compare absolute values only: the sign of each principal axis is arbitrary.
    assert!(expected_abs.approximate_eq(&model.components().abs(), 0.4));
}

#[test]
fn decompose_covariance() {
let us_arrests = us_arrests_data();
Expand Down
235 changes: 235 additions & 0 deletions src/decomposition/svd.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,235 @@
//! # Dimensionality reduction using SVD
//!
//! Similar to [`PCA`](../pca/index.html), SVD is a technique that can be used to reduce the number of input variables _p_ to a smaller number _k_, while preserving
//! the most important structure or relationships between the variables observed in the data.
//!
//! Contrary to PCA, SVD does not center the data before computing the singular value decomposition.
//!
//! Example:
//! ```
//! use smartcore::linalg::naive::dense_matrix::*;
//! use smartcore::decomposition::svd::*;
//!
//! // Iris data
//! let iris = DenseMatrix::from_2d_array(&[
//! &[5.1, 3.5, 1.4, 0.2],
//! &[4.9, 3.0, 1.4, 0.2],
//! &[4.7, 3.2, 1.3, 0.2],
//! &[4.6, 3.1, 1.5, 0.2],
//! &[5.0, 3.6, 1.4, 0.2],
//! &[5.4, 3.9, 1.7, 0.4],
//! &[4.6, 3.4, 1.4, 0.3],
//! &[5.0, 3.4, 1.5, 0.2],
//! &[4.4, 2.9, 1.4, 0.2],
//! &[4.9, 3.1, 1.5, 0.1],
//! &[7.0, 3.2, 4.7, 1.4],
//! &[6.4, 3.2, 4.5, 1.5],
//! &[6.9, 3.1, 4.9, 1.5],
//! &[5.5, 2.3, 4.0, 1.3],
//! &[6.5, 2.8, 4.6, 1.5],
//! &[5.7, 2.8, 4.5, 1.3],
//! &[6.3, 3.3, 4.7, 1.6],
//! &[4.9, 2.4, 3.3, 1.0],
//! &[6.6, 2.9, 4.6, 1.3],
//! &[5.2, 2.7, 3.9, 1.4],
//! ]);
//!
//! let svd = SVD::fit(&iris, 2, Default::default()).unwrap(); // Reduce number of features to 2
//!
//! let iris_reduced = svd.transform(&iris).unwrap();
//!
//! ```
//!
//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
use std::fmt::Debug;
use std::marker::PhantomData;

use serde::{Deserialize, Serialize};

use crate::error::Failed;
use crate::linalg::Matrix;
use crate::math::num::RealNumber;

/// SVD
/// Fitted model holding the projection learned from the training data.
#[derive(Serialize, Deserialize, Debug)]
pub struct SVD<T: RealNumber, M: Matrix<T>> {
    // Projection matrix: the first `n_components` columns of V from the SVD of the training data.
    components: M,
    // Ties the element type `T` to the struct; no value of `T` is stored directly.
    phantom: PhantomData<T>,
}

impl<T: RealNumber, M: Matrix<T>> PartialEq for SVD<T, M> {
    /// Two fitted models compare equal when their projection matrices match
    /// to within a small absolute tolerance.
    fn eq(&self, other: &Self) -> bool {
        let tolerance = T::from_f64(1e-8).unwrap();
        self.components.approximate_eq(&other.components, tolerance)
    }
}

#[derive(Debug, Clone, Default)]
/// SVD parameters
///
/// There are currently no tunable parameters; the struct exists so the `fit`
/// signature can grow backward-compatibly. `Default` is derived rather than
/// hand-written, which is equivalent for a field-less struct.
pub struct SVDParameters {}

impl<T: RealNumber, M: Matrix<T>> SVD<T, M> {
    /// Fits SVD to your data.
    /// * `x` - _NxM_ matrix with _N_ observations and _M_ features in each observation.
    /// * `n_components` - number of components to keep.
    /// * `parameters` - other parameters, use `Default::default()` to set parameters to default values.
    pub fn fit(x: &M, n_components: usize, _: SVDParameters) -> Result<SVD<T, M>, Failed> {
        let (_, p) = x.shape();

        // Keeping p (or more) components would not reduce dimensionality.
        if n_components >= p {
            return Err(Failed::fit(&format!(
                "Number of components, n_components should be < number of attributes ({})",
                p
            )));
        }

        let svd = x.svd()?;

        // The leading `n_components` columns of V (the right-singular vectors)
        // form the projection matrix.
        let components = svd.V.slice(0..p, 0..n_components);

        Ok(SVD {
            components,
            phantom: PhantomData,
        })
    }

    /// Run dimensionality reduction for `x`
    /// * `x` - _KxM_ data where _K_ is number of observations and _M_ is number of features.
    pub fn transform(&self, x: &M) -> Result<M, Failed> {
        let (n, p) = x.shape();
        let (p_c, k) = self.components.shape();
        // Input must have the same number of features as the data used in `fit`.
        if p_c != p {
            return Err(Failed::transform(&format!(
                "Can not transform a {}x{} matrix into {}x{} matrix, incorrect input dimensions",
                n, p, n, k
            )));
        }

        // Project the observations onto the retained components.
        Ok(x.matmul(&self.components))
    }

    /// Get a projection matrix
    pub fn components(&self) -> &M {
        &self.components
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::linalg::naive::dense_matrix::*;

    // Fits SVD on the USArrests dataset and checks the shape of the learned
    // projection plus the first five rows of the transformed data against
    // reference values.
    #[test]
    fn svd_decompose() {
        // https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/USArrests.html
        let x = DenseMatrix::from_2d_array(&[
            &[13.2, 236.0, 58.0, 21.2],
            &[10.0, 263.0, 48.0, 44.5],
            &[8.1, 294.0, 80.0, 31.0],
            &[8.8, 190.0, 50.0, 19.5],
            &[9.0, 276.0, 91.0, 40.6],
            &[7.9, 204.0, 78.0, 38.7],
            &[3.3, 110.0, 77.0, 11.1],
            &[5.9, 238.0, 72.0, 15.8],
            &[15.4, 335.0, 80.0, 31.9],
            &[17.4, 211.0, 60.0, 25.8],
            &[5.3, 46.0, 83.0, 20.2],
            &[2.6, 120.0, 54.0, 14.2],
            &[10.4, 249.0, 83.0, 24.0],
            &[7.2, 113.0, 65.0, 21.0],
            &[2.2, 56.0, 57.0, 11.3],
            &[6.0, 115.0, 66.0, 18.0],
            &[9.7, 109.0, 52.0, 16.3],
            &[15.4, 249.0, 66.0, 22.2],
            &[2.1, 83.0, 51.0, 7.8],
            &[11.3, 300.0, 67.0, 27.8],
            &[4.4, 149.0, 85.0, 16.3],
            &[12.1, 255.0, 74.0, 35.1],
            &[2.7, 72.0, 66.0, 14.9],
            &[16.1, 259.0, 44.0, 17.1],
            &[9.0, 178.0, 70.0, 28.2],
            &[6.0, 109.0, 53.0, 16.4],
            &[4.3, 102.0, 62.0, 16.5],
            &[12.2, 252.0, 81.0, 46.0],
            &[2.1, 57.0, 56.0, 9.5],
            &[7.4, 159.0, 89.0, 18.8],
            &[11.4, 285.0, 70.0, 32.1],
            &[11.1, 254.0, 86.0, 26.1],
            &[13.0, 337.0, 45.0, 16.1],
            &[0.8, 45.0, 44.0, 7.3],
            &[7.3, 120.0, 75.0, 21.4],
            &[6.6, 151.0, 68.0, 20.0],
            &[4.9, 159.0, 67.0, 29.3],
            &[6.3, 106.0, 72.0, 14.9],
            &[3.4, 174.0, 87.0, 8.3],
            &[14.4, 279.0, 48.0, 22.5],
            &[3.8, 86.0, 45.0, 12.8],
            &[13.2, 188.0, 59.0, 26.9],
            &[12.7, 201.0, 80.0, 25.5],
            &[3.2, 120.0, 80.0, 22.9],
            &[2.2, 48.0, 32.0, 11.2],
            &[8.5, 156.0, 63.0, 20.7],
            &[4.0, 145.0, 73.0, 26.2],
            &[5.7, 81.0, 39.0, 9.3],
            &[2.6, 53.0, 66.0, 10.8],
            &[6.8, 161.0, 60.0, 15.6],
        ]);

        // First five rows of the projected data (reference values).
        let expected = DenseMatrix::from_2d_array(&[
            &[243.54655757, -18.76673788],
            &[268.36802004, -33.79304302],
            &[305.93972467, -15.39087376],
            &[197.28420365, -11.66808306],
            &[293.43187394, 1.91163633],
        ]);
        let svd = SVD::fit(&x, 2, Default::default()).unwrap();

        let x_transformed = svd.transform(&x).unwrap();

        // Projection matrix must be (number of features) x (number of components).
        assert_eq!(svd.components.shape(), (x.shape().1, 2));

        assert!(x_transformed
            .slice(0..5, 0..2)
            .approximate_eq(&expected, 1e-4));
    }

    // Round-trips a fitted model through serde JSON and checks equality
    // (via the approximate `PartialEq` implementation).
    #[test]
    fn serde() {
        let iris = DenseMatrix::from_2d_array(&[
            &[5.1, 3.5, 1.4, 0.2],
            &[4.9, 3.0, 1.4, 0.2],
            &[4.7, 3.2, 1.3, 0.2],
            &[4.6, 3.1, 1.5, 0.2],
            &[5.0, 3.6, 1.4, 0.2],
            &[5.4, 3.9, 1.7, 0.4],
            &[4.6, 3.4, 1.4, 0.3],
            &[5.0, 3.4, 1.5, 0.2],
            &[4.4, 2.9, 1.4, 0.2],
            &[4.9, 3.1, 1.5, 0.1],
            &[7.0, 3.2, 4.7, 1.4],
            &[6.4, 3.2, 4.5, 1.5],
            &[6.9, 3.1, 4.9, 1.5],
            &[5.5, 2.3, 4.0, 1.3],
            &[6.5, 2.8, 4.6, 1.5],
            &[5.7, 2.8, 4.5, 1.3],
            &[6.3, 3.3, 4.7, 1.6],
            &[4.9, 2.4, 3.3, 1.0],
            &[6.6, 2.9, 4.6, 1.3],
            &[5.2, 2.7, 3.9, 1.4],
        ]);

        let svd = SVD::fit(&iris, 2, Default::default()).unwrap();

        let deserialized_svd: SVD<f64, DenseMatrix<f64>> =
            serde_json::from_str(&serde_json::to_string(&svd).unwrap()).unwrap();

        assert_eq!(svd, deserialized_svd);
    }
}
3 changes: 3 additions & 0 deletions src/linalg/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,9 @@ pub trait BaseVector<T: RealNumber>: Clone + Debug {
/// Computes the standard deviation as the square root of the variance.
fn std(&self) -> T {
    self.var().sqrt()
}

/// Copies content of `other` vector.
/// NOTE(review): implementations may panic when the two vectors differ in
/// length — confirm per implementation.
fn copy_from(&mut self, other: &Self);
}

/// Generic matrix type.
Expand Down
32 changes: 29 additions & 3 deletions src/linalg/naive/dense_matrix.rs
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,18 @@ impl<T: RealNumber> BaseVector<T> for Vec<T> {
result.dedup();
result
}

// Copies the content of `other` into `self`; panics if lengths differ.
fn copy_from(&mut self, other: &Self) {
    if self.len() != other.len() {
        // Report the source (`other`) and destination (`self`) lengths in the
        // order the message describes them: copying `other` INTO `self`.
        panic!(
            "Can't copy vector of length {} into a vector of length {}.",
            other.len(),
            self.len()
        );
    }

    self[..].clone_from_slice(&other[..]);
}
}

/// Column-major, dense matrix. See [Simple Dense Matrix](../index.html).
Expand Down Expand Up @@ -915,9 +927,7 @@ impl<T: RealNumber> BaseMatrix<T> for DenseMatrix<T> {
);
}

for i in 0..self.values.len() {
self.values[i] = other.values[i];
}
self.values[..].clone_from_slice(&other.values[..]);
}

fn abs_mut(&mut self) -> &Self {
Expand Down Expand Up @@ -1052,6 +1062,14 @@ mod tests {
assert_eq!(32.0, BaseVector::dot(&v1, &v2));
}

// `copy_from` on Vec replaces the destination's elements with the source's.
#[test]
fn vec_copy_from() {
    let source = vec![4., 5., 6.];
    let mut target = vec![1., 2., 3.];
    target.copy_from(&source);
    assert_eq!(target, source);
}

#[test]
fn vec_approximate_eq() {
let a = vec![1., 2., 3.];
Expand Down Expand Up @@ -1185,6 +1203,14 @@ mod tests {
assert_eq!(a.dot(&b), 32.);
}

// `copy_from` on DenseMatrix replaces the destination's values with the source's.
#[test]
fn copy_from() {
    let source = DenseMatrix::from_2d_array(&[&[7., 8.], &[9., 10.], &[11., 12.]]);
    let mut target = DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.]]);
    target.copy_from(&source);
    assert_eq!(target, source);
}

#[test]
fn slice() {
let m = DenseMatrix::from_2d_array(&[
Expand Down
14 changes: 14 additions & 0 deletions src/linalg/nalgebra_bindings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,10 @@ impl<T: RealNumber + 'static> BaseVector<T> for MatrixMN<T, U1, Dynamic> {
result.dedup();
result
}

fn copy_from(&mut self, other: &Self) {
    // Delegate to nalgebra's own `Matrix::copy_from` (which, per nalgebra's
    // documentation, panics if the shapes differ — confirm against the
    // nalgebra version in use).
    Matrix::copy_from(self, other);
}
}

impl<T: RealNumber + Scalar + AddAssign + SubAssign + MulAssign + DivAssign + Sum + 'static>
Expand Down Expand Up @@ -575,6 +579,16 @@ mod tests {
use crate::linear::linear_regression::*;
use nalgebra::{DMatrix, Matrix2x3, RowDVector};

#[test]
fn vec_copy_from() {
    let mut dst = RowDVector::from_vec(vec![1., 2., 3.]);
    let mut src = RowDVector::from_vec(vec![4., 5., 6.]);
    dst.copy_from(&src);
    assert_eq!(src, dst);
    // Mutating the source afterwards must not change the copy: the data is
    // deep-copied, not aliased.
    src[0] = 10.0;
    assert_ne!(src, dst);
}

#[test]
fn vec_len() {
let v = RowDVector::from_vec(vec![1., 2., 3.]);
Expand Down
Loading