Skip to content

Commit

Permalink
Iter fold (#77)
Browse files Browse the repository at this point in the history
* fold_fit poc
removed fit dataset lifetime, finished fold_fit
tentative trait impl for kernel
adjusted svm for iter_fold

* updated elasticnet to new interface

* fmt

* removed fittable type from iter_fold definition

* fmt

* removed commented parameters
  • Loading branch information
Sauro98 authored Feb 1, 2021
1 parent 923b82f commit b0af805
Show file tree
Hide file tree
Showing 20 changed files with 1,148 additions and 486 deletions.
2 changes: 1 addition & 1 deletion linfa-bayes/src/gaussian_nb.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ where
/// # Ok(())
/// # }
/// ```
fn fit(&self, dataset: &'a DatasetBase<ArrayView2<A>, L>) -> Self::Object {
fn fit(&self, dataset: &DatasetBase<ArrayView2<A>, L>) -> Self::Object {
// We extract the unique classes in sorted order
let mut unique_classes = dataset.targets.labels();
unique_classes.sort_unstable();
Expand Down
2 changes: 1 addition & 1 deletion linfa-elasticnet/src/algorithm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ impl<'a, F: Float + AbsDiffEq + Lapack, D: Data<Elem = F>, D2: Data<Elem = F>>
/// for new feature values.
fn fit(
&self,
dataset: &'a DatasetBase<ArrayBase<D, Ix2>, ArrayBase<D2, Ix1>>,
dataset: &DatasetBase<ArrayBase<D, Ix2>, ArrayBase<D2, Ix1>>,
) -> Result<ElasticNet<F>> {
self.validate_params()?;

Expand Down
2 changes: 1 addition & 1 deletion linfa-hierarchical/examples/irisflower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ fn main() -> Result<(), Box<dyn Error>> {

let kernel = Kernel::params()
.method(KernelMethod::Gaussian(1.0))
.transform(dataset.records());
.transform(dataset.records().view());

let kernel = HierarchicalCluster::default()
.num_clusters(3)
Expand Down
23 changes: 8 additions & 15 deletions linfa-hierarchical/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ use std::collections::HashMap;

use kodama::linkage;
pub use kodama::Method;
use ndarray::ArrayView2;

use linfa::dataset::{DatasetBase, Targets};
use linfa::traits::Transformer;
Expand Down Expand Up @@ -58,17 +57,13 @@ impl<F: Float> HierarchicalCluster<F> {
}
}

impl<'b: 'a, 'a, F: Float>
Transformer<Kernel<ArrayView2<'a, F>>, DatasetBase<Kernel<ArrayView2<'a, F>>, Vec<usize>>>
impl<'b: 'a, 'a, F: Float> Transformer<Kernel<'a, F>, DatasetBase<Kernel<'a, F>, Vec<usize>>>
for HierarchicalCluster<F>
{
/// Perform hierarchical clustering of a similarity matrix
///
/// Returns the class id for each data point
fn transform(
&self,
kernel: Kernel<ArrayView2<'a, F>>,
) -> DatasetBase<Kernel<ArrayView2<'a, F>>, Vec<usize>> {
fn transform(&self, kernel: Kernel<'a, F>) -> DatasetBase<Kernel<'a, F>, Vec<usize>> {
// ignore all similarities below this value
let threshold = F::from(1e-6).unwrap();

Expand Down Expand Up @@ -135,18 +130,16 @@ impl<'b: 'a, 'a, F: Float>
}

impl<'a, F: Float, T: Targets>
Transformer<
DatasetBase<Kernel<ArrayView2<'a, F>>, T>,
DatasetBase<Kernel<ArrayView2<'a, F>>, Vec<usize>>,
> for HierarchicalCluster<F>
Transformer<DatasetBase<Kernel<'a, F>, T>, DatasetBase<Kernel<'a, F>, Vec<usize>>>
for HierarchicalCluster<F>
{
/// Perform hierarchical clustering of a similarity matrix
///
/// Returns the class id for each data point
fn transform(
&self,
dataset: DatasetBase<Kernel<ArrayView2<'a, F>>, T>,
) -> DatasetBase<Kernel<ArrayView2<'a, F>>, Vec<usize>> {
dataset: DatasetBase<Kernel<'a, F>, T>,
) -> DatasetBase<Kernel<'a, F>, Vec<usize>> {
//let Dataset { records, .. } = dataset;
self.transform(dataset.records)
}
Expand Down Expand Up @@ -188,7 +181,7 @@ mod tests {

let kernel = Kernel::params()
.method(KernelMethod::Gaussian(5.0))
.transform(&entries);
.transform(entries.view());

let kernel = HierarchicalCluster::default()
.max_distance(0.1)
Expand Down Expand Up @@ -243,7 +236,7 @@ mod tests {

let kernel = Kernel::params()
.method(KernelMethod::Linear)
.transform(&data);
.transform(data.view());

dbg!(&kernel.to_upper_triangle());
let predictions = HierarchicalCluster::default()
Expand Down
2 changes: 1 addition & 1 deletion linfa-kernel/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ features = ["std", "derive"]

[dependencies]
ndarray = "0.13"
sprs = { version = "0.9", default-features = false }
sprs = { version = "0.9.3", default-features = false }
hnsw = "0.6"
space = "0.10"

Expand Down
143 changes: 143 additions & 0 deletions linfa-kernel/src/inner.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
use linfa::Float;
use ndarray::prelude::*;
use ndarray::Data;
#[cfg(feature = "serde")]
use serde_crate::{Deserialize, Serialize};
use sprs::{CsMat, CsMatView};
use std::ops::Mul;

/// Read-only matrix operations shared by every storage type that can back a kernel.
///
/// Implementors expose a handful of dense-valued queries (products, sums,
/// columns, diagonal) regardless of how the entries are stored internally.
pub trait Inner {
    /// Scalar type of the matrix entries.
    type Elem: Float;

    /// Reports whether the underlying storage is dense (`true`) or sparse (`false`).
    fn is_dense(&self) -> bool;

    /// Size of the matrix, measured as its number of columns.
    fn size(&self) -> usize;

    /// Multiplies the matrix by `rhs` and returns the product as an owned dense array.
    fn dot(&self, rhs: &ArrayView2<Self::Elem>) -> Array2<Self::Elem>;

    /// Reduces the matrix to a one-dimensional array of sums.
    ///
    /// The reduced axis is chosen by each implementation.
    fn sum(&self) -> Array1<Self::Elem>;

    /// Copies the values of column `i` into a freshly allocated vector.
    fn column(&self, i: usize) -> Vec<Self::Elem>;

    /// Copies the main diagonal into an owned dense array.
    fn diagonal(&self) -> Array1<Self::Elem>;

    /// Collects every entry whose column index is strictly greater than its row index.
    fn to_upper_triangle(&self) -> Vec<Self::Elem>;
}

/// Storage of a kernel matrix, which is either dense or sparse.
///
/// Both payload types must implement [`Inner`], so the same set of matrix
/// queries is available on either variant.
pub enum KernelInner<K1, K2>
where
    K1: Inner,
    K2: Inner,
{
    /// Kernel matrix held in the dense representation `K1`.
    Dense(K1),
    /// Kernel matrix held in the sparse representation `K2`.
    Sparse(K2),
}

/// Dense kernel storage: any two-dimensional `ndarray` array or view.
impl<F: Float, D: Data<Elem = F>> Inner for ArrayBase<D, Ix2> {
    type Elem = F;

    /// Always `true`: an `ndarray` matrix stores every entry explicitly.
    fn is_dense(&self) -> bool {
        true
    }

    /// Number of columns of the matrix.
    fn size(&self) -> usize {
        let (_, n_cols) = self.dim();
        n_cols
    }

    /// Matrix product of `self` and `rhs`.
    fn dot(&self, rhs: &ArrayView2<F>) -> Array2<F> {
        // Inherent methods win over trait methods during method resolution, so
        // this calls ndarray's own `dot` rather than recursing into this one.
        self.dot(rhs)
    }

    /// Sums the matrix along `Axis(1)`, yielding one total per row.
    fn sum(&self) -> Array1<F> {
        let reduced_axis = Axis(1);
        self.sum_axis(reduced_axis)
    }

    /// Copies column `i` into a vector.
    fn column(&self, i: usize) -> Vec<F> {
        self.column(i).iter().copied().collect()
    }

    /// Copies the main diagonal into an owned array.
    fn diagonal(&self) -> Array1<F> {
        let diag_view = self.diag();
        diag_view.to_owned()
    }

    /// Collects the entries lying strictly above the main diagonal.
    fn to_upper_triangle(&self) -> Vec<F> {
        self.indexed_iter()
            .filter_map(|((row, col), value)| if col > row { Some(*value) } else { None })
            .collect()
    }
}

/// Sparse kernel storage backed by an owned `sprs` matrix.
impl<F: Float> Inner for CsMat<F> {
    type Elem = F;

    /// Multiplies the sparse matrix by the dense `rhs`, returning a dense product.
    fn dot(&self, rhs: &ArrayView2<F>) -> Array2<F> {
        self.mul(rhs)
    }

    /// Adds every stored entry to the slot of its column index.
    ///
    /// The result has one element per column; columns without stored entries
    /// keep the initial zero.
    // NOTE(review): the dense implementation reduces along `Axis(1)` instead;
    // the two only agree for symmetric matrices — confirm that is always the case.
    fn sum(&self) -> Array1<F> {
        let mut totals = Array1::zeros(self.cols());
        for (&value, (_, col)) in self.iter() {
            totals[col] += value;
        }
        totals
    }

    /// Number of columns of the matrix.
    fn size(&self) -> usize {
        self.cols()
    }

    /// Copies column `i` into a vector with one value per row.
    ///
    /// Entries that are not stored in the sparse structure are reported as
    /// (positive) zero, matching what `to_dense` produces for them.
    fn column(&self, i: usize) -> Vec<F> {
        let zero = F::zero();
        // A column has as many entries as the matrix has rows.
        (0..self.rows())
            .map(|row| self.get(row, i).copied().unwrap_or(zero))
            .collect()
    }

    /// Collects the entries lying strictly above the main diagonal.
    ///
    /// The matrix is converted to a dense array first, so implicit zeros are
    /// part of the output.
    fn to_upper_triangle(&self) -> Vec<F> {
        let dense = self.to_dense();
        dense
            .indexed_iter()
            .filter_map(|((row, col), value)| if col > row { Some(*value) } else { None })
            .collect()
    }

    /// Expands the sparse main diagonal into a dense array.
    fn diagonal(&self) -> Array1<F> {
        let sparse_diag = self.diag();
        // Start from zeros so that positions missing from the sparse diagonal stay zero.
        let mut dense_diag = Array1::zeros(sparse_diag.dim());
        for (idx, &value) in sparse_diag.iter() {
            dense_diag[idx] = value;
        }
        dense_diag
    }

    /// Always `false`: this is the sparse representation.
    fn is_dense(&self) -> bool {
        false
    }
}

/// Sparse kernel storage backed by a borrowed `sprs` matrix view.
impl<'a, F: Float> Inner for CsMatView<'a, F> {
    type Elem = F;

    /// Multiplies the sparse view by the dense `rhs`, returning a dense product.
    fn dot(&self, rhs: &ArrayView2<F>) -> Array2<F> {
        self.mul(rhs)
    }

    /// Adds every stored entry to the slot of its column index.
    ///
    /// The result has one element per column; columns without stored entries
    /// keep the initial zero.
    // NOTE(review): the dense implementation reduces along `Axis(1)` instead;
    // the two only agree for symmetric matrices — confirm that is always the case.
    fn sum(&self) -> Array1<F> {
        let mut totals = Array1::zeros(self.cols());
        for (&value, (_, col)) in self.iter() {
            totals[col] += value;
        }
        totals
    }

    /// Number of columns of the matrix.
    fn size(&self) -> usize {
        self.cols()
    }

    /// Copies column `i` into a vector with one value per row.
    ///
    /// Entries that are not stored in the sparse structure are reported as
    /// (positive) zero, matching what `to_dense` produces for them.
    fn column(&self, i: usize) -> Vec<F> {
        let zero = F::zero();
        // A column has as many entries as the matrix has rows.
        (0..self.rows())
            .map(|row| self.get(row, i).copied().unwrap_or(zero))
            .collect()
    }

    /// Collects the entries lying strictly above the main diagonal.
    ///
    /// The view is converted to a dense array first, so implicit zeros are
    /// part of the output.
    fn to_upper_triangle(&self) -> Vec<F> {
        let dense = self.to_dense();
        dense
            .indexed_iter()
            .filter_map(|((row, col), value)| if col > row { Some(*value) } else { None })
            .collect()
    }

    /// Expands the sparse main diagonal into a dense array.
    fn diagonal(&self) -> Array1<F> {
        let sparse_diag = self.diag();
        // Start from zeros so that positions missing from the sparse diagonal stay zero.
        let mut dense_diag = Array1::zeros(sparse_diag.dim());
        for (idx, &value) in sparse_diag.iter() {
            dense_diag[idx] = value;
        }
        dense_diag
    }

    /// Always `false`: this is the sparse representation.
    fn is_dense(&self) -> bool {
        false
    }
}
Loading

0 comments on commit b0af805

Please sign in to comment.