Skip to content

Commit 023b449

Browse files
authored
Merge pull request #1 from smartcorelib/development
update
2 parents cd44f1d + 1b42f8a commit 023b449

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

75 files changed

+6158
-790
lines changed

.circleci/config.yml

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ workflows:
66
jobs:
77
- build
88
- clippy
9+
- coverage
10+
911
jobs:
1012
build:
1113
docker:
@@ -21,10 +23,10 @@ jobs:
2123
command: cargo fmt -- --check
2224
- run:
2325
name: Stable Build
24-
command: cargo build --features "nalgebra-bindings ndarray-bindings"
26+
command: cargo build --all-features
2527
- run:
2628
name: Test
27-
command: cargo test --features "nalgebra-bindings ndarray-bindings"
29+
command: cargo test --all-features
2830
- save_cache:
2931
key: project-cache
3032
paths:
@@ -41,3 +43,17 @@ jobs:
4143
- run:
4244
name: Run cargo clippy
4345
command: cargo clippy --all-features -- -Drust-2018-idioms -Dwarnings
46+
47+
coverage:
48+
machine: true
49+
steps:
50+
- checkout
51+
- run:
52+
name: Generate report
53+
command: >
54+
docker run --security-opt seccomp=unconfined -v $PWD:/volume
55+
xd009642/tarpaulin:latest-nightly cargo tarpaulin -v --ciserver circle-ci
56+
--out Lcov --all-features -- --test-threads 1
57+
- run:
58+
name: Upload
59+
command: bash <(curl -s https://codecov.io/bash) -Z -f

Cargo.toml

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
name = "smartcore"
33
description = "The most advanced machine learning library in rust."
44
homepage = "https://smartcorelib.org"
5-
version = "0.1.0"
5+
version = "0.2.0"
66
authors = ["SmartCore Developers"]
77
edition = "2018"
88
license = "Apache-2.0"
@@ -19,14 +19,13 @@ nalgebra-bindings = ["nalgebra"]
1919
datasets = []
2020

2121
[dependencies]
22-
ndarray = { version = "0.13", optional = true }
23-
nalgebra = { version = "0.22.0", optional = true }
22+
ndarray = { version = "0.14", optional = true }
23+
nalgebra = { version = "0.23.0", optional = true }
2424
num-traits = "0.2.12"
2525
num = "0.3.0"
2626
rand = "0.7.3"
2727
rand_distr = "0.3.0"
28-
serde = { version = "1.0.115", features = ["derive"] }
29-
serde_derive = "1.0.115"
28+
serde = { version = "1.0.115", features = ["derive"], optional = true }
3029

3130
[dev-dependencies]
3231
criterion = "0.3"
@@ -35,4 +34,9 @@ bincode = "1.3.1"
3534

3635
[[bench]]
3736
name = "distance"
38-
harness = false
37+
harness = false
38+
39+
[[bench]]
40+
name = "naive_bayes"
41+
harness = false
42+
required-features = ["ndarray-bindings", "nalgebra-bindings"]

benches/naive_bayes.rs

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
use criterion::BenchmarkId;
2+
use criterion::{black_box, criterion_group, criterion_main, Criterion};
3+
4+
use nalgebra::DMatrix;
5+
use ndarray::Array2;
6+
use smartcore::linalg::naive::dense_matrix::DenseMatrix;
7+
use smartcore::linalg::BaseMatrix;
8+
use smartcore::linalg::BaseVector;
9+
use smartcore::naive_bayes::gaussian::GaussianNB;
10+
11+
pub fn gaussian_naive_bayes_fit_benchmark(c: &mut Criterion) {
12+
let mut group = c.benchmark_group("GaussianNB::fit");
13+
14+
for n_samples in [100_usize, 1000_usize, 10000_usize].iter() {
15+
for n_features in [10_usize, 100_usize, 1000_usize].iter() {
16+
let x = DenseMatrix::<f64>::rand(*n_samples, *n_features);
17+
let y: Vec<f64> = (0..*n_samples)
18+
.map(|i| (i % *n_samples / 5_usize) as f64)
19+
.collect::<Vec<f64>>();
20+
group.bench_with_input(
21+
BenchmarkId::from_parameter(format!(
22+
"n_samples: {}, n_features: {}",
23+
n_samples, n_features
24+
)),
25+
n_samples,
26+
|b, _| {
27+
b.iter(|| {
28+
GaussianNB::fit(black_box(&x), black_box(&y), Default::default()).unwrap();
29+
})
30+
},
31+
);
32+
}
33+
}
34+
group.finish();
35+
}
36+
37+
pub fn gaussian_naive_matrix_datastructure(c: &mut Criterion) {
38+
let mut group = c.benchmark_group("GaussianNB");
39+
let classes = (0..10000).map(|i| (i % 25) as f64).collect::<Vec<f64>>();
40+
41+
group.bench_function("DenseMatrix", |b| {
42+
let x = DenseMatrix::<f64>::rand(10000, 500);
43+
let y = <DenseMatrix<f64> as BaseMatrix<f64>>::RowVector::from_array(&classes);
44+
45+
b.iter(|| {
46+
GaussianNB::fit(black_box(&x), black_box(&y), Default::default()).unwrap();
47+
})
48+
});
49+
50+
group.bench_function("ndarray", |b| {
51+
let x = Array2::<f64>::rand(10000, 500);
52+
let y = <Array2<f64> as BaseMatrix<f64>>::RowVector::from_array(&classes);
53+
54+
b.iter(|| {
55+
GaussianNB::fit(black_box(&x), black_box(&y), Default::default()).unwrap();
56+
})
57+
});
58+
59+
group.bench_function("ndalgebra", |b| {
60+
let x = DMatrix::<f64>::rand(10000, 500);
61+
let y = <DMatrix<f64> as BaseMatrix<f64>>::RowVector::from_array(&classes);
62+
63+
b.iter(|| {
64+
GaussianNB::fit(black_box(&x), black_box(&y), Default::default()).unwrap();
65+
})
66+
});
67+
}
68+
criterion_group!(
69+
benches,
70+
gaussian_naive_bayes_fit_benchmark,
71+
gaussian_naive_matrix_datastructure
72+
);
73+
criterion_main!(benches);

src/algorithm/neighbour/bbd_tree.rs

Lines changed: 15 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -44,10 +44,7 @@ impl<T: RealNumber> BBDTree<T> {
4444

4545
let (n, _) = data.shape();
4646

47-
let mut index = vec![0; n];
48-
for i in 0..n {
49-
index[i] = i;
50-
}
47+
let index = (0..n).collect::<Vec<_>>();
5148

5249
let mut tree = BBDTree {
5350
nodes,
@@ -64,7 +61,7 @@ impl<T: RealNumber> BBDTree<T> {
6461

6562
pub(in crate) fn clustering(
6663
&self,
67-
centroids: &Vec<Vec<T>>,
64+
centroids: &[Vec<T>],
6865
sums: &mut Vec<Vec<T>>,
6966
counts: &mut Vec<usize>,
7067
membership: &mut Vec<usize>,
@@ -92,8 +89,8 @@ impl<T: RealNumber> BBDTree<T> {
9289
fn filter(
9390
&self,
9491
node: usize,
95-
centroids: &Vec<Vec<T>>,
96-
candidates: &Vec<usize>,
92+
centroids: &[Vec<T>],
93+
candidates: &[usize],
9794
k: usize,
9895
sums: &mut Vec<Vec<T>>,
9996
counts: &mut Vec<usize>,
@@ -117,15 +114,15 @@ impl<T: RealNumber> BBDTree<T> {
117114
let mut new_candidates = vec![0; k];
118115
let mut newk = 0;
119116

120-
for i in 0..k {
117+
for candidate in candidates.iter().take(k) {
121118
if !BBDTree::prune(
122119
&self.nodes[node].center,
123120
&self.nodes[node].radius,
124121
centroids,
125122
closest,
126-
candidates[i],
123+
*candidate,
127124
) {
128-
new_candidates[newk] = candidates[i];
125+
new_candidates[newk] = *candidate;
129126
newk += 1;
130127
}
131128
}
@@ -166,9 +163,9 @@ impl<T: RealNumber> BBDTree<T> {
166163
}
167164

168165
fn prune(
169-
center: &Vec<T>,
170-
radius: &Vec<T>,
171-
centroids: &Vec<Vec<T>>,
166+
center: &[T],
167+
radius: &[T],
168+
centroids: &[Vec<T>],
172169
best_index: usize,
173170
test_index: usize,
174171
) -> bool {
@@ -285,8 +282,8 @@ impl<T: RealNumber> BBDTree<T> {
285282
}
286283

287284
let mut mean = vec![T::zero(); d];
288-
for i in 0..d {
289-
mean[i] = node.sum[i] / T::from(node.count).unwrap();
285+
for (i, mean_i) in mean.iter_mut().enumerate().take(d) {
286+
*mean_i = node.sum[i] / T::from(node.count).unwrap();
290287
}
291288

292289
node.cost = BBDTree::node_cost(&self.nodes[node.lower.unwrap()], &mean)
@@ -295,11 +292,11 @@ impl<T: RealNumber> BBDTree<T> {
295292
self.add_node(node)
296293
}
297294

298-
fn node_cost(node: &BBDTreeNode<T>, center: &Vec<T>) -> T {
295+
fn node_cost(node: &BBDTreeNode<T>, center: &[T]) -> T {
299296
let d = center.len();
300297
let mut scatter = T::zero();
301-
for i in 0..d {
302-
let x = (node.sum[i] / T::from(node.count).unwrap()) - center[i];
298+
for (i, center_i) in center.iter().enumerate().take(d) {
299+
let x = (node.sum[i] / T::from(node.count).unwrap()) - *center_i;
303300
scatter += x * x;
304301
}
305302
node.cost + T::from(node.count).unwrap() * scatter

src/algorithm/neighbour/cover_tree.rs

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
//! use smartcore::algorithm::neighbour::cover_tree::*;
77
//! use smartcore::math::distance::Distance;
88
//!
9+
//! #[derive(Clone)]
910
//! struct SimpleDistance {} // Our distance function
1011
//!
1112
//! impl Distance<i32, f64> for SimpleDistance {
@@ -23,6 +24,7 @@
2324
//! ```
2425
use std::fmt::Debug;
2526

27+
#[cfg(feature = "serde")]
2628
use serde::{Deserialize, Serialize};
2729

2830
use crate::algorithm::sort::heap_select::HeapSelection;
@@ -31,7 +33,8 @@ use crate::math::distance::Distance;
3133
use crate::math::num::RealNumber;
3234

3335
/// Implements Cover Tree algorithm
34-
#[derive(Serialize, Deserialize, Debug)]
36+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
37+
#[derive(Debug)]
3538
pub struct CoverTree<T, F: RealNumber, D: Distance<T, F>> {
3639
base: F,
3740
inv_log_base: F,
@@ -55,7 +58,8 @@ impl<T, F: RealNumber, D: Distance<T, F>> PartialEq for CoverTree<T, F, D> {
5558
}
5659
}
5760

58-
#[derive(Debug, Serialize, Deserialize)]
61+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
62+
#[derive(Debug)]
5963
struct Node<F: RealNumber> {
6064
idx: usize,
6165
max_dist: F,
@@ -64,7 +68,7 @@ struct Node<F: RealNumber> {
6468
scale: i64,
6569
}
6670

67-
#[derive(Debug, Serialize, Deserialize)]
71+
#[derive(Debug)]
6872
struct DistanceSet<F: RealNumber> {
6973
idx: usize,
7074
dist: Vec<F>,
@@ -436,7 +440,7 @@ impl<T: Debug + PartialEq, F: RealNumber, D: Distance<T, F>> CoverTree<T, F, D>
436440
}
437441
}
438442

439-
fn max(&self, distance_set: &Vec<DistanceSet<F>>) -> F {
443+
fn max(&self, distance_set: &[DistanceSet<F>]) -> F {
440444
let mut max = F::zero();
441445
for n in distance_set {
442446
if max < n.dist[n.dist.len() - 1] {
@@ -453,7 +457,8 @@ mod tests {
453457
use super::*;
454458
use crate::math::distance::Distances;
455459

456-
#[derive(Debug, Serialize, Deserialize)]
460+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
461+
#[derive(Debug, Clone)]
457462
struct SimpleDistance {}
458463

459464
impl Distance<i32, f64> for SimpleDistance {
@@ -499,6 +504,7 @@ mod tests {
499504
}
500505

501506
#[test]
507+
#[cfg(feature = "serde")]
502508
fn serde() {
503509
let data = vec![1, 2, 3, 4, 5, 6, 7, 8, 9];
504510

src/algorithm/neighbour/linear_search.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
//! use smartcore::algorithm::neighbour::linear_search::*;
66
//! use smartcore::math::distance::Distance;
77
//!
8+
//! #[derive(Clone)]
89
//! struct SimpleDistance {} // Our distance function
910
//!
1011
//! impl Distance<i32, f64> for SimpleDistance {
@@ -21,6 +22,7 @@
2122
//!
2223
//! ```
2324
25+
#[cfg(feature = "serde")]
2426
use serde::{Deserialize, Serialize};
2527
use std::cmp::{Ordering, PartialOrd};
2628
use std::marker::PhantomData;
@@ -31,7 +33,8 @@ use crate::math::distance::Distance;
3133
use crate::math::num::RealNumber;
3234

3335
/// Implements Linear Search algorithm, see [KNN algorithms](../index.html)
34-
#[derive(Serialize, Deserialize, Debug)]
36+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
37+
#[derive(Debug)]
3538
pub struct LinearKNNSearch<T, F: RealNumber, D: Distance<T, F>> {
3639
distance: D,
3740
data: Vec<T>,
@@ -137,6 +140,8 @@ mod tests {
137140
use super::*;
138141
use crate::math::distance::Distances;
139142

143+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
144+
#[derive(Debug, Clone)]
140145
struct SimpleDistance {}
141146

142147
impl Distance<i32, f64> for SimpleDistance {

src/algorithm/neighbour/mod.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
#![allow(clippy::ptr_arg)]
12
//! # Nearest Neighbors Search Algorithms and Data Structures
23
//!
34
//! Nearest neighbor search is a basic computational tool that is particularly relevant to machine learning,
@@ -34,6 +35,7 @@ use crate::algorithm::neighbour::linear_search::LinearKNNSearch;
3435
use crate::error::Failed;
3536
use crate::math::distance::Distance;
3637
use crate::math::num::RealNumber;
38+
#[cfg(feature = "serde")]
3739
use serde::{Deserialize, Serialize};
3840

3941
pub(crate) mod bbd_tree;
@@ -44,15 +46,17 @@ pub mod linear_search;
4446

4547
/// Both, KNN classifier and regressor benefits from underlying search algorithms that helps to speed up queries.
4648
/// `KNNAlgorithmName` maintains a list of supported search algorithms, see [KNN algorithms](../algorithm/neighbour/index.html)
47-
#[derive(Serialize, Deserialize, Debug, Clone)]
49+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
50+
#[derive(Debug, Clone)]
4851
pub enum KNNAlgorithmName {
4952
/// Heap Search algorithm, see [`LinearSearch`](../algorithm/neighbour/linear_search/index.html)
5053
LinearSearch,
5154
/// Cover Tree Search algorithm, see [`CoverTree`](../algorithm/neighbour/cover_tree/index.html)
5255
CoverTree,
5356
}
5457

55-
#[derive(Serialize, Deserialize, Debug)]
58+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
59+
#[derive(Debug)]
5660
pub(crate) enum KNNAlgorithm<T: RealNumber, D: Distance<Vec<T>, T>> {
5761
LinearSearch(LinearKNNSearch<Vec<T>, T, D>),
5862
CoverTree(CoverTree<Vec<T>, T, D>),

0 commit comments

Comments (0)