Skip to content

Commit

Permalink
🕳️ handle gaps in x (#20)
Browse files Browse the repository at this point in the history
* 🕳️ handle gaps with minmax downsampler

* 🎀 use Option for searchsorted to handle gaps (#25)

* 🎀 parallel searchsorted Option return

* 🧹

* 🧹 match -> if let

* 🧹

* ♻️ cleanup code

* 🐛 x searchsorted bugfix

* 🔥 add test for gaps in x

* 🧹 cleanup code
  • Loading branch information
jvdd authored Jan 28, 2023
1 parent b6291cb commit aeeb8ad
Show file tree
Hide file tree
Showing 9 changed files with 434 additions and 99 deletions.
2 changes: 1 addition & 1 deletion downsample_rs/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ num-traits = { version = "0.2.15", default-features = false }
rayon = { version = "1.6.0", default-features = false }

[dev-dependencies]
criterion = "0.3.0"
criterion = "0.4.0"
dev_utils = { path = "dev_utils" }

[[bench]]
Expand Down
104 changes: 65 additions & 39 deletions downsample_rs/src/m4/generic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@ use ndarray::{s, Array1, ArrayView1};
use rayon::iter::IndexedParallelIterator;
use rayon::prelude::*;

// TODO: check for duplicate data in the output array
// -> In the current implementation we always add 4 datapoints per bin (if of
// course the bin has >= 4 datapoints). However, the argmin and argmax might
// be the start and end of the bin, which would result in duplicate data in
// the output array. (this is for example the case for monotonic data).

// --------------------- WITHOUT X

#[inline(always)]
Expand All @@ -26,7 +32,6 @@ pub(crate) fn m4_generic<T: Copy + PartialOrd>(
.exact_chunks(block_size)
.into_iter()
.enumerate()
// .take(n_out / 4)
.for_each(|(i, step)| {
let (min_index, max_index) = f_argminmax(step);

Expand Down Expand Up @@ -95,7 +100,7 @@ pub(crate) fn m4_generic_parallel<T: Copy + PartialOrd + Send + Sync>(
#[inline(always)]
pub(crate) fn m4_generic_with_x<T: Copy>(
arr: ArrayView1<T>,
bin_idx_iterator: impl Iterator<Item = (usize, usize)>,
bin_idx_iterator: impl Iterator<Item = Option<(usize, usize)>>,
n_out: usize,
f_argminmax: fn(ArrayView1<T>) -> (usize, usize),
) -> Array1<usize> {
Expand All @@ -105,35 +110,43 @@ pub(crate) fn m4_generic_with_x<T: Copy>(
}

let arr_ptr = arr.as_ptr();
let mut sampled_indices: Array1<usize> = Array1::<usize>::default(n_out);

bin_idx_iterator
.enumerate()
.for_each(|(i, (start_idx, end_idx))| {
let step =
unsafe { ArrayView1::from_shape_ptr(end_idx - start_idx, arr_ptr.add(start_idx)) };
let (min_index, max_index) = f_argminmax(step);

sampled_indices[4 * i] = start_idx;

// Add the indexes in sorted order
if min_index < max_index {
sampled_indices[4 * i + 1] = min_index + start_idx;
sampled_indices[4 * i + 2] = max_index + start_idx;
let mut sampled_indices: Vec<usize> = Vec::with_capacity(n_out);

bin_idx_iterator.for_each(|bin| {
if let Some((start, end)) = bin {
if end <= start + 4 {
// If the bin has <= 4 elements, just add them all
for i in start..end {
sampled_indices.push(i);
}
} else {
sampled_indices[4 * i + 1] = max_index + start_idx;
sampled_indices[4 * i + 2] = min_index + start_idx;
// If the bin has > 4 elements, add the first and last + argmin and argmax
let step = unsafe { ArrayView1::from_shape_ptr(end - start, arr_ptr.add(start)) };
let (min_index, max_index) = f_argminmax(step);

sampled_indices.push(start);

// Add the indexes in sorted order
if min_index < max_index {
sampled_indices.push(min_index + start);
sampled_indices.push(max_index + start);
} else {
sampled_indices.push(max_index + start);
sampled_indices.push(min_index + start);
}

sampled_indices.push(end - 1);
}
sampled_indices[4 * i + 3] = end_idx - 1;
});
}
});

sampled_indices
Array1::from_vec(sampled_indices)
}

#[inline(always)]
pub(crate) fn m4_generic_with_x_parallel<T: Copy + PartialOrd + Send + Sync>(
arr: ArrayView1<T>,
bin_idx_iterator: impl IndexedParallelIterator<Item = impl Iterator<Item = (usize, usize)>>,
bin_idx_iterator: impl IndexedParallelIterator<Item = impl Iterator<Item = Option<(usize, usize)>>>,
n_out: usize,
f_argminmax: fn(ArrayView1<T>) -> (usize, usize),
) -> Array1<usize> {
Expand All @@ -146,24 +159,37 @@ pub(crate) fn m4_generic_with_x_parallel<T: Copy + PartialOrd + Send + Sync>(
bin_idx_iterator
.flat_map(|bin_idx_iterator| {
bin_idx_iterator
.map(|(start, end)| {
let step = unsafe {
ArrayView1::from_shape_ptr(end - start, arr.as_ptr().add(start))
};
let (min_index, max_index) = f_argminmax(step);

// Add the indexes in sorted order
let mut sampled_index = [start, 0, 0, end - 1];
if min_index < max_index {
sampled_index[1] = min_index + start;
sampled_index[2] = max_index + start;
} else {
sampled_index[1] = max_index + start;
sampled_index[2] = min_index + start;
.map(|bin| {
match bin {
Some((start, end)) => {
if end <= start + 4 {
// If the bin has <= 4 elements, just return them all
return (start..end).collect::<Vec<usize>>();
}

// If the bin has > 4 elements, return the first and last + argmin and argmax
let step = unsafe {
ArrayView1::from_shape_ptr(end - start, arr.as_ptr().add(start))
};
let (min_index, max_index) = f_argminmax(step);

// Return the indexes in sorted order
let mut sampled_index = vec![start, 0, 0, end - 1];
if min_index < max_index {
sampled_index[1] = min_index + start;
sampled_index[2] = max_index + start;
} else {
sampled_index[1] = max_index + start;
sampled_index[2] = min_index + start;
}
sampled_index
} // If the bin is empty, return empty Vec
None => {
vec![]
}
}
sampled_index
})
.collect::<Vec<[usize; 4]>>()
.collect::<Vec<Vec<usize>>>()
})
.flatten()
.collect::<Vec<usize>>(),
Expand Down
66 changes: 66 additions & 0 deletions downsample_rs/src/m4/scalar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,72 @@ mod tests {
assert_eq!(sampled_values, Array1::from(expected_values));
}

#[test]
fn test_m4_scalar_with_x_gap() {
// We will create a gap in the middle of the array
let x = (0..101).collect::<Vec<i32>>();

// Increment the second half of the array by 50
let x = x
.iter()
.map(|x| if *x > 50 { *x + 50 } else { *x })
.collect::<Vec<i32>>();
let x = Array1::from(x);
let arr = (0..101).map(|x| x as f32).collect::<Vec<f32>>();
let arr = Array1::from(arr);

let sampled_indices = m4_scalar_with_x(x.view(), arr.view(), 20);
assert_eq!(sampled_indices.len(), 16); // One full gap
let expected_indices = vec![0, 0, 29, 29, 30, 30, 50, 50, 51, 51, 69, 69, 70, 70, 99, 99];
assert_eq!(sampled_indices, Array1::from(expected_indices));

// Increment the second half of the array by 50 again
let x = x
.iter()
.map(|x| if *x > 101 { *x + 50 } else { *x })
.collect::<Vec<i32>>();
let x = Array1::from(x);

let sampled_indices = m4_scalar_with_x(x.view(), arr.view(), 20);
assert_eq!(sampled_indices.len(), 17); // Gap with 1 value
let expected_indices = vec![
0, 0, 29, 29, 30, 30, 50, 50, 51, 51, 69, 69, 70, 70, 99, 99, 100,
];
}

#[test]
fn test_m4_scalar_with_x_gap_parallel() {
// We will create a gap in the middle of the array
let x = (0..101).collect::<Vec<i32>>();

// Increment the second half of the array by 50
let x = x
.iter()
.map(|x| if *x > 50 { *x + 50 } else { *x })
.collect::<Vec<i32>>();
let x = Array1::from(x);
let arr = (0..101).map(|x| x as f32).collect::<Vec<f32>>();
let arr = Array1::from(arr);

let sampled_indices = m4_scalar_with_x_parallel(x.view(), arr.view(), 20);
assert_eq!(sampled_indices.len(), 16); // One full gap
let expected_indices = vec![0, 0, 29, 29, 30, 30, 50, 50, 51, 51, 69, 69, 70, 70, 99, 99];
assert_eq!(sampled_indices, Array1::from(expected_indices));

// Increment the second half of the array by 50 again
let x = x
.iter()
.map(|x| if *x > 101 { *x + 50 } else { *x })
.collect::<Vec<i32>>();
let x = Array1::from(x);

let sampled_indices = m4_scalar_with_x_parallel(x.view(), arr.view(), 20);
assert_eq!(sampled_indices.len(), 17); // Gap with 1 value
let expected_indices = vec![
0, 0, 29, 29, 30, 30, 50, 50, 51, 51, 69, 69, 70, 70, 99, 99, 100,
];
}

#[test]
fn test_many_random_runs_correct() {
let n: usize = 20_001; // not 20_000 because then the last bin is not "full"
Expand Down
66 changes: 66 additions & 0 deletions downsample_rs/src/m4/simd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,72 @@ mod tests {
assert_eq!(sampled_values, Array1::from(expected_values));
}

#[test]
fn test_m4_simd_with_x_gap() {
// We will create a gap in the middle of the array
let x = (0..101).collect::<Vec<i32>>();

// Increment the second half of the array by 50
let x = x
.iter()
.map(|x| if *x > 50 { *x + 50 } else { *x })
.collect::<Vec<i32>>();
let x = Array1::from(x);
let arr = (0..101).map(|x| x as f32).collect::<Vec<f32>>();
let arr = Array1::from(arr);

let sampled_indices = m4_simd_with_x(x.view(), arr.view(), 20);
assert_eq!(sampled_indices.len(), 16); // One full gap
let expected_indices = vec![0, 0, 29, 29, 30, 30, 50, 50, 51, 51, 69, 69, 70, 70, 99, 99];
assert_eq!(sampled_indices, Array1::from(expected_indices));

// Increment the second half of the array by 50 again
let x = x
.iter()
.map(|x| if *x > 101 { *x + 50 } else { *x })
.collect::<Vec<i32>>();
let x = Array1::from(x);

let sampled_indices = m4_simd_with_x(x.view(), arr.view(), 20);
assert_eq!(sampled_indices.len(), 17); // Gap with 1 value
let expected_indices = vec![
0, 0, 29, 29, 30, 30, 50, 50, 51, 51, 69, 69, 70, 70, 99, 99, 100,
];
}

#[test]
fn test_m4_simd_with_x_gap_parallel() {
// We will create a gap in the middle of the array
let x = (0..101).collect::<Vec<i32>>();

// Increment the second half of the array by 50
let x = x
.iter()
.map(|x| if *x > 50 { *x + 50 } else { *x })
.collect::<Vec<i32>>();
let x = Array1::from(x);
let arr = (0..101).map(|x| x as f32).collect::<Vec<f32>>();
let arr = Array1::from(arr);

let sampled_indices = m4_simd_with_x_parallel(x.view(), arr.view(), 20);
assert_eq!(sampled_indices.len(), 16); // One full gap
let expected_indices = vec![0, 0, 29, 29, 30, 30, 50, 50, 51, 51, 69, 69, 70, 70, 99, 99];
assert_eq!(sampled_indices, Array1::from(expected_indices));

// Increment the second half of the array by 50 again
let x = x
.iter()
.map(|x| if *x > 101 { *x + 50 } else { *x })
.collect::<Vec<i32>>();
let x = Array1::from(x);

let sampled_indices = m4_simd_with_x_parallel(x.view(), arr.view(), 20);
assert_eq!(sampled_indices.len(), 17); // Gap with 1 value
let expected_indices = vec![
0, 0, 29, 29, 30, 30, 50, 50, 51, 51, 69, 69, 70, 70, 99, 99, 100,
];
}

#[test]
fn test_many_random_runs_correct() {
let n = 20_001; // not 20_000 because then the last bin is not "full"
Expand Down
Loading

0 comments on commit aeeb8ad

Please sign in to comment.