Skip to content

Commit d8c1c98

Browse files
Add iter_{canonical}_minimizer_values_u128
1 parent ab2c3cb commit d8c1c98

File tree

2 files changed

+35
-1
lines changed

2 files changed

+35
-1
lines changed

CHANGELOG.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11
# Changelog
22

3+
## 1.3
4+
- Update to `packed-seq` `3.2.1` for `u128` kmer value support.
5+
- Add `iter_{canonical}_minimizer_values_u128` to iterate over `u128` kmer values.
6+
37
## 1.2
4-
- Fix #10: Add `extract_{canonical}_minimizer_values` to convert positions into `u64` kmer values.
8+
- Fix #10: Add `iter_{canonical}_minimizer_values` to convert positions into `u64` kmer values.
59
- Update to `packed-seq` `3.0`.
610
- Fix to properly initialize arrays when collecting super-k-mers.
711
- Update `packed-seq` to support non-byte offsets.

simd-minimizers/src/lib.rs

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,36 @@ pub fn iter_canonical_minimizer_values<'s, S: Seq<'s>>(
304304
})
305305
}
306306

307+
/// Given a sequence and a list of positions, lazily iterate over the `u128` k-mer values (read via `read_kmer_u128` with length `k`) at those positions, one value per position in the order given.
308+
#[inline(always)]
309+
pub fn iter_minimizer_values_u128<'s, S: Seq<'s>>(
310+
seq: S,
311+
k: usize,
312+
positions: &'s [u32],
313+
) -> impl ExactSizeIterator<Item = u128> + Captures<&'s ()> + Clone {
314+
positions
315+
.iter()
316+
.map(move |&pos| seq.read_kmer_u128(k, pos as usize))
317+
}
318+
319+
/// Given a sequence and a list of positions, lazily iterate over the *canonical* `u128` k-mer values of length `k` at those positions, one value per position in the order given.
320+
///
321+
/// The canonical k-mer value is the *minimum* of the k-mer value (`read_kmer_u128`) and its reverse-complement value (`read_revcomp_kmer_u128`) at the same position.
322+
/// Note that this also works for even `k`, but typically one would want `k` to be odd.
323+
#[inline(always)]
324+
pub fn iter_canonical_minimizer_values_u128<'s, S: Seq<'s>>(
325+
seq: S,
326+
k: usize,
327+
positions: &'s [u32],
328+
) -> impl ExactSizeIterator<Item = u128> + Captures<&'s ()> + Clone {
329+
positions.iter().map(move |&pos| {
330+
let a = seq.read_kmer_u128(k, pos as usize);
331+
let b = seq.read_revcomp_kmer_u128(k, pos as usize);
332+
core::cmp::min(a, b)
333+
})
334+
}
335+
336+
307337
/// Variants that always use mulHash, instead of the default ntHash for DNA and mulHash for text.
308338
pub mod mul_hash {
309339
use super::*;

0 commit comments

Comments
 (0)