Skip to content

Commit b99b313

Browse files
move splat out of loop
1 parent c54ec59 commit b99b313

File tree

2 files changed

+6
-3
lines changed

2 files changed

+6
-3
lines changed

src/canonical.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,14 +46,15 @@ pub fn canonical_mapper_simd(l: usize) -> (Delay, impl FnMut((S, S)) -> u32x8) {
4646

4747
// Twice the count of odd characters, offset by -l (each character adds 0 or 2), so >0 is canonical and <0 is not.
4848
let mut cnt = i32x8::splat(-(l as i32));
49+
let zero = i32x8::splat(0);
4950
let two = i32x8::splat(2);
5051

5152
(
5253
Delay(l - 1),
5354
#[inline(always)]
5455
move |(a, r)| {
5556
cnt += unsafe { transmute::<_, i32x8>(a) } & two;
56-
let out = unsafe { transmute::<_, u32x8>(cnt.cmp_gt(i32x8::splat(0))) };
57+
let out = unsafe { transmute::<_, u32x8>(cnt.cmp_gt(zero)) };
5758
cnt -= unsafe { transmute::<_, i32x8>(r) } & two;
5859
out
5960
},

src/sliding_min.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,7 @@ pub fn sliding_min_mapper_simd<const LEFT: bool>(
235235
let pos_mask = S::splat(0x0000_ffff);
236236
let max_pos = S::splat((1 << 16) - 1);
237237
let mut pos = S::splat(0);
238+
let one = S::splat(1);
238239
// Sliding min is over w+k-1 characters, so chunks overlap w+k-2.
239240
// Thus, the true length of each lane is len-(k+w-2).
240241
//
@@ -251,7 +252,7 @@ pub fn sliding_min_mapper_simd<const LEFT: bool>(
251252
}
252253
// slightly faster than assigning S::splat(u32::MAX)
253254
let elem = (if LEFT { val } else { !val } & val_mask) | pos;
254-
pos += S::splat(1);
255+
pos += one;
255256
ring_buf.push(elem);
256257
prefix_min = simd_min::<LEFT>(prefix_min, elem);
257258
// After a chunk has been filled, compute suffix minima.
@@ -317,6 +318,7 @@ pub fn sliding_lr_min_mapper_simd(
317318
let max_pos = S::splat((1 << 16) - 1);
318319
let mut pos = S::splat(0);
319320
let mut pos_offset: S = from_fn(|l| (l * len.saturating_sub(w - 1)) as u32).into();
321+
let one = S::splat(1);
320322

321323
#[inline(always)]
322324
move |val| {
@@ -329,7 +331,7 @@ pub fn sliding_lr_min_mapper_simd(
329331
let lelem = (val & val_mask) | pos;
330332
let relem = (!val & val_mask) | pos;
331333
let elem = (lelem, relem);
332-
pos += S::splat(1);
334+
pos += one;
333335
ring_buf.push(elem);
334336
prefix_lr_min = simd_lr_min(prefix_lr_min, elem);
335337
// After a chunk has been filled, compute suffix minima.

0 commit comments

Comments
 (0)