Skip to content

Commit 2cef582

Browse files
Auto merge of #147801 - Zalathar:bitwise-changes, r=<try>
(EXPERIMENT) Make ChunkedBitSet's `bitwise_changes` more unroller/autovectorizer-friendly
2 parents 53a741f + 1021357 commit 2cef582

File tree

1 file changed

+40
-6
lines changed

1 file changed

+40
-6
lines changed

compiler/rustc_index/src/bit_set.rs

Lines changed: 40 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1119,15 +1119,49 @@ fn bitwise_changes<Op>(out_vec: &[Word], in_vec: &[Word], op: Op) -> bool
11191119
where
11201120
Op: Fn(Word, Word) -> Word,
11211121
{
1122-
assert_eq!(out_vec.len(), in_vec.len());
1123-
for (out_elem, in_elem) in iter::zip(out_vec, in_vec) {
1124-
let old_val = *out_elem;
1125-
let new_val = op(old_val, *in_elem);
1126-
if old_val != new_val {
1122+
chunked_pairwise_any::<4, Word>(out_vec, in_vec, |&out_elem, &in_elem| {
1123+
out_elem != op(out_elem, in_elem)
1124+
})
1125+
}
1126+
1127+
/// Returns `true` if `pred_fn(&a_slice[i], &b_slice[i])` holds for at least
/// one index `i`.
///
/// To improve opportunities for loop-unrolling and autovectorization, the
/// slices are walked in chunks of `N` items. Every pair inside a chunk is
/// evaluated and the results are OR-ed together without branching; an early
/// return can only happen on a chunk boundary. The 0..N trailing items that
/// do not fill a whole chunk are checked last, in the same way.
///
/// Because of this, `pred_fn` is still invoked on the pairs that follow the
/// first match within the same chunk.
///
/// # Panics
///
/// Panics if the two slices differ in length, or if `N` is zero (a chunk
/// size of zero is rejected by `chunks_exact`).
fn chunked_pairwise_any<const N: usize, T>(
    a_slice: &[T],
    b_slice: &[T],
    pred_fn: impl Fn(&T, &T) -> bool,
) -> bool {
    assert_eq!(a_slice.len(), b_slice.len());

    let a_chunks = a_slice.chunks_exact(N);
    let b_chunks = b_slice.chunks_exact(N);
    // The remainders borrow from the underlying slices, not from the
    // iterators, so they can be taken before the iterators are consumed.
    let a_rest = a_chunks.remainder();
    let b_rest = b_chunks.remainder();

    // OR-reduction over one run of pairs. `|=` (unlike `||`) never
    // short-circuits, so the loop body stays branch-free and every pair in
    // the run is evaluated.
    let any_in = |a_run: &[T], b_run: &[T]| {
        let mut any = false;
        for (a, b) in iter::zip(a_run, b_run) {
            any |= pred_fn(a, b);
        }
        any
    };

    // First, check the full N-sized chunks, bailing out between chunks only.
    for (a_chunk, b_chunk) in iter::zip(a_chunks, b_chunks) {
        if any_in(a_chunk, b_chunk) {
            return true;
        }
    }

    // Finally, check the 0..N items that don't form a full chunk.
    any_in(a_rest, b_rest)
}
11321166

11331167
/// A bitset with a mixed representation, using `DenseBitSet` for small and

0 commit comments

Comments
 (0)