@@ -1119,15 +1119,49 @@ fn bitwise_changes<Op>(out_vec: &[Word], in_vec: &[Word], op: Op) -> bool
11191119where
11201120 Op : Fn ( Word , Word ) -> Word ,
11211121{
1122- assert_eq ! ( out_vec. len( ) , in_vec. len( ) ) ;
1123- for ( out_elem, in_elem) in iter:: zip ( out_vec, in_vec) {
1124- let old_val = * out_elem;
1125- let new_val = op ( old_val, * in_elem) ;
1126- if old_val != new_val {
1122+ chunked_pairwise_any :: < 4 , Word > ( out_vec, in_vec, |& out_elem, & in_elem| {
1123+ out_elem != op ( out_elem, in_elem)
1124+ } )
1125+ }
1126+
1127+ /// Given two slices of equal length, returns true if `pred_fn(&a[i], &b[i])`
1128+ /// is true for any `i`.
1129+ ///
1130+ /// To improve opportunities for loop-unrolling and autovectorization, items
1131+ /// are grouped into chunks of length `N`, with early returns only occurring on
1132+ /// chunk boundaries.
1133+ fn chunked_pairwise_any < const N : usize , T > (
1134+ a_slice : & [ T ] ,
1135+ b_slice : & [ T ] ,
1136+ pred_fn : impl Fn ( & T , & T ) -> bool ,
1137+ ) -> bool {
1138+ assert_eq ! ( a_slice. len( ) , b_slice. len( ) ) ;
1139+
1140+ let chunks = |s| <[ T ] >:: chunks_exact ( s, N ) ;
1141+ let rest = |s| chunks ( s) . remainder ( ) ;
1142+
1143+ // First, check the full N-sized chunks.
1144+ for ( a_chunk, b_chunk) in iter:: zip ( chunks ( a_slice) , chunks ( b_slice) ) {
1145+ let mut chunk_any = false ;
1146+ for ( a, b) in iter:: zip ( a_chunk, b_chunk) {
1147+ if pred_fn ( a, b) {
1148+ chunk_any = true ;
1149+ }
1150+ }
1151+ if chunk_any {
11271152 return true ;
11281153 }
11291154 }
1130- false
1155+
1156+ // Finally check the 0..N items that don't form a full chunk.
1157+ let mut rest_any = false ;
1158+ for ( a, b) in iter:: zip ( rest ( a_slice) , rest ( b_slice) ) {
1159+ if pred_fn ( a, b) {
1160+ rest_any = true ;
1161+ }
1162+ }
1163+
1164+ rest_any
11311165}
11321166
11331167/// A bitset with a mixed representation, using `DenseBitSet` for small and
0 commit comments