Skip to content

Commit d4ecb7e

Browse files
committed
Increased the amount of prefetching done (and made small parameter changes)
1 parent aad436c commit d4ecb7e

File tree

2 files changed: 4 additions and 4 deletions

src/avx512-32bit-qsort.hpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@ struct zmm_vector<int32_t> {
3737
using opmask_t = __mmask16;
3838
static const uint8_t numlanes = 16;
3939
static constexpr int network_sort_threshold = 512;
40-
static constexpr int partition_unroll_factor = 4;
40+
static constexpr int partition_unroll_factor = 8;
4141

4242
using swizzle_ops = avx512_32bit_swizzle_ops;
4343

@@ -158,7 +158,7 @@ struct zmm_vector<uint32_t> {
158158
using opmask_t = __mmask16;
159159
static const uint8_t numlanes = 16;
160160
static constexpr int network_sort_threshold = 512;
161-
static constexpr int partition_unroll_factor = 4;
161+
static constexpr int partition_unroll_factor = 8;
162162

163163
using swizzle_ops = avx512_32bit_swizzle_ops;
164164

src/avx512-common-qsort.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -440,16 +440,16 @@ X86_SIMD_SORT_INLINE arrsize_t partition_avx512_unrolled(type_t *arr,
440440
X86_SIMD_SORT_UNROLL_LOOP(8)
441441
for (int ii = 0; ii < num_unroll; ++ii) {
442442
curr_vec[ii] = vtype::loadu(arr + right + ii * vtype::numlanes);
443+
_mm_prefetch(arr + right + ii * vtype::numlanes - num_unroll * vtype::numlanes, _MM_HINT_T0);
443444
}
444-
_mm_prefetch(arr + right - num_unroll * vtype::numlanes, _MM_HINT_T0);
445445
}
446446
else {
447447
X86_SIMD_SORT_UNROLL_LOOP(8)
448448
for (int ii = 0; ii < num_unroll; ++ii) {
449449
curr_vec[ii] = vtype::loadu(arr + left + ii * vtype::numlanes);
450+
_mm_prefetch(arr + left + ii * vtype::numlanes + num_unroll * vtype::numlanes, _MM_HINT_T0);
450451
}
451452
left += num_unroll * vtype::numlanes;
452-
_mm_prefetch(arr + left, _MM_HINT_T0);
453453
}
454454
// partition the current vector and save it on both sides of the array
455455
X86_SIMD_SORT_UNROLL_LOOP(8)

0 commit comments

Comments
 (0)