Skip to content

Commit dd4320f

Browse files
committed
Use VPMULUDQ instead of VPMULLD in montgomery_reduce
1 parent c0b6a6e commit dd4320f

File tree

1 file changed

+2
-1
lines changed

1 file changed

+2
-1
lines changed

cp-algo/util/simd.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,10 +43,11 @@ namespace cp_algo {
4343
}
4444

4545
[[gnu::always_inline]] inline u64x4 montgomery_reduce(u64x4 x, uint32_t mod, uint32_t imod) {
46-
auto x_ninv = u64x4(u32x8(x) * (u32x8() + imod));
4746
#ifdef __AVX2__
47+
auto x_ninv = u64x4(_mm256_mul_epu32(__m256i(x), __m256i() + imod));
4848
x += u64x4(_mm256_mul_epu32(__m256i(x_ninv), __m256i() + mod));
4949
#else
50+
auto x_ninv = x * imod;
5051
x += low32(x_ninv) * mod;
5152
#endif
5253
return x >> 32;

0 commit comments

Comments
 (0)