We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent c0b6a6e, commit dd4320f (Copy full SHA for dd4320f)
cp-algo/util/simd.hpp
@@ -43,10 +43,11 @@ namespace cp_algo {
43
}
44
45
[[gnu::always_inline]] inline u64x4 montgomery_reduce(u64x4 x, uint32_t mod, uint32_t imod) {
46
- auto x_ninv = u64x4(u32x8(x) * (u32x8() + imod));
47
#ifdef __AVX2__
+ auto x_ninv = u64x4(_mm256_mul_epu32(__m256i(x), __m256i() + imod));
48
x += u64x4(_mm256_mul_epu32(__m256i(x_ninv), __m256i() + mod));
49
#else
50
+ auto x_ninv = x * imod;
51
x += low32(x_ninv) * mod;
52
#endif
53
return x >> 32;
0 commit comments