Skip to content

Commit 38e2c56

Browse files
daosvik, chfast, holiman
authored
Optimised modular arithmetic targeting elliptic curve operations (#86)
* Adds faster 256-bit modular addition and multiplication
* Fix problems identified by CI
* Use AddOverflow and SubOverflow instead of add256 and sub256
* Remove unnecessary assignment and replace use of SubOverflow with Sub
* Use explicit returns
* Add comment about iteration count in AddMod
* Fix mistake in calculation of reciprocal for (64n+1)-bit moduli
* Change expression according to readability preference
  Co-authored-by: Paweł Bylica <chfast@gmail.com>
* Remove unnecessary case safeguards (CI: SCC-S1023, RVV-A0010)
* Omit unnecessary break at end of case clause (CI: RVV-A0010)
* Remove new shift function, use Lsh instead
* Use unsigned shift count
* Fix AddMod
* Replace unnecessary use of SubOverflow with Sub
  Co-authored-by: Paweł Bylica <chfast@gmail.com>
* create reciprocalCache type
* mod: modularize cache
* Pass cache by reference
* Reduce overhead to a single pair of hit/miss counters for the reciprocalCache
* Add some more explicit returns
* Improve explanation of cache size parameters
* gofmt benchmarks_test.go
  Includes reordering of some tests to preserve readability
* Add MulMod tests with one operand a power of 2
* Add MulMod tests with one operand a power of 2 plus/minus 1
* test: Add AddMod test cases for unoptimized case with overflow
* test: Add tests for leadingZeros()
* Add tests with one operand being 2^256 minus a power of 2 (only one 0-bit)
* Disable the cache if configured to 0-way cache sets
* Add fixed modulus and precalculation of its reciprocal
* tests: more ternary ops cases, more coverage
* squashmelater: commit to trigger codecov
* mod: remove global cache
* move reciprocal cache to separate file, un-export
* make fixed modulus non-global
* lint fix
* Remove automatic cache
* Pass modulus and reciprocal by reference instead of value
* Make reciprocal() specialized for m[3] != 0 (modulus >= 2^193)
* Add two corner case tests for reciprocal
* Remove unreachable safeguard code in reciprocal
* Comment out unused onesCount
* Make leadingZeros faster
* Simplify detection of whether a modulus is a power of 2
* Speed up the end of reciprocal
* Add MulModWithReciprocal, export Reciprocal
* Pass reciprocal as pointer parameter to MulModWithReciprocal
* Remove unused func onesCount
* Add testing of MulModWithReciprocal
* Adjust comments and tests to reflect that Reciprocal now only supports m[3] != 0
* Also test MulModWithReciprocal on regular test cases

Co-authored-by: Paweł Bylica <chfast@gmail.com>
Co-authored-by: Martin Holst Swende <martin@swende.se>
1 parent 71c9c37 commit 38e2c56

File tree

5 files changed

+999
-33
lines changed

5 files changed

+999
-33
lines changed

benchmarks_test.go

Lines changed: 52 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ func initSamples() bool {
6464
l := newRandInt(1)
6565
g := newRandInt(1)
6666
if g.Lt(&l) {
67-
g,l = l,g
67+
g, l = l, g
6868
}
6969
if g[0] == 0 {
7070
g[0]++
@@ -77,7 +77,7 @@ func initSamples() bool {
7777
l = newRandInt(2)
7878
g = newRandInt(2)
7979
if g.Lt(&l) {
80-
g,l = l,g
80+
g, l = l, g
8181
}
8282
if g[1] == 0 {
8383
g[1]++
@@ -90,7 +90,7 @@ func initSamples() bool {
9090
l = newRandInt(3)
9191
g = newRandInt(3)
9292
if g.Lt(&l) {
93-
g,l = l,g
93+
g, l = l, g
9494
}
9595
if g[2] == 0 {
9696
g[2]++
@@ -103,7 +103,7 @@ func initSamples() bool {
103103
l = newRandInt(4)
104104
g = newRandInt(4)
105105
if g.Lt(&l) {
106-
g,l = l,g
106+
g, l = l, g
107107
}
108108
if g[3] == 0 {
109109
g[3]++
@@ -599,14 +599,14 @@ func BenchmarkDiv(b *testing.B) {
599599
}
600600

601601
b.Run("small/uint256", func(b *testing.B) { benchmarkDivUint256(b, &int32Samples, &int32SamplesLt) })
602-
b.Run("small/big", func(b *testing.B) { benchmarkDivBig(b, &big32Samples, &big32SamplesLt) })
603602
b.Run("mod64/uint256", func(b *testing.B) { benchmarkDivUint256(b, &int256Samples, &int64Samples) })
604-
b.Run("mod64/big", func(b *testing.B) { benchmarkDivBig(b, &big256Samples, &big64Samples) })
605603
b.Run("mod128/uint256", func(b *testing.B) { benchmarkDivUint256(b, &int256Samples, &int128Samples) })
606-
b.Run("mod128/big", func(b *testing.B) { benchmarkDivBig(b, &big256Samples, &big128Samples) })
607604
b.Run("mod192/uint256", func(b *testing.B) { benchmarkDivUint256(b, &int256Samples, &int192Samples) })
608-
b.Run("mod192/big", func(b *testing.B) { benchmarkDivBig(b, &big256Samples, &big192Samples) })
609605
b.Run("mod256/uint256", func(b *testing.B) { benchmarkDivUint256(b, &int256Samples, &int256SamplesLt) })
606+
b.Run("small/big", func(b *testing.B) { benchmarkDivBig(b, &big32Samples, &big32SamplesLt) })
607+
b.Run("mod64/big", func(b *testing.B) { benchmarkDivBig(b, &big256Samples, &big64Samples) })
608+
b.Run("mod128/big", func(b *testing.B) { benchmarkDivBig(b, &big256Samples, &big128Samples) })
609+
b.Run("mod192/big", func(b *testing.B) { benchmarkDivBig(b, &big256Samples, &big192Samples) })
610610
b.Run("mod256/big", func(b *testing.B) { benchmarkDivBig(b, &big256Samples, &big256SamplesLt) })
611611
}
612612

@@ -629,14 +629,14 @@ func BenchmarkMod(b *testing.B) {
629629
}
630630

631631
b.Run("small/uint256", func(b *testing.B) { benchmarkModUint256(b, &int32Samples, &int32SamplesLt) })
632-
b.Run("small/big", func(b *testing.B) { benchmarkModBig(b, &big32Samples, &big32SamplesLt) })
633632
b.Run("mod64/uint256", func(b *testing.B) { benchmarkModUint256(b, &int256Samples, &int64Samples) })
634-
b.Run("mod64/big", func(b *testing.B) { benchmarkModBig(b, &big256Samples, &big64Samples) })
635633
b.Run("mod128/uint256", func(b *testing.B) { benchmarkModUint256(b, &int256Samples, &int128Samples) })
636-
b.Run("mod128/big", func(b *testing.B) { benchmarkModBig(b, &big256Samples, &big128Samples) })
637634
b.Run("mod192/uint256", func(b *testing.B) { benchmarkModUint256(b, &int256Samples, &int192Samples) })
638-
b.Run("mod192/big", func(b *testing.B) { benchmarkModBig(b, &big256Samples, &big192Samples) })
639635
b.Run("mod256/uint256", func(b *testing.B) { benchmarkModUint256(b, &int256Samples, &int256SamplesLt) })
636+
b.Run("small/big", func(b *testing.B) { benchmarkModBig(b, &big32Samples, &big32SamplesLt) })
637+
b.Run("mod64/big", func(b *testing.B) { benchmarkModBig(b, &big256Samples, &big64Samples) })
638+
b.Run("mod128/big", func(b *testing.B) { benchmarkModBig(b, &big256Samples, &big128Samples) })
639+
b.Run("mod192/big", func(b *testing.B) { benchmarkModBig(b, &big256Samples, &big192Samples) })
640640
b.Run("mod256/big", func(b *testing.B) { benchmarkModBig(b, &big256Samples, &big256SamplesLt) })
641641
}
642642

@@ -667,19 +667,38 @@ func BenchmarkAddMod(b *testing.B) {
667667
}
668668
}
669669

670-
b.Run("small/uint256", func(b *testing.B) { benchmarkAddModUint256 (b, &int32SamplesLt, &int32Samples) })
671-
b.Run("small/big", func(b *testing.B) { benchmarkAddModBig (b, &big32SamplesLt, &big32Samples) })
672-
b.Run("mod64/uint256", func(b *testing.B) { benchmarkAddModUint256 (b, &int64SamplesLt, &int64Samples) })
673-
b.Run("mod64/big", func(b *testing.B) { benchmarkAddModBig (b, &big64SamplesLt, &big64Samples) })
674-
b.Run("mod128/uint256", func(b *testing.B) { benchmarkAddModUint256 (b, &int128SamplesLt, &int128Samples) })
675-
b.Run("mod128/big", func(b *testing.B) { benchmarkAddModBig (b, &big128SamplesLt, &big128Samples) })
676-
b.Run("mod192/uint256", func(b *testing.B) { benchmarkAddModUint256 (b, &int192SamplesLt, &int192Samples) })
677-
b.Run("mod192/big", func(b *testing.B) { benchmarkAddModBig (b, &big192SamplesLt, &big192Samples) })
678-
b.Run("mod256/uint256", func(b *testing.B) { benchmarkAddModUint256 (b, &int256SamplesLt, &int256Samples) })
679-
b.Run("mod256/big", func(b *testing.B) { benchmarkAddModBig (b, &big256SamplesLt, &big256Samples) })
670+
b.Run("small/uint256", func(b *testing.B) { benchmarkAddModUint256(b, &int32SamplesLt, &int32Samples) })
671+
b.Run("mod64/uint256", func(b *testing.B) { benchmarkAddModUint256(b, &int64SamplesLt, &int64Samples) })
672+
b.Run("mod128/uint256", func(b *testing.B) { benchmarkAddModUint256(b, &int128SamplesLt, &int128Samples) })
673+
b.Run("mod192/uint256", func(b *testing.B) { benchmarkAddModUint256(b, &int192SamplesLt, &int192Samples) })
674+
b.Run("mod256/uint256", func(b *testing.B) { benchmarkAddModUint256(b, &int256SamplesLt, &int256Samples) })
675+
b.Run("small/big", func(b *testing.B) { benchmarkAddModBig(b, &big32SamplesLt, &big32Samples) })
676+
b.Run("mod64/big", func(b *testing.B) { benchmarkAddModBig(b, &big64SamplesLt, &big64Samples) })
677+
b.Run("mod128/big", func(b *testing.B) { benchmarkAddModBig(b, &big128SamplesLt, &big128Samples) })
678+
b.Run("mod192/big", func(b *testing.B) { benchmarkAddModBig(b, &big192SamplesLt, &big192Samples) })
679+
b.Run("mod256/big", func(b *testing.B) { benchmarkAddModBig(b, &big256SamplesLt, &big256Samples) })
680680
}
681681

682682
func BenchmarkMulMod(b *testing.B) {
683+
benchmarkMulModUint256R := func(b *testing.B, factorsSamples, modSamples *[numSamples]Int) {
684+
iter := (b.N + numSamples - 1) / numSamples
685+
686+
var mu [numSamples][5]uint64
687+
688+
for i := 0; i < numSamples; i++ {
689+
mu[i] = Reciprocal(&modSamples[i])
690+
}
691+
692+
b.ResetTimer()
693+
694+
for j := 0; j < numSamples; j++ {
695+
x := factorsSamples[j]
696+
697+
for i := 0; i < iter; i++ {
698+
x.MulModWithReciprocal(&x, &factorsSamples[j], &modSamples[j], &mu[j])
699+
}
700+
}
701+
}
683702
benchmarkMulModUint256 := func(b *testing.B, factorsSamples, modSamples *[numSamples]Int) {
684703
iter := (b.N + numSamples - 1) / numSamples
685704

@@ -704,16 +723,17 @@ func BenchmarkMulMod(b *testing.B) {
704723
}
705724
}
706725

707-
b.Run("small/uint256", func(b *testing.B) { benchmarkMulModUint256 (b, &int32SamplesLt, &int32Samples) })
708-
b.Run("small/big", func(b *testing.B) { benchmarkMulModBig (b, &big32SamplesLt, &big32Samples) })
709-
b.Run("mod64/uint256", func(b *testing.B) { benchmarkMulModUint256 (b, &int64SamplesLt, &int64Samples) })
710-
b.Run("mod64/big", func(b *testing.B) { benchmarkMulModBig (b, &big64SamplesLt, &big64Samples) })
711-
b.Run("mod128/uint256", func(b *testing.B) { benchmarkMulModUint256 (b, &int128SamplesLt, &int128Samples) })
712-
b.Run("mod128/big", func(b *testing.B) { benchmarkMulModBig (b, &big128SamplesLt, &big128Samples) })
713-
b.Run("mod192/uint256", func(b *testing.B) { benchmarkMulModUint256 (b, &int192SamplesLt, &int192Samples) })
714-
b.Run("mod192/big", func(b *testing.B) { benchmarkMulModBig (b, &big192SamplesLt, &big192Samples) })
715-
b.Run("mod256/uint256", func(b *testing.B) { benchmarkMulModUint256 (b, &int256SamplesLt, &int256Samples) })
716-
b.Run("mod256/big", func(b *testing.B) { benchmarkMulModBig (b, &big256SamplesLt, &big256Samples) })
726+
b.Run("small/uint256", func(b *testing.B) { benchmarkMulModUint256(b, &int32SamplesLt, &int32Samples) })
727+
b.Run("mod64/uint256", func(b *testing.B) { benchmarkMulModUint256(b, &int64SamplesLt, &int64Samples) })
728+
b.Run("mod128/uint256", func(b *testing.B) { benchmarkMulModUint256(b, &int128SamplesLt, &int128Samples) })
729+
b.Run("mod192/uint256", func(b *testing.B) { benchmarkMulModUint256(b, &int192SamplesLt, &int192Samples) })
730+
b.Run("mod256/uint256", func(b *testing.B) { benchmarkMulModUint256(b, &int256SamplesLt, &int256Samples) })
731+
b.Run("mod256/uint256r", func(b *testing.B) { benchmarkMulModUint256R(b, &int256SamplesLt, &int256Samples) })
732+
b.Run("small/big", func(b *testing.B) { benchmarkMulModBig(b, &big32SamplesLt, &big32Samples) })
733+
b.Run("mod64/big", func(b *testing.B) { benchmarkMulModBig(b, &big64SamplesLt, &big64Samples) })
734+
b.Run("mod128/big", func(b *testing.B) { benchmarkMulModBig(b, &big128SamplesLt, &big128Samples) })
735+
b.Run("mod192/big", func(b *testing.B) { benchmarkMulModBig(b, &big192SamplesLt, &big192Samples) })
736+
b.Run("mod256/big", func(b *testing.B) { benchmarkMulModBig(b, &big256SamplesLt, &big256Samples) })
717737
}
718738

719739
func benchmark_SdivLarge_Big(bench *testing.B) {

0 commit comments

Comments
 (0)