Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions benchmarks/bench_ec_g1.nim
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,13 @@ proc main() =
separator()
scalarMulUnsafeDoubleAddBench(ECP_SWei_Proj[Fp[curve]], MulIters)
separator()
scalarMulGenericBench(ECP_SWei_Proj[Fp[curve]], scratchSpaceSize = 1 shl 2, MulIters)
scalarMulGenericBench(ECP_SWei_Proj[Fp[curve]], window = 2, MulIters)
separator()
scalarMulGenericBench(ECP_SWei_Proj[Fp[curve]], scratchSpaceSize = 1 shl 3, MulIters)
scalarMulGenericBench(ECP_SWei_Proj[Fp[curve]], window = 3, MulIters)
separator()
scalarMulGenericBench(ECP_SWei_Proj[Fp[curve]], scratchSpaceSize = 1 shl 4, MulIters)
scalarMulGenericBench(ECP_SWei_Proj[Fp[curve]], window = 4, MulIters)
separator()
scalarMulGenericBench(ECP_SWei_Proj[Fp[curve]], scratchSpaceSize = 1 shl 5, MulIters)
scalarMulGenericBench(ECP_SWei_Proj[Fp[curve]], window = 5, MulIters)
separator()
scalarMulEndo(ECP_SWei_Proj[Fp[curve]], MulIters)
separator()
Expand Down
12 changes: 7 additions & 5 deletions benchmarks/bench_ec_g2.nim
Original file line number Diff line number Diff line change
Expand Up @@ -54,14 +54,16 @@ proc main() =
separator()
scalarMulUnsafeDoubleAddBench(ECP_SWei_Proj[Fp2[curve]], MulIters)
separator()
scalarMulGenericBench(ECP_SWei_Proj[Fp2[curve]], scratchSpaceSize = 1 shl 2, MulIters)
scalarMulGenericBench(ECP_SWei_Proj[Fp2[curve]], window = 2, MulIters)
separator()
scalarMulGenericBench(ECP_SWei_Proj[Fp2[curve]], scratchSpaceSize = 1 shl 3, MulIters)
scalarMulGenericBench(ECP_SWei_Proj[Fp2[curve]], window = 3, MulIters)
separator()
scalarMulGenericBench(ECP_SWei_Proj[Fp2[curve]], scratchSpaceSize = 1 shl 4, MulIters)
scalarMulGenericBench(ECP_SWei_Proj[Fp2[curve]], window = 4, MulIters)
separator()
scalarMulGenericBench(ECP_SWei_Proj[Fp2[curve]], window = 5, MulIters)
separator()
scalarMulEndo(ECP_SWei_Proj[Fp2[curve]], MulIters)
separator()
# scalarMulEndo(ECP_SWei_Proj[Fp2[curve]], MulIters)
# separator()
separator()

main()
Expand Down
26 changes: 10 additions & 16 deletions benchmarks/bench_elliptic_template.nim
Original file line number Diff line number Diff line change
Expand Up @@ -92,9 +92,12 @@ proc notes*() =
echo "Notes:"
echo " - Compilers:"
echo " Compilers are severely limited on multiprecision arithmetic."
echo " Inline Assembly is used by default (nimble bench_fp)."
echo " Bench without assembly can use \"nimble bench_fp_gcc\" or \"nimble bench_fp_clang\"."
echo " Constantine compile-time assembler is used by default (nimble bench_fp)."
echo " GCC is significantly slower than Clang on multiprecision arithmetic due to catastrophic handling of carries."
echo " GCC also seems to have issues with large temporaries and register spilling."
echo " This is somewhat alleviated by Constantine compile-time assembler."
echo " Bench on specific compiler with assembler: \"nimble bench_ec_g1_gcc\" or \"nimble bench_ec_g1_clang\"."
echo " Bench on specific compiler without assembler: \"nimble bench_ec_g1_gcc_noasm\" or \"nimble bench_ec_g1_clang_noasm\"."
echo " - The simplest operations might be optimized away by the compiler."
echo " - Fast Squaring and Fast Multiplication are possible if there are spare bits in the prime representation (i.e. the prime uses 254 bits out of 256 bits)"

Expand Down Expand Up @@ -139,22 +142,18 @@ proc doublingBench*(T: typedesc, iters: int) =
bench("EC Double " & G1_or_G2, T, iters):
r.double(P)

proc scalarMulGenericBench*(T: typedesc, scratchSpaceSize: static int, iters: int) =
proc scalarMulGenericBench*(T: typedesc, window: static int, iters: int) =
const bits = T.F.C.getCurveOrderBitwidth()
const G1_or_G2 = when T.F is Fp: "G1" else: "G2"

var r {.noInit.}: T
let P = rng.random_unsafe(T) # TODO: clear cofactor

let exponent = rng.random_unsafe(BigInt[bits])
var exponentCanonical{.noInit.}: array[(bits+7) div 8, byte]
exponentCanonical.exportRawUint(exponent, bigEndian)

var scratchSpace{.noInit.}: array[scratchSpaceSize, T]

bench("EC ScalarMul Generic " & G1_or_G2 & " (scratchsize = " & $scratchSpaceSize & ')', T, iters):
bench("EC ScalarMul Generic " & G1_or_G2 & " (window = " & $window & ", scratchsize = " & $(1 shl window) & ')', T, iters):
r = P
r.scalarMulGeneric(exponentCanonical, scratchSpace)
r.scalarMulGeneric(exponent, window)

proc scalarMulEndo*(T: typedesc, iters: int) =
const bits = T.F.C.getCurveOrderBitwidth()
Expand All @@ -167,10 +166,7 @@ proc scalarMulEndo*(T: typedesc, iters: int) =

bench("EC ScalarMul " & G1_or_G2 & " (endomorphism accelerated)", T, iters):
r = P
when T.F is Fp:
r.scalarMulGLV(exponent)
else:
{.error: "Not implemented".}
r.scalarMulEndo(exponent)

proc scalarMulEndoWindow*(T: typedesc, iters: int) =
const bits = T.F.C.getCurveOrderBitwidth()
Expand All @@ -196,9 +192,7 @@ proc scalarMulUnsafeDoubleAddBench*(T: typedesc, iters: int) =
let P = rng.random_unsafe(T) # TODO: clear cofactor

let exponent = rng.random_unsafe(BigInt[bits])
var exponentCanonical{.noInit.}: array[(bits+7) div 8, byte]
exponentCanonical.exportRawUint(exponent, bigEndian)

bench("EC ScalarMul " & G1_or_G2 & " (unsafe reference DoubleAdd)", T, iters):
r = P
r.unsafe_ECmul_double_add(exponentCanonical)
r.unsafe_ECmul_double_add(exponent)
7 changes: 5 additions & 2 deletions benchmarks/bench_fields_template.nim
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,12 @@ proc notes*() =
echo "Notes:"
echo " - Compilers:"
echo " Compilers are severely limited on multiprecision arithmetic."
echo " Inline Assembly is used by default (nimble bench_fp)."
echo " Bench without assembly can use \"nimble bench_fp_gcc\" or \"nimble bench_fp_clang\"."
echo " Constantine compile-time assembler is used by default (nimble bench_fp)."
echo " GCC is significantly slower than Clang on multiprecision arithmetic due to catastrophic handling of carries."
echo " GCC also seems to have issues with large temporaries and register spilling."
echo " This is somewhat alleviated by Constantine compile-time assembler."
echo " Bench on specific compiler with assembler: \"nimble bench_fp_gcc\" or \"nimble bench_fp_clang\"."
echo " Bench on specific compiler without assembler: \"nimble bench_fp_gcc_noasm\" or \"nimble bench_fp_clang_noasm\"."
echo " - The simplest operations might be optimized away by the compiler."
echo " - Fast Squaring and Fast Multiplication are possible if there are spare bits in the prime representation (i.e. the prime uses 254 bits out of 256 bits)"

Expand Down
21 changes: 20 additions & 1 deletion constantine/arithmetic/bigints.nim
Original file line number Diff line number Diff line change
Expand Up @@ -95,10 +95,12 @@ func cswap*(a, b: var BigInt, ctl: CTBool) =
func copyTruncatedFrom*[dBits, sBits: static int](dst: var BigInt[dBits], src: BigInt[sBits]) =
## Copy `src` into `dst`
## if `dst` is not big enough, only the low words are copied
## if `src` is smaller than `dst` the higher words of `dst` will NOT be overwritten
## if `src` is smaller than `dst` the higher words of `dst` will be overwritten with zeros

for wordIdx in 0 ..< min(dst.limbs.len, src.limbs.len):
dst.limbs[wordIdx] = src.limbs[wordIdx]
for wordIdx in min(dst.limbs.len, src.limbs.len) ..< dst.limbs.len:
dst.limbs[wordIdx] = SecretWord(0)

# Comparison
# ------------------------------------------------------------
Expand Down Expand Up @@ -128,6 +130,23 @@ func isOdd*(a: BigInt): SecretBool =
## Returns true if a is odd
a.limbs.isOdd

func isMsbSet*(a: BigInt): SecretBool =
  ## Returns true if the most significant bit of ``a`` is set,
  ## i.e. bit ``bits-1`` of the announced bitwidth.
  ##
  ## If a BigInt is interpreted as signed AND the full bitwidth
  ## is not used by construction, this is equivalent to checking
  ## if the number is negative.

  # The (wordsRequired - 1) lower limbs each hold WordBitWidth bits,
  # so inside the most significant limb the MSB sits at
  #   bits - 1 - (wordsRequired - 1) * WordBitWidth
  # The word-width factor is required: without it a multi-limb BigInt
  # would test the wrong bit (e.g. 65 bits on 64-bit words: 2 limbs,
  # MSB is bit 0 of the top limb, not bit 63).
  const msbInTopLimb = BigInt.bits - 1 - (BigInt.bits.wordsRequired - 1) * WordBitWidth
  SecretBool((BaseType(a.limbs[a.limbs.len-1]) shr msbInTopLimb) and 1)

func eq*(a: BigInt, n: SecretWord): SecretBool =
  ## Compares ``a`` against the small word ``n``.
  ## Returns true when they are equal.
  ## The comparison is delegated to the limb-level ``eq``.
  result = eq(a.limbs, n)

# Arithmetic
# ------------------------------------------------------------

Expand Down
11 changes: 8 additions & 3 deletions constantine/arithmetic/limbs.nim
Original file line number Diff line number Diff line change
Expand Up @@ -131,12 +131,17 @@ func isZero*(a: Limbs): SecretBool =
accum = accum or a[i]
result = accum.isZero()

func isOne*(a: Limbs): SecretBool =
## Returns true if ``a`` is equal to one
result = a[0] == SecretWord(1)
func eq*(a: Limbs, n: SecretWord): SecretBool =
## Returns true if ``a`` is equal
## to the specified small word
result = a[0] == n
for i in 1 ..< a.len:
result = result and a[i].isZero()

func isOne*(a: Limbs): SecretBool =
## Returns true if ``a`` is equal to one
a.eq(SecretWord(1))

func isOdd*(a: Limbs): SecretBool =
## Returns true if a is odd
SecretBool(a[0] and SecretWord(1))
Expand Down
88 changes: 50 additions & 38 deletions constantine/elliptic/ec_endomorphism_accel.nim
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import
../arithmetic,
../io/io_bigints,
../towers,
../isogeny/frobenius,
./ec_weierstrass_affine,
./ec_weierstrass_projective,
./ec_endomorphism_params
Expand Down Expand Up @@ -192,7 +193,7 @@ func secretLookup[T](dst: var T, table: openArray[T], index: SecretWord) =
let selector = SecretWord(i) == index
dst.ccopy(table[i], selector)

func scalarMulGLV*[scalBits](
func scalarMulEndo*[scalBits](
P: var ECP_SWei_Proj,
scalar: BigInt[scalBits]
) =
Expand All @@ -201,35 +202,51 @@ func scalarMulGLV*[scalBits](
## P <- [k] P
##
## This is a scalar multiplication accelerated by an endomorphism
## via the GLV (Gallant-lambert-Vanstone) decomposition.
## - via the GLV (Gallant-Lambert-Vanstone) decomposition on G1
## - via the GLS (Galbraith-Lin-Scott) decomposition on G2
##
## Requires:
## - Cofactor to be cleared
## - 0 <= scalar < curve order
const C = P.F.C # curve
static: doAssert: scalBits == C.getCurveOrderBitwidth()
static: doAssert scalBits <= C.getCurveOrderBitwidth(), "Do not use endomorphism to multiply beyond the curve order"
when P.F is Fp:
const M = 2

# 1. Compute endomorphisms
var endomorphisms {.noInit.}: array[M-1, typeof(P)]
endomorphisms[0] = P
endomorphisms[0].x *= C.getCubicRootOfUnity_mod_p()
# 1. Compute endomorphisms
var endomorphisms {.noInit.}: array[M-1, typeof(P)]
endomorphisms[0] = P
endomorphisms[0].x *= C.getCubicRootOfUnity_mod_p()
elif P.F is Fp2:
const M = 4
# 1. Compute endomorphisms
var endomorphisms {.noInit.}: array[M-1, typeof(P)]
endomorphisms[0].frobenius_psi(P)
endomorphisms[1].frobenius_psi2(P)
endomorphisms[2].frobenius_psi(endomorphisms[1])
else:
{.error: "Unconfigured".}

# 2. Decompose scalar into mini-scalars
const L = (C.getCurveOrderBitwidth() + M - 1) div M + 1
const L = (scalBits + M - 1) div M + 1 + 1 # A "+1" to handle negative
var miniScalars {.noInit.}: array[M, BigInt[L]]
when C == BN254_Snarks:
scalar.decomposeScalar_BN254_Snarks_G1(
miniScalars
)
elif C == BLS12_381:
scalar.decomposeScalar_BLS12_381_G1(
miniScalars
)
else:
{.error: "Unsupported curve for GLV acceleration".}
miniScalars.decomposeEndo(scalar, P.F)

# 3. TODO: handle negative mini-scalars
# Either negate the associated base and the scalar (in the `endomorphisms` array)
# Or use Algorithm 3 from Faz et al which can encode the sign
# in the GLV representation at the low low price of 1 bit
# 3. Handle negative mini-scalars
# A scalar decomposition might lead to negative miniscalar.
# For proper handling it requires either:
# 1. Negating it and then negating the corresponding curve point P
# 2. Adding an extra bit to the recoding, which will do the right thing™
#
# For implementation solution 1 is faster:
# - Double + Add is about 5000~8000 cycles on 6 64-bits limbs (BLS12-381)
# - Conditional negate is about 10 cycles per Fp, on G2 projective we have 3 (coords) * 2 (Fp2) * 10 (cycles) ~= 60 cycles
# We need to test the mini scalar, which is 65 bits so 2 Fp so about 2 cycles
# and negate it as well.
#
# However solution 1 seems to cause issues (TODO)
# with some of the BLS12-381 test cases (6 and 9)
# - 0x5668a2332db27199dcfb7cbdfca6317c2ff128db26d7df68483e0a095ec8e88f
# - 0x644dc62869683f0c93f38eaef2ba6912569dc91ec2806e46b4a3dd6a4421dad1

# 4. Precompute lookup table
var lut {.noInit.}: array[1 shl (M-1), ECP_SWei_Proj]
Expand Down Expand Up @@ -358,8 +375,8 @@ func w2TableIndex(glv: GLV_SAC, bit2: int, isNeg: var SecretBool): SecretWord {.
func computeRecodedLength(bitWidth, window: int): int =
# Strangely in the paper this doesn't depend on
# "m", the GLV decomposition dimension.
# lw = ⌈log2 r/w⌉+1
let lw = ((bitWidth + window - 1) div window + 1)
# lw = ⌈log2 r/w⌉+1+1 (a "+1" to handle negative mini scalars)
let lw = (bitWidth + window - 1) div window + 1 + 1
result = (lw mod window) + lw

func scalarMulGLV_m2w2*[scalBits](
Expand All @@ -374,6 +391,10 @@ func scalarMulGLV_m2w2*[scalBits](
## via the GLV (Gallant-Lambert-Vanstone) decomposition.
##
## For 2-dimensional decomposition with window 2
##
## Requires:
## - Cofactor to be cleared
## - 0 <= scalar < curve order
const C = P0.F.C # curve
static: doAssert: scalBits == C.getCurveOrderBitwidth()

Expand All @@ -384,16 +405,7 @@ func scalarMulGLV_m2w2*[scalBits](
# 2. Decompose scalar into mini-scalars
const L = computeRecodedLength(C.getCurveOrderBitwidth(), 2)
var miniScalars {.noInit.}: array[2, BigInt[L]]
when C == BN254_Snarks:
scalar.decomposeScalar_BN254_Snarks_G1(
miniScalars
)
elif C == BLS12_381:
scalar.decomposeScalar_BLS12_381_G1(
miniScalars
)
else:
{.error: "Unsupported curve for GLV acceleration".}
miniScalars.decomposeEndo(scalar, P0.F)

# 3. TODO: handle negative mini-scalars
# Either negate the associated base and the scalar (in the `endomorphisms` array)
Expand Down Expand Up @@ -553,7 +565,7 @@ when isMainModule:
)

var decomp: MultiScalar[M, L]
decomposeScalar_BN254_Snarks_G1(scalar, decomp)
decomp.decomposeEndo(scalar, Fp[BN254_Snarks])

doAssert: bool(decomp[0] == BigInt[L].fromHex"14928105460c820ccc9a25d0d953dbfe")
doAssert: bool(decomp[1] == BigInt[L].fromHex"13a2f911eb48a578844b901de6f41660")
Expand All @@ -564,7 +576,7 @@ when isMainModule:
)

var decomp: MultiScalar[M, L]
decomposeScalar_BN254_Snarks_G1(scalar, decomp)
decomp.decomposeEndo(scalar, Fp[BN254_Snarks])

doAssert: bool(decomp[0] == BigInt[L].fromHex"28cf7429c3ff8f7e82fc419e90cc3a2")
doAssert: bool(decomp[1] == BigInt[L].fromHex"457efc201bdb3d2e6087df36430a6db6")
Expand All @@ -575,7 +587,7 @@ when isMainModule:
)

var decomp: MultiScalar[M, L]
decomposeScalar_BN254_Snarks_G1(scalar, decomp)
decomp.decomposeEndo(scalar, Fp[BN254_Snarks])

doAssert: bool(decomp[0] == BigInt[L].fromHex"4da8c411566c77e00c902eb542aaa66b")
doAssert: bool(decomp[1] == BigInt[L].fromHex"5aa8f2f15afc3217f06677702bd4e41a")
Expand Down
Loading