diff --git a/doc/safegcd_implementation.md b/doc/safegcd_implementation.md index 5216231e5..5dbbb7bbd 100644 --- a/doc/safegcd_implementation.md +++ b/doc/safegcd_implementation.md @@ -1,7 +1,7 @@ # The safegcd implementation in libsecp256k1 explained -This document explains the modular inverse implementation in the `src/modinv*.h` files. It is based -on the paper +This document explains the modular inverse and Jacobi symbol implementations in the `src/modinv*.h` files. +It is based on the paper ["Fast constant-time gcd computation and modular inversion"](https://gcd.cr.yp.to/papers.html#safegcd) by Daniel J. Bernstein and Bo-Yin Yang. The references below are for the Date: 2019.04.13 version. @@ -769,3 +769,51 @@ def modinv_var(M, Mi, x): d, e = update_de(d, e, t, M, Mi) return normalize(f, d, Mi) ``` + +## 8. From GCDs to Jacobi symbol + +We can also use a similar approach to calculate Jacobi symbol *(x | M)* by keeping track of an +extra variable *j*, for which at every step *(x | M) = j (g | f)*. As we update *f* and *g*, we +make corresponding updates to *j* using +[properties of the Jacobi symbol](https://en.wikipedia.org/wiki/Jacobi_symbol#Properties): +* *((g/2) | f)* is either *(g | f)* or *-(g | f)*, depending on the value of *f mod 8* (negating if it's *3* or *5*). +* *(f | g)* is either *(g | f)* or *-(g | f)*, depending on *f mod 4* and *g mod 4* (negating if both are *3*). + +These updates depend only on the values of *f* and *g* modulo *4* or *8*, and can thus be applied +very quickly, as long as we keep track of a few additional bits of *f* and *g*. Overall, this +calculation is slightly simpler than the one for the modular inverse because we no longer need to +keep track of *d* and *e*. + +However, one difficulty of this approach is that the Jacobi symbol *(a | n)* is only defined for +positive odd integers *n*, whereas in the original safegcd algorithm, *f, g* can take negative +values. We resolve this by using the following modified steps: + +```python + # Before + if delta > 0 and g & 1: + delta, f, g = 1 - delta, g, (g - f) // 2 + + # After + if delta > 0 and g & 1: + delta, f, g = 1 - delta, g, (g + f) // 2 +``` + +The algorithm is still correct, since the changed divstep, called a "posdivstep" (see section 8.4 +and E.5 in the paper) preserves *gcd(f, g)*. However, there's no proof that the modified algorithm +will converge. The justification for posdivsteps is completely empirical: in practice, it appears +that the vast majority of nonzero inputs converge to *f=g=gcd(f0, g0)* in a +number of steps proportional to their logarithm. + +Note that: +- We require inputs to satisfy *gcd(x, M) = 1*, as otherwise *f=1* is not reached. +- We require inputs *x &neq; 0*, because applying posdivstep with *g=0* has no effect. +- We need to update the termination condition from *g=0* to *f=1*. + +We account for the possibility of nonconvergence by only performing a bounded number of +posdivsteps, and then falling back to square-root based Jacobi calculation if a solution has not +yet been found. + +The optimizations in sections 3-7 above are described in the context of the original divsteps, but +in the C implementation we also adapt most of them (not including "avoiding modulus operations", +since it's not necessary to track *d, e*, and "constant-time operation", since we never calculate +Jacobi symbols for secret data) to the posdivsteps version. diff --git a/src/bench_internal.c b/src/bench_internal.c index e1ef14fdb..c248ab8eb 100644 --- a/src/bench_internal.c +++ b/src/bench_internal.c @@ -219,6 +219,19 @@ static void bench_field_sqrt(void* arg, int iters) { CHECK(j <= iters); } +static void bench_field_is_square_var(void* arg, int iters) { + int i, j = 0; + bench_inv *data = (bench_inv*)arg; + secp256k1_fe t = data->fe[0]; + + for (i = 0; i < iters; i++) { + j += secp256k1_fe_is_square_var(&t); + secp256k1_fe_add(&t, &data->fe[1]); + secp256k1_fe_normalize_var(&t); + } + CHECK(j <= iters); +} + static void bench_group_double_var(void* arg, int iters) { int i; bench_inv *data = (bench_inv*)arg; @@ -371,6 +384,7 @@ int main(int argc, char **argv) { if (d || have_flag(argc, argv, "field") || have_flag(argc, argv, "mul")) run_benchmark("field_mul", bench_field_mul, bench_setup, NULL, &data, 10, iters*10); if (d || have_flag(argc, argv, "field") || have_flag(argc, argv, "inverse")) run_benchmark("field_inverse", bench_field_inverse, bench_setup, NULL, &data, 10, iters); if (d || have_flag(argc, argv, "field") || have_flag(argc, argv, "inverse")) run_benchmark("field_inverse_var", bench_field_inverse_var, bench_setup, NULL, &data, 10, iters); + if (d || have_flag(argc, argv, "field") || have_flag(argc, argv, "issquare")) run_benchmark("field_is_square_var", bench_field_is_square_var, bench_setup, NULL, &data, 10, iters); if (d || have_flag(argc, argv, "field") || have_flag(argc, argv, "sqrt")) run_benchmark("field_sqrt", bench_field_sqrt, bench_setup, NULL, &data, 10, iters); if (d || have_flag(argc, argv, "group") || have_flag(argc, argv, "double")) run_benchmark("group_double_var", bench_group_double_var, bench_setup, NULL, &data, 10, iters*10); diff --git a/src/field.h b/src/field.h index 4b57548be..ccb4327ee 100644 --- a/src/field.h +++ b/src/field.h @@ -135,4 +135,7 @@ static void secp256k1_fe_half(secp256k1_fe *r); * magnitude set to 'm' and is normalized if (and only if) 'm' is zero. */ static void secp256k1_fe_get_bounds(secp256k1_fe *r, int m); +/** Determine whether a is a square (modulo p). */ +static int secp256k1_fe_is_square_var(const secp256k1_fe *a); + #endif /* SECP256K1_FIELD_H */ diff --git a/src/field_10x26_impl.h b/src/field_10x26_impl.h index 5e6c63cf4..72c56df02 100644 --- a/src/field_10x26_impl.h +++ b/src/field_10x26_impl.h @@ -1365,4 +1365,31 @@ static void secp256k1_fe_inv_var(secp256k1_fe *r, const secp256k1_fe *x) { VERIFY_CHECK(secp256k1_fe_normalizes_to_zero(r) == secp256k1_fe_normalizes_to_zero(&tmp)); } +static int secp256k1_fe_is_square_var(const secp256k1_fe *x) { + secp256k1_fe tmp; + secp256k1_modinv32_signed30 s; + int jac, ret; + + tmp = *x; + secp256k1_fe_normalize_var(&tmp); + /* secp256k1_jacobi32_maybe_var cannot deal with input 0. */ + if (secp256k1_fe_is_zero(&tmp)) return 1; + secp256k1_fe_to_signed30(&s, &tmp); + jac = secp256k1_jacobi32_maybe_var(&s, &secp256k1_const_modinfo_fe); + if (jac == 0) { + /* secp256k1_jacobi32_maybe_var failed to compute the Jacobi symbol. Fall back + * to computing a square root. This should be extremely rare with random + * input (except in VERIFY mode, where a lower iteration count is used). */ + secp256k1_fe dummy; + ret = secp256k1_fe_sqrt(&dummy, &tmp); + } else { +#ifdef VERIFY + secp256k1_fe dummy; + VERIFY_CHECK(jac == 2*secp256k1_fe_sqrt(&dummy, &tmp) - 1); +#endif + ret = jac >= 0; + } + return ret; +} + #endif /* SECP256K1_FIELD_REPR_IMPL_H */ diff --git a/src/field_5x52_impl.h b/src/field_5x52_impl.h index 985c486ae..538991c7a 100644 --- a/src/field_5x52_impl.h +++ b/src/field_5x52_impl.h @@ -664,4 +664,31 @@ static void secp256k1_fe_inv_var(secp256k1_fe *r, const secp256k1_fe *x) { #endif } +static int secp256k1_fe_is_square_var(const secp256k1_fe *x) { + secp256k1_fe tmp; + secp256k1_modinv64_signed62 s; + int jac, ret; + + tmp = *x; + secp256k1_fe_normalize_var(&tmp); + /* secp256k1_jacobi64_maybe_var cannot deal with input 0. */ + if (secp256k1_fe_is_zero(&tmp)) return 1; + secp256k1_fe_to_signed62(&s, &tmp); + jac = secp256k1_jacobi64_maybe_var(&s, &secp256k1_const_modinfo_fe); + if (jac == 0) { + /* secp256k1_jacobi64_maybe_var failed to compute the Jacobi symbol. Fall back + * to computing a square root. This should be extremely rare with random + * input (except in VERIFY mode, where a lower iteration count is used). */ + secp256k1_fe dummy; + ret = secp256k1_fe_sqrt(&dummy, &tmp); + } else { +#ifdef VERIFY + secp256k1_fe dummy; + VERIFY_CHECK(jac == 2*secp256k1_fe_sqrt(&dummy, &tmp) - 1); +#endif + ret = jac >= 0; + } + return ret; +} + #endif /* SECP256K1_FIELD_REPR_IMPL_H */ diff --git a/src/int128.h b/src/int128.h index 8c1c061a8..5355fbfae 100644 --- a/src/int128.h +++ b/src/int128.h @@ -80,10 +80,10 @@ static SECP256K1_INLINE void secp256k1_i128_from_i64(secp256k1_int128 *r, int64_ /* Compare two 128-bit values for equality. */ static SECP256K1_INLINE int secp256k1_i128_eq_var(const secp256k1_int128 *a, const secp256k1_int128 *b); -/* Tests if r is equal to 2^n. +/* Tests if r is equal to sign*2^n (sign must be 1 or -1). * n must be strictly less than 127. */ -static SECP256K1_INLINE int secp256k1_i128_check_pow2(const secp256k1_int128 *r, unsigned int n); +static SECP256K1_INLINE int secp256k1_i128_check_pow2(const secp256k1_int128 *r, unsigned int n, int sign); #endif diff --git a/src/int128_native_impl.h b/src/int128_native_impl.h index 30c26f2d3..996e542cf 100644 --- a/src/int128_native_impl.h +++ b/src/int128_native_impl.h @@ -84,9 +84,10 @@ static SECP256K1_INLINE int secp256k1_i128_eq_var(const secp256k1_int128 *a, con return *a == *b; } -static SECP256K1_INLINE int secp256k1_i128_check_pow2(const secp256k1_int128 *r, unsigned int n) { +static SECP256K1_INLINE int secp256k1_i128_check_pow2(const secp256k1_int128 *r, unsigned int n, int sign) { VERIFY_CHECK(n < 127); - return (*r == (int128_t)1 << n); + VERIFY_CHECK(sign == 1 || sign == -1); + return (*r == (int128_t)((uint128_t)sign << n)); } #endif diff --git a/src/int128_struct_impl.h b/src/int128_struct_impl.h index 298a7bb1a..2eb337cb5 100644 --- a/src/int128_struct_impl.h +++ b/src/int128_struct_impl.h @@ -189,10 +189,11 @@ static SECP256K1_INLINE int secp256k1_i128_eq_var(const secp256k1_int128 *a, con return a->hi == b->hi && a->lo == b->lo; } -static SECP256K1_INLINE int secp256k1_i128_check_pow2(const secp256k1_int128 *r, unsigned int n) { - VERIFY_CHECK(n < 127); - return n >= 64 ? r->hi == (uint64_t)1 << (n - 64) && r->lo == 0 - : r->hi == 0 && r->lo == (uint64_t)1 << n; +static SECP256K1_INLINE int secp256k1_i128_check_pow2(const secp256k1_int128 *r, unsigned int n, int sign) { + VERIFY_CHECK(n < 127); + VERIFY_CHECK(sign == 1 || sign == -1); + return n >= 64 ? r->hi == (uint64_t)sign << (n - 64) && r->lo == 0 + : r->hi == (uint64_t)((sign - 1) >> 1) && r->lo == (uint64_t)sign << n; } #endif diff --git a/src/modinv32.h b/src/modinv32.h index d1e6e23c1..846c642f8 100644 --- a/src/modinv32.h +++ b/src/modinv32.h @@ -35,4 +35,9 @@ static void secp256k1_modinv32_var(secp256k1_modinv32_signed30 *x, const secp256 /* Same as secp256k1_modinv32_var, but constant time in x (not in the modulus). */ static void secp256k1_modinv32(secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo); +/* Compute the Jacobi symbol for (x | modinfo->modulus). x must be coprime with modulus (and thus + * cannot be 0, as modulus >= 3). All limbs of x must be non-negative. Returns 0 if the result + * cannot be computed. */ +static int secp256k1_jacobi32_maybe_var(const secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo); + #endif /* SECP256K1_MODINV32_H */ diff --git a/src/modinv32_impl.h b/src/modinv32_impl.h index 661c5fc04..028a5701e 100644 --- a/src/modinv32_impl.h +++ b/src/modinv32_impl.h @@ -232,6 +232,21 @@ static int32_t secp256k1_modinv32_divsteps_30(int32_t zeta, uint32_t f0, uint32_ return zeta; } +/* inv256[i] = -(2*i+1)^-1 (mod 256) */ +static const uint8_t secp256k1_modinv32_inv256[128] = { + 0xFF, 0x55, 0x33, 0x49, 0xC7, 0x5D, 0x3B, 0x11, 0x0F, 0xE5, 0xC3, 0x59, + 0xD7, 0xED, 0xCB, 0x21, 0x1F, 0x75, 0x53, 0x69, 0xE7, 0x7D, 0x5B, 0x31, + 0x2F, 0x05, 0xE3, 0x79, 0xF7, 0x0D, 0xEB, 0x41, 0x3F, 0x95, 0x73, 0x89, + 0x07, 0x9D, 0x7B, 0x51, 0x4F, 0x25, 0x03, 0x99, 0x17, 0x2D, 0x0B, 0x61, + 0x5F, 0xB5, 0x93, 0xA9, 0x27, 0xBD, 0x9B, 0x71, 0x6F, 0x45, 0x23, 0xB9, + 0x37, 0x4D, 0x2B, 0x81, 0x7F, 0xD5, 0xB3, 0xC9, 0x47, 0xDD, 0xBB, 0x91, + 0x8F, 0x65, 0x43, 0xD9, 0x57, 0x6D, 0x4B, 0xA1, 0x9F, 0xF5, 0xD3, 0xE9, + 0x67, 0xFD, 0xDB, 0xB1, 0xAF, 0x85, 0x63, 0xF9, 0x77, 0x8D, 0x6B, 0xC1, + 0xBF, 0x15, 0xF3, 0x09, 0x87, 0x1D, 0xFB, 0xD1, 0xCF, 0xA5, 0x83, 0x19, + 0x97, 0xAD, 0x8B, 0xE1, 0xDF, 0x35, 0x13, 0x29, 0xA7, 0x3D, 0x1B, 0xF1, + 0xEF, 0xC5, 0xA3, 0x39, 0xB7, 0xCD, 0xAB, 0x01 +}; + /* Compute the transition matrix and eta for 30 divsteps (variable time). * * Input: eta: initial eta @@ -243,21 +258,6 @@ static int32_t secp256k1_modinv32_divsteps_30(int32_t zeta, uint32_t f0, uint32_ * Implements the divsteps_n_matrix_var function from the explanation. */ static int32_t secp256k1_modinv32_divsteps_30_var(int32_t eta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t) { - /* inv256[i] = -(2*i+1)^-1 (mod 256) */ - static const uint8_t inv256[128] = { - 0xFF, 0x55, 0x33, 0x49, 0xC7, 0x5D, 0x3B, 0x11, 0x0F, 0xE5, 0xC3, 0x59, - 0xD7, 0xED, 0xCB, 0x21, 0x1F, 0x75, 0x53, 0x69, 0xE7, 0x7D, 0x5B, 0x31, - 0x2F, 0x05, 0xE3, 0x79, 0xF7, 0x0D, 0xEB, 0x41, 0x3F, 0x95, 0x73, 0x89, - 0x07, 0x9D, 0x7B, 0x51, 0x4F, 0x25, 0x03, 0x99, 0x17, 0x2D, 0x0B, 0x61, - 0x5F, 0xB5, 0x93, 0xA9, 0x27, 0xBD, 0x9B, 0x71, 0x6F, 0x45, 0x23, 0xB9, - 0x37, 0x4D, 0x2B, 0x81, 0x7F, 0xD5, 0xB3, 0xC9, 0x47, 0xDD, 0xBB, 0x91, - 0x8F, 0x65, 0x43, 0xD9, 0x57, 0x6D, 0x4B, 0xA1, 0x9F, 0xF5, 0xD3, 0xE9, - 0x67, 0xFD, 0xDB, 0xB1, 0xAF, 0x85, 0x63, 0xF9, 0x77, 0x8D, 0x6B, 0xC1, - 0xBF, 0x15, 0xF3, 0x09, 0x87, 0x1D, 0xFB, 0xD1, 0xCF, 0xA5, 0x83, 0x19, - 0x97, 0xAD, 0x8B, 0xE1, 0xDF, 0x35, 0x13, 0x29, 0xA7, 0x3D, 0x1B, 0xF1, - 0xEF, 0xC5, 0xA3, 0x39, 0xB7, 0xCD, 0xAB, 0x01 - }; - /* Transformation matrix; see comments in secp256k1_modinv32_divsteps_30. */ uint32_t u = 1, v = 0, q = 0, r = 1; uint32_t f = f0, g = g0, m; @@ -297,7 +297,7 @@ static int32_t secp256k1_modinv32_divsteps_30_var(int32_t eta, uint32_t f0, uint VERIFY_CHECK(limit > 0 && limit <= 30); m = (UINT32_MAX >> (32 - limit)) & 255U; /* Find what multiple of f must be added to g to cancel its bottom min(limit, 8) bits. */ - w = (g * inv256[(f >> 1) & 127]) & m; + w = (g * secp256k1_modinv32_inv256[(f >> 1) & 127]) & m; /* Do so. */ g += f * w; q += u * w; @@ -317,6 +317,86 @@ static int32_t secp256k1_modinv32_divsteps_30_var(int32_t eta, uint32_t f0, uint return eta; } +/* Compute the transition matrix and eta for 30 posdivsteps (variable time, eta=-delta), and keeps track + * of the Jacobi symbol along the way. f0 and g0 must be f and g mod 2^32 rather than 2^30, because + * Jacobi tracking requires knowing (f mod 8) rather than just (f mod 2). + * + * Input: eta: initial eta + * f0: bottom limb of initial f + * g0: bottom limb of initial g + * Output: t: transition matrix + * Input/Output: (*jacp & 1) is bitflipped if and only if the Jacobi symbol of (f | g) changes sign + * by applying the returned transformation matrix to it. The other bits of *jacp may + * change, but are meaningless. + * Return: final eta + */ +static int32_t secp256k1_modinv32_posdivsteps_30_var(int32_t eta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t, int *jacp) { + /* Transformation matrix. */ + uint32_t u = 1, v = 0, q = 0, r = 1; + uint32_t f = f0, g = g0, m; + uint16_t w; + int i = 30, limit, zeros; + int jac = *jacp; + + for (;;) { + /* Use a sentinel bit to count zeros only up to i. */ + zeros = secp256k1_ctz32_var(g | (UINT32_MAX << i)); + /* Perform zeros divsteps at once; they all just divide g by two. */ + g >>= zeros; + u <<= zeros; + v <<= zeros; + eta -= zeros; + i -= zeros; + /* Update the bottom bit of jac: when dividing g by an odd power of 2, + * if (f mod 8) is 3 or 5, the Jacobi symbol changes sign. */ + jac ^= (zeros & ((f >> 1) ^ (f >> 2))); + /* We're done once we've done 30 posdivsteps. */ + if (i == 0) break; + VERIFY_CHECK((f & 1) == 1); + VERIFY_CHECK((g & 1) == 1); + VERIFY_CHECK((u * f0 + v * g0) == f << (30 - i)); + VERIFY_CHECK((q * f0 + r * g0) == g << (30 - i)); + /* If eta is negative, negate it and replace f,g with g,f. */ + if (eta < 0) { + uint32_t tmp; + eta = -eta; + /* Update bottom bit of jac: when swapping f and g, the Jacobi symbol changes sign + * if both f and g are 3 mod 4. */ + jac ^= ((f & g) >> 1); + tmp = f; f = g; g = tmp; + tmp = u; u = q; q = tmp; + tmp = v; v = r; r = tmp; + } + /* eta is now >= 0. In what follows we're going to cancel out the bottom bits of g. No more + * than i can be cancelled out (as we'd be done before that point), and no more than eta+1 + * can be done as its sign will flip once that happens. */ + limit = ((int)eta + 1) > i ? i : ((int)eta + 1); + /* m is a mask for the bottom min(limit, 8) bits (our table only supports 8 bits). */ + VERIFY_CHECK(limit > 0 && limit <= 30); + m = (UINT32_MAX >> (32 - limit)) & 255U; + /* Find what multiple of f must be added to g to cancel its bottom min(limit, 8) bits. */ + w = (g * secp256k1_modinv32_inv256[(f >> 1) & 127]) & m; + /* Do so. */ + g += f * w; + q += u * w; + r += v * w; + VERIFY_CHECK((g & m) == 0); + } + /* Return data in t and return value. */ + t->u = (int32_t)u; + t->v = (int32_t)v; + t->q = (int32_t)q; + t->r = (int32_t)r; + /* The determinant of t must be a power of two. This guarantees that multiplication with t + * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which + * will be divided out again). As each divstep's individual matrix has determinant 2 or -2, + * the aggregate of 30 of them will have determinant 2^30 or -2^30. */ + VERIFY_CHECK((int64_t)t->u * t->r - (int64_t)t->v * t->q == ((int64_t)1) << 30 || + (int64_t)t->u * t->r - (int64_t)t->v * t->q == -(((int64_t)1) << 30)); + *jacp = jac; + return eta; +} + /* Compute (t/2^30) * [d, e] mod modulus, where t is a transition matrix for 30 divsteps. * * On input and output, d and e are in range (-2*modulus,modulus). All output limbs will be in range @@ -584,4 +664,74 @@ static void secp256k1_modinv32_var(secp256k1_modinv32_signed30 *x, const secp256 *x = d; } +/* Do up to 50 iterations of 30 posdivsteps (up to 1500 steps; more is extremely rare) each until f=1. + * In VERIFY mode use a lower number of iterations (750, close to the median 756), so failure actually occurs. */ +#ifdef VERIFY +#define JACOBI32_ITERATIONS 25 +#else +#define JACOBI32_ITERATIONS 50 +#endif + +/* Compute the Jacobi symbol of x modulo modinfo->modulus (variable time). gcd(x,modulus) must be 1. */ +static int secp256k1_jacobi32_maybe_var(const secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo) { + /* Start with f=modulus, g=x, eta=-1. */ + secp256k1_modinv32_signed30 f = modinfo->modulus; + secp256k1_modinv32_signed30 g = *x; + int j, len = 9; + int32_t eta = -1; /* eta = -delta; delta is initially 1 */ + int32_t cond, fn, gn; + int jac = 0; + int count; + + /* The input limbs must all be non-negative. */ + VERIFY_CHECK(g.v[0] >= 0 && g.v[1] >= 0 && g.v[2] >= 0 && g.v[3] >= 0 && g.v[4] >= 0 && g.v[5] >= 0 && g.v[6] >= 0 && g.v[7] >= 0 && g.v[8] >= 0); + + /* If x > 0, then if the loop below converges, it converges to f=g=gcd(x,modulus). Since we + * require that gcd(x,modulus)=1 and modulus>=3, x cannot be 0. Thus, we must reach f=1 (or + * time out). */ + VERIFY_CHECK((g.v[0] | g.v[1] | g.v[2] | g.v[3] | g.v[4] | g.v[5] | g.v[6] | g.v[7] | g.v[8]) != 0); + + for (count = 0; count < JACOBI32_ITERATIONS; ++count) { + /* Compute transition matrix and new eta after 30 posdivsteps. */ + secp256k1_modinv32_trans2x2 t; + eta = secp256k1_modinv32_posdivsteps_30_var(eta, f.v[0] | ((uint32_t)f.v[1] << 30), g.v[0] | ((uint32_t)g.v[1] << 30), &t, &jac); + /* Update f,g using that transition matrix. */ +#ifdef VERIFY + VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 0) > 0); /* f > 0 */ + VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */ + VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 0) > 0); /* g > 0 */ + VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */ +#endif + secp256k1_modinv32_update_fg_30_var(len, &f, &g, &t); + /* If the bottom limb of f is 1, there is a chance that f=1. */ + if (f.v[0] == 1) { + cond = 0; + /* Check if the other limbs are also 0. */ + for (j = 1; j < len; ++j) { + cond |= f.v[j]; + } + /* If so, we're done. If f=1, the Jacobi symbol (g | f)=1. */ + if (cond == 0) return 1 - 2*(jac & 1); + } + + /* Determine if len>1 and limb (len-1) of both f and g is 0. */ + fn = f.v[len - 1]; + gn = g.v[len - 1]; + cond = ((int32_t)len - 2) >> 31; + cond |= fn; + cond |= gn; + /* If so, reduce length. */ + if (cond == 0) --len; +#ifdef VERIFY + VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 0) > 0); /* f > 0 */ + VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */ + VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 0) > 0); /* g > 0 */ + VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */ +#endif + } + + /* The loop failed to converge to f=g after 1500 iterations. Return 0, indicating unknown result. */ + return 0; +} + #endif /* SECP256K1_MODINV32_IMPL_H */ diff --git a/src/modinv64.h b/src/modinv64.h index 1d69f14f3..f4208e6c2 100644 --- a/src/modinv64.h +++ b/src/modinv64.h @@ -39,4 +39,9 @@ static void secp256k1_modinv64_var(secp256k1_modinv64_signed62 *x, const secp256 /* Same as secp256k1_modinv64_var, but constant time in x (not in the modulus). */ static void secp256k1_modinv64(secp256k1_modinv64_signed62 *x, const secp256k1_modinv64_modinfo *modinfo); +/* Compute the Jacobi symbol for (x | modinfo->modulus). x must be coprime with modulus (and thus + * cannot be 0, as modulus >= 3). All limbs of x must be non-negative. Returns 0 if the result + * cannot be computed. */ +static int secp256k1_jacobi64_maybe_var(const secp256k1_modinv64_signed62 *x, const secp256k1_modinv64_modinfo *modinfo); + #endif /* SECP256K1_MODINV64_H */ diff --git a/src/modinv64_impl.h b/src/modinv64_impl.h index 2b5463a3f..df9aedff8 100644 --- a/src/modinv64_impl.h +++ b/src/modinv64_impl.h @@ -71,11 +71,13 @@ static int secp256k1_modinv64_mul_cmp_62(const secp256k1_modinv64_signed62 *a, i return 0; } -/* Check if the determinant of t is equal to 1 << n. */ -static int secp256k1_modinv64_det_check_pow2(const secp256k1_modinv64_trans2x2 *t, unsigned int n) { +/* Check if the determinant of t is equal to 1 << n. If abs, check if |det t| == 1 << n. */ +static int secp256k1_modinv64_det_check_pow2(const secp256k1_modinv64_trans2x2 *t, unsigned int n, int abs) { secp256k1_int128 a; secp256k1_i128_det(&a, t->u, t->v, t->q, t->r); - return secp256k1_i128_check_pow2(&a, n); + if (secp256k1_i128_check_pow2(&a, n, 1)) return 1; + if (abs && secp256k1_i128_check_pow2(&a, n, -1)) return 1; + return 0; } #endif @@ -218,7 +220,7 @@ static int64_t secp256k1_modinv64_divsteps_59(int64_t zeta, uint64_t f0, uint64_ * aggregate of 59 of them will have determinant 2^59. Multiplying with the initial * 8*identity (which has determinant 2^6) means the overall outputs has determinant * 2^65. */ - VERIFY_CHECK(secp256k1_modinv64_det_check_pow2(t, 65)); + VERIFY_CHECK(secp256k1_modinv64_det_check_pow2(t, 65, 0)); #endif return zeta; } @@ -266,7 +268,7 @@ static int64_t secp256k1_modinv64_divsteps_62_var(int64_t eta, uint64_t f0, uint tmp = v; v = r; r = -tmp; /* Use a formula to cancel out up to 6 bits of g. Also, no more than i can be cancelled * out (as we'd be done before that point), and no more than eta+1 can be done as its - * will flip again once that happens. */ + * sign will flip again once that happens. */ limit = ((int)eta + 1) > i ? i : ((int)eta + 1); VERIFY_CHECK(limit > 0 && limit <= 62); /* m is a mask for the bottom min(limit, 6) bits. */ @@ -301,8 +303,100 @@ static int64_t secp256k1_modinv64_divsteps_62_var(int64_t eta, uint64_t f0, uint * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which * will be divided out again). As each divstep's individual matrix has determinant 2, the * aggregate of 62 of them will have determinant 2^62. */ - VERIFY_CHECK(secp256k1_modinv64_det_check_pow2(t, 62)); + VERIFY_CHECK(secp256k1_modinv64_det_check_pow2(t, 62, 0)); +#endif + return eta; +} + +/* Compute the transition matrix and eta for 62 posdivsteps (variable time, eta=-delta), and keeps track + * of the Jacobi symbol along the way. f0 and g0 must be f and g mod 2^64 rather than 2^62, because + * Jacobi tracking requires knowing (f mod 8) rather than just (f mod 2). + * + * Input: eta: initial eta + * f0: bottom limb of initial f + * g0: bottom limb of initial g + * Output: t: transition matrix + * Input/Output: (*jacp & 1) is bitflipped if and only if the Jacobi symbol of (f | g) changes sign + * by applying the returned transformation matrix to it. The other bits of *jacp may + * change, but are meaningless. + * Return: final eta + */ +static int64_t secp256k1_modinv64_posdivsteps_62_var(int64_t eta, uint64_t f0, uint64_t g0, secp256k1_modinv64_trans2x2 *t, int *jacp) { + /* Transformation matrix; see comments in secp256k1_modinv64_divsteps_62. */ + uint64_t u = 1, v = 0, q = 0, r = 1; + uint64_t f = f0, g = g0, m; + uint32_t w; + int i = 62, limit, zeros; + int jac = *jacp; + + for (;;) { + /* Use a sentinel bit to count zeros only up to i. */ + zeros = secp256k1_ctz64_var(g | (UINT64_MAX << i)); + /* Perform zeros divsteps at once; they all just divide g by two. */ + g >>= zeros; + u <<= zeros; + v <<= zeros; + eta -= zeros; + i -= zeros; + /* Update the bottom bit of jac: when dividing g by an odd power of 2, + * if (f mod 8) is 3 or 5, the Jacobi symbol changes sign. */ + jac ^= (zeros & ((f >> 1) ^ (f >> 2))); + /* We're done once we've done 62 posdivsteps. */ + if (i == 0) break; + VERIFY_CHECK((f & 1) == 1); + VERIFY_CHECK((g & 1) == 1); + VERIFY_CHECK((u * f0 + v * g0) == f << (62 - i)); + VERIFY_CHECK((q * f0 + r * g0) == g << (62 - i)); + /* If eta is negative, negate it and replace f,g with g,f. */ + if (eta < 0) { + uint64_t tmp; + eta = -eta; + tmp = f; f = g; g = tmp; + tmp = u; u = q; q = tmp; + tmp = v; v = r; r = tmp; + /* Update bottom bit of jac: when swapping f and g, the Jacobi symbol changes sign + * if both f and g are 3 mod 4. */ + jac ^= ((f & g) >> 1); + /* Use a formula to cancel out up to 6 bits of g. Also, no more than i can be cancelled + * out (as we'd be done before that point), and no more than eta+1 can be done as its + * sign will flip again once that happens. */ + limit = ((int)eta + 1) > i ? i : ((int)eta + 1); + VERIFY_CHECK(limit > 0 && limit <= 62); + /* m is a mask for the bottom min(limit, 6) bits. */ + m = (UINT64_MAX >> (64 - limit)) & 63U; + /* Find what multiple of f must be added to g to cancel its bottom min(limit, 6) + * bits. */ + w = (f * g * (f * f - 2)) & m; + } else { + /* In this branch, use a simpler formula that only lets us cancel up to 4 bits of g, as + * eta tends to be smaller here. */ + limit = ((int)eta + 1) > i ? i : ((int)eta + 1); + VERIFY_CHECK(limit > 0 && limit <= 62); + /* m is a mask for the bottom min(limit, 4) bits. */ + m = (UINT64_MAX >> (64 - limit)) & 15U; + /* Find what multiple of f must be added to g to cancel its bottom min(limit, 4) + * bits. */ + w = f + (((f + 1) & 4) << 1); + w = (-w * g) & m; + } + g += f * w; + q += u * w; + r += v * w; + VERIFY_CHECK((g & m) == 0); + } + /* Return data in t and return value. */ + t->u = (int64_t)u; + t->v = (int64_t)v; + t->q = (int64_t)q; + t->r = (int64_t)r; +#ifdef VERIFY + /* The determinant of t must be a power of two. This guarantees that multiplication with t + * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which + * will be divided out again). As each divstep's individual matrix has determinant 2 or -2, + * the aggregate of 62 of them will have determinant 2^62 or -2^62. */ + VERIFY_CHECK(secp256k1_modinv64_det_check_pow2(t, 62, 1)); #endif + *jacp = jac; return eta; } @@ -626,4 +720,74 @@ static void secp256k1_modinv64_var(secp256k1_modinv64_signed62 *x, const secp256 *x = d; } +/* Do up to 25 iterations of 62 posdivsteps (up to 1550 steps; more is extremely rare) each until f=1. + * In VERIFY mode use a lower number of iterations (744, close to the median 756), so failure actually occurs. */ +#ifdef VERIFY +#define JACOBI64_ITERATIONS 12 +#else +#define JACOBI64_ITERATIONS 25 +#endif + +/* Compute the Jacobi symbol of x modulo modinfo->modulus (variable time). gcd(x,modulus) must be 1. */ +static int secp256k1_jacobi64_maybe_var(const secp256k1_modinv64_signed62 *x, const secp256k1_modinv64_modinfo *modinfo) { + /* Start with f=modulus, g=x, eta=-1. */ + secp256k1_modinv64_signed62 f = modinfo->modulus; + secp256k1_modinv64_signed62 g = *x; + int j, len = 5; + int64_t eta = -1; /* eta = -delta; delta is initially 1 */ + int64_t cond, fn, gn; + int jac = 0; + int count; + + /* The input limbs must all be non-negative. */ + VERIFY_CHECK(g.v[0] >= 0 && g.v[1] >= 0 && g.v[2] >= 0 && g.v[3] >= 0 && g.v[4] >= 0); + + /* If x > 0, then if the loop below converges, it converges to f=g=gcd(x,modulus). Since we + * require that gcd(x,modulus)=1 and modulus>=3, x cannot be 0. Thus, we must reach f=1 (or + * time out). */ + VERIFY_CHECK((g.v[0] | g.v[1] | g.v[2] | g.v[3] | g.v[4]) != 0); + + for (count = 0; count < JACOBI64_ITERATIONS; ++count) { + /* Compute transition matrix and new eta after 62 posdivsteps. */ + secp256k1_modinv64_trans2x2 t; + eta = secp256k1_modinv64_posdivsteps_62_var(eta, f.v[0] | ((uint64_t)f.v[1] << 62), g.v[0] | ((uint64_t)g.v[1] << 62), &t, &jac); + /* Update f,g using that transition matrix. */ +#ifdef VERIFY + VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&f, len, &modinfo->modulus, 0) > 0); /* f > 0 */ + VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */ + VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&g, len, &modinfo->modulus, 0) > 0); /* g > 0 */ + VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */ +#endif + secp256k1_modinv64_update_fg_62_var(len, &f, &g, &t); + /* If the bottom limb of f is 1, there is a chance that f=1. */ + if (f.v[0] == 1) { + cond = 0; + /* Check if the other limbs are also 0. */ + for (j = 1; j < len; ++j) { + cond |= f.v[j]; + } + /* If so, we're done. When f=1, the Jacobi symbol (g | f)=1. */ + if (cond == 0) return 1 - 2*(jac & 1); + } + + /* Determine if len>1 and limb (len-1) of both f and g is 0. */ + fn = f.v[len - 1]; + gn = g.v[len - 1]; + cond = ((int64_t)len - 2) >> 63; + cond |= fn; + cond |= gn; + /* If so, reduce length. */ + if (cond == 0) --len; +#ifdef VERIFY + VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&f, len, &modinfo->modulus, 0) > 0); /* f > 0 */ + VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */ + VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&g, len, &modinfo->modulus, 0) > 0); /* g > 0 */ + VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */ +#endif + } + + /* The loop failed to converge to f=g after 1550 iterations. Return 0, indicating unknown result. */ + return 0; +} + #endif /* SECP256K1_MODINV64_IMPL_H */ diff --git a/src/tests.c b/src/tests.c index bc5b7cb1f..e80397cb9 100644 --- a/src/tests.c +++ b/src/tests.c @@ -1022,12 +1022,32 @@ static void test_modinv32_uint16(uint16_t* out, const uint16_t* in, const uint16 uint16_to_signed30(&x, in); nonzero = (x.v[0] | x.v[1] | x.v[2] | x.v[3] | x.v[4] | x.v[5] | x.v[6] | x.v[7] | x.v[8]) != 0; uint16_to_signed30(&m.modulus, mod); - mutate_sign_signed30(&m.modulus); /* compute 1/modulus mod 2^30 */ m.modulus_inv30 = modinv2p64(m.modulus.v[0]) & 0x3fffffff; CHECK(((m.modulus_inv30 * m.modulus.v[0]) & 0x3fffffff) == 1); + /* Test secp256k1_jacobi32_maybe_var. */ + if (nonzero) { + int jac; + uint16_t sqr[16], negone[16]; + mulmod256(sqr, in, in, mod); + uint16_to_signed30(&x, sqr); + /* Compute jacobi symbol of in^2, which must be 1 (or uncomputable). */ + jac = secp256k1_jacobi32_maybe_var(&x, &m); + CHECK(jac == 0 || jac == 1); + /* Then compute the jacobi symbol of -(in^2). x and -x have opposite + * jacobi symbols if and only if (mod % 4) == 3. */ + negone[0] = mod[0] - 1; + for (i = 1; i < 16; ++i) negone[i] = mod[i]; + mulmod256(sqr, sqr, negone, mod); + uint16_to_signed30(&x, sqr); + jac = secp256k1_jacobi32_maybe_var(&x, &m); + CHECK(jac == 0 || jac == 1 - (mod[0] & 2)); + } + + uint16_to_signed30(&x, in); + mutate_sign_signed30(&m.modulus); for (vartime = 0; vartime < 2; ++vartime) { /* compute inverse */ (vartime ? secp256k1_modinv32_var : secp256k1_modinv32)(&x, &m); @@ -1095,12 +1115,32 @@ static void test_modinv64_uint16(uint16_t* out, const uint16_t* in, const uint16 uint16_to_signed62(&x, in); nonzero = (x.v[0] | x.v[1] | x.v[2] | x.v[3] | x.v[4]) != 0; uint16_to_signed62(&m.modulus, mod); - mutate_sign_signed62(&m.modulus); /* compute 1/modulus mod 2^62 */ m.modulus_inv62 = modinv2p64(m.modulus.v[0]) & M62; CHECK(((m.modulus_inv62 * m.modulus.v[0]) & M62) == 1); + /* Test secp256k1_jacobi64_maybe_var. */ + if (nonzero) { + int jac; + uint16_t sqr[16], negone[16]; + mulmod256(sqr, in, in, mod); + uint16_to_signed62(&x, sqr); + /* Compute jacobi symbol of in^2, which must be 1 (or uncomputable). */ + jac = secp256k1_jacobi64_maybe_var(&x, &m); + CHECK(jac == 0 || jac == 1); + /* Then compute the jacobi symbol of -(in^2). x and -x have opposite + * jacobi symbols if and only if (mod % 4) == 3. */ + negone[0] = mod[0] - 1; + for (i = 1; i < 16; ++i) negone[i] = mod[i]; + mulmod256(sqr, sqr, negone, mod); + uint16_to_signed62(&x, sqr); + jac = secp256k1_jacobi64_maybe_var(&x, &m); + CHECK(jac == 0 || jac == 1 - (mod[0] & 2)); + } + + uint16_to_signed62(&x, in); + mutate_sign_signed62(&m.modulus); for (vartime = 0; vartime < 2; ++vartime) { /* compute inverse */ (vartime ? secp256k1_modinv64_var : secp256k1_modinv64)(&x, &m); @@ -2022,30 +2062,56 @@ static void run_int128_test_case(void) { } CHECK(secp256k1_i128_eq_var(&swa, &swz) == expect); } - /* test secp256k1_i128_check_pow2 */ + /* test secp256k1_i128_check_pow2 (sign == 1) */ { int expect = (uc & 1); int pos = ub % 127; if (expect) { - /* If expect==1, set swz to exactly (2 << pos). */ + /* If expect==1, set swz to exactly 2^pos. */ uint64_t hi = 0; uint64_t lo = 0; - if (pos & 64) { + if (pos >= 64) { hi = (((uint64_t)1) << (pos & 63)); } else { lo = (((uint64_t)1) << (pos & 63)); } secp256k1_i128_load(&swz, hi, lo); } else { - /* If expect==0, set swz = swa, but update expect=1 if swa happens to equal (2 << pos). */ - if (pos & 64) { + /* If expect==0, set swz = swa, but update expect=1 if swa happens to equal 2^pos. */ + if (pos >= 64) { if ((v[1] == (((uint64_t)1) << (pos & 63))) && v[0] == 0) expect = 1; } else { if ((v[0] == (((uint64_t)1) << (pos & 63))) && v[1] == 0) expect = 1; } swz = swa; } - CHECK(secp256k1_i128_check_pow2(&swz, pos) == expect); + CHECK(secp256k1_i128_check_pow2(&swz, pos, 1) == expect); + } + /* test secp256k1_i128_check_pow2 (sign == -1) */ + { + int expect = (uc & 1); + int pos = ub % 127; + if (expect) { + /* If expect==1, set swz to exactly -2^pos. */ + uint64_t hi = ~(uint64_t)0; + uint64_t lo = ~(uint64_t)0; + if (pos >= 64) { + hi <<= (pos & 63); + lo = 0; + } else { + lo <<= (pos & 63); + } + secp256k1_i128_load(&swz, hi, lo); + } else { + /* If expect==0, set swz = swa, but update expect=1 if swa happens to equal -2^pos. */ + if (pos >= 64) { + if ((v[1] == ((~(uint64_t)0) << (pos & 63))) && v[0] == 0) expect = 1; + } else { + if ((v[0] == ((~(uint64_t)0) << (pos & 63))) && v[1] == ~(uint64_t)0) expect = 1; + } + swz = swa; + } + CHECK(secp256k1_i128_check_pow2(&swz, pos, -1) == expect); } } @@ -3233,8 +3299,10 @@ static void run_sqrt(void) { for (j = 0; j < COUNT; j++) { random_fe(&x); secp256k1_fe_sqr(&s, &x); + CHECK(secp256k1_fe_is_square_var(&s)); test_sqrt(&s, &x); secp256k1_fe_negate(&t, &s, 1); + CHECK(!secp256k1_fe_is_square_var(&t)); test_sqrt(&t, NULL); secp256k1_fe_mul(&t, &s, &ns); test_sqrt(&t, NULL);