Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reciprocal square root, parallel r-sqrt/inv #362

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions src/bench_internal.c
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,26 @@ void bench_field_sqrt_var(void* arg) {
}
}

void bench_field_rsqrt_var(void* arg) {
int i;
bench_inv_t *data = (bench_inv_t*)arg;

for (i = 0; i < 20000; i++) {
secp256k1_fe_rsqrt_var(&data->fe_x, &data->fe_y, &data->fe_x);
secp256k1_fe_add(&data->fe_x, &data->fe_y);
}
}

void bench_field_par_rsqrt_inv_var(void* arg) {
int i;
bench_inv_t *data = (bench_inv_t*)arg;

for (i = 0; i < 20000; i++) {
secp256k1_fe_par_rsqrt_inv_var(&data->fe_x, &data->fe_y, &data->fe_x, &data->fe_y);
secp256k1_fe_add(&data->fe_x, &data->fe_y);
}
}

void bench_group_double_var(void* arg) {
int i;
bench_inv_t *data = (bench_inv_t*)arg;
Expand Down Expand Up @@ -334,6 +354,8 @@ int main(int argc, char **argv) {
if (have_flag(argc, argv, "field") || have_flag(argc, argv, "inverse")) run_benchmark("field_inverse", bench_field_inverse, bench_setup, NULL, &data, 10, 20000);
if (have_flag(argc, argv, "field") || have_flag(argc, argv, "inverse")) run_benchmark("field_inverse_var", bench_field_inverse_var, bench_setup, NULL, &data, 10, 20000);
if (have_flag(argc, argv, "field") || have_flag(argc, argv, "sqrt")) run_benchmark("field_sqrt_var", bench_field_sqrt_var, bench_setup, NULL, &data, 10, 20000);
if (have_flag(argc, argv, "field") || have_flag(argc, argv, "sqrt")) run_benchmark("field_rsqrt_var", bench_field_rsqrt_var, bench_setup, NULL, &data, 10, 20000);
if (have_flag(argc, argv, "field") || have_flag(argc, argv, "sqrt") || have_flag(argc, argv, "inverse")) run_benchmark("field_par_rsqrt_inv_var", bench_field_par_rsqrt_inv_var, bench_setup, NULL, &data, 10, 20000);

if (have_flag(argc, argv, "group") || have_flag(argc, argv, "double")) run_benchmark("group_double_var", bench_group_double_var, bench_setup, NULL, &data, 10, 200000);
if (have_flag(argc, argv, "group") || have_flag(argc, argv, "add")) run_benchmark("group_add_var", bench_group_add_var, bench_setup, NULL, &data, 10, 200000);
Expand Down
28 changes: 21 additions & 7 deletions src/field.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,11 @@ static void secp256k1_fe_normalize_weak(secp256k1_fe *r);
/** Normalize a field element, without constant-time guarantee. */
static void secp256k1_fe_normalize_var(secp256k1_fe *r);

/** Verify whether a field element represents zero i.e. would normalize to a zero value. The field
* implementation may optionally normalize the input, but this should not be relied upon. */
static int secp256k1_fe_normalizes_to_zero(secp256k1_fe *r);
/** Verify whether a field element represents zero i.e. would normalize to a zero value. */
static int secp256k1_fe_normalizes_to_zero(const secp256k1_fe *r);

/** Verify whether a field element represents zero i.e. would normalize to a zero value. The field
* implementation may optionally normalize the input, but this should not be relied upon. */
static int secp256k1_fe_normalizes_to_zero_var(secp256k1_fe *r);
/** Verify whether a field element represents zero i.e. would normalize to a zero value. */
static int secp256k1_fe_normalizes_to_zero_var(const secp256k1_fe *r);

/** Set a field element equal to a small integer. Resulting field element is normalized. */
static void secp256k1_fe_set_int(secp256k1_fe *r, int a);
Expand Down Expand Up @@ -88,12 +86,28 @@ static void secp256k1_fe_mul(secp256k1_fe *r, const secp256k1_fe *a, const secp2
static void secp256k1_fe_sqr(secp256k1_fe *r, const secp256k1_fe *a);

/** If a has a square root, it is computed in r and 1 is returned. If a does not
* have a square root, the root of its negation is computed and 0 is returned.
* have a square root, the root of -a is computed and 0 is returned.
* The input's magnitude can be at most 8. The output magnitude is 1 (but not
* guaranteed to be normalized). The result in r will always be a square
* itself. */
static int secp256k1_fe_sqrt_var(secp256k1_fe *r, const secp256k1_fe *a);

/** If a has a square root, the square root is computed in rs, its reciprocal square root is
* calculated in rr, and 1 is returned. If a does not have a square root, the root (and recip. root)
* of -a are computed and 0 is returned. The input's magnitude can be at most 8. The
* outputs' magnitudes are 1 (but not guaranteed to be normalized). The result in rs will always be
* a square itself. The result in rr will be a square if, and only if, a is a square.
*/
static int secp256k1_fe_rsqrt_var(secp256k1_fe *rs, secp256k1_fe *rr, const secp256k1_fe *a);

/** Parallel reciprocal square root and inverse. Sets ri to be the (modular) inverse of b. If a has a
* square root, the reciprocal of its square root is computed in rr and 1 is returned. If a does not
* have a square root, the reciprocal root of -a is computed and 0 is returned. The inputs'
* magnitudes can be at most 8. The outputs' magnitudes are 1 (but not guaranteed to be normalized).
* The result in rr will be a square if, and only if, a is a square.
*/
static int secp256k1_fe_par_rsqrt_inv_var(secp256k1_fe *rr, secp256k1_fe *ri, const secp256k1_fe *a, const secp256k1_fe *b);

/** Sets a field element to be the (modular) inverse of another. Requires the input's magnitude to be
* at most 8. The output magnitude is 1 (but not guaranteed to be normalized). */
static void secp256k1_fe_inv(secp256k1_fe *r, const secp256k1_fe *a);
Expand Down
4 changes: 2 additions & 2 deletions src/field_10x26_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ static void secp256k1_fe_normalize_var(secp256k1_fe *r) {
#endif
}

static int secp256k1_fe_normalizes_to_zero(secp256k1_fe *r) {
static int secp256k1_fe_normalizes_to_zero(const secp256k1_fe *r) {
uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];

Expand Down Expand Up @@ -217,7 +217,7 @@ static int secp256k1_fe_normalizes_to_zero(secp256k1_fe *r) {
return (z0 == 0) | (z1 == 0x3FFFFFFUL);
}

static int secp256k1_fe_normalizes_to_zero_var(secp256k1_fe *r) {
static int secp256k1_fe_normalizes_to_zero_var(const secp256k1_fe *r) {
uint32_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9;
uint32_t z0, z1;
uint32_t x;
Expand Down
4 changes: 2 additions & 2 deletions src/field_5x52_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ static void secp256k1_fe_normalize_var(secp256k1_fe *r) {
#endif
}

static int secp256k1_fe_normalizes_to_zero(secp256k1_fe *r) {
static int secp256k1_fe_normalizes_to_zero(const secp256k1_fe *r) {
uint64_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4];

/* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */
Expand All @@ -190,7 +190,7 @@ static int secp256k1_fe_normalizes_to_zero(secp256k1_fe *r) {
return (z0 == 0) | (z1 == 0xFFFFFFFFFFFFFULL);
}

static int secp256k1_fe_normalizes_to_zero_var(secp256k1_fe *r) {
static int secp256k1_fe_normalizes_to_zero_var(const secp256k1_fe *r) {
uint64_t t0, t1, t2, t3, t4;
uint64_t z0, z1;
uint64_t x;
Expand Down
182 changes: 87 additions & 95 deletions src/field_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,29 +28,21 @@ SECP256K1_INLINE static int secp256k1_fe_equal_var(const secp256k1_fe *a, const
return secp256k1_fe_normalizes_to_zero_var(&na);
}

static int secp256k1_fe_sqrt_var(secp256k1_fe *r, const secp256k1_fe *a) {
/** Given that p is congruent to 3 mod 4, we can compute the square root of
* a mod p as the (p+1)/4'th power of a.
*
* As (p+1)/4 is an even number, it will have the same result for a and for
* (-a). Only one of these two numbers actually has a square root however,
* so we test at the end by squaring and comparing to the input.
* Also because (p+1)/4 is an even number, the computed square root is
* itself always a square (a ** ((p+1)/4) is the square of a ** ((p+1)/8)).
*/
secp256k1_fe x2, x3, x6, x9, x11, x22, x44, x88, x176, x220, x223, t1;
static void secp256k1_fe_common_exp(secp256k1_fe *r1, secp256k1_fe *r2, const secp256k1_fe *a) {
secp256k1_fe t, x, x2, x3, x6, x9, x11, x22, x44, x88, x176, x220, x223;
int j;

/** The binary representation of (p + 1)/4 has 3 blocks of 1s, with lengths in
* { 2, 22, 223 }. Use an addition chain to calculate 2^n - 1 for each block:
* 1, [2], 3, 6, 9, 11, [22], 44, 88, 176, 220, [223]
*/
CHECK(r1 != r2);

x = *a;

secp256k1_fe_sqr(&x2, &x);
secp256k1_fe_mul(&x2, &x2, &x);

secp256k1_fe_sqr(&x2, a);
secp256k1_fe_mul(&x2, &x2, a);
*r2 = x2;

secp256k1_fe_sqr(&x3, &x2);
secp256k1_fe_mul(&x3, &x3, a);
secp256k1_fe_mul(&x3, &x3, &x);

x6 = x3;
for (j=0; j<3; j++) {
Expand Down Expand Up @@ -108,112 +100,112 @@ static int secp256k1_fe_sqrt_var(secp256k1_fe *r, const secp256k1_fe *a) {

/* The final result is then assembled using a sliding window over the blocks. */

t1 = x223;
t = x223;
for (j=0; j<23; j++) {
secp256k1_fe_sqr(&t1, &t1);
secp256k1_fe_sqr(&t, &t);
}
secp256k1_fe_mul(&t1, &t1, &x22);
for (j=0; j<6; j++) {
secp256k1_fe_sqr(&t1, &t1);
secp256k1_fe_mul(&t, &t, &x22);

for (j=0; j<5; j++) {
secp256k1_fe_sqr(&t, &t);
}
secp256k1_fe_mul(&t1, &t1, &x2);
secp256k1_fe_sqr(&t1, &t1);
secp256k1_fe_sqr(r, &t1);
*r1 = t;
}

static int secp256k1_fe_sqrt_var(secp256k1_fe *r, const secp256k1_fe *a) {
secp256k1_fe t, x, x2;

x = *a;

secp256k1_fe_common_exp(&t, &x2, &x);

secp256k1_fe_sqr(&t, &t);
secp256k1_fe_mul(&t, &t, &x2);
secp256k1_fe_sqr(&t, &t);
secp256k1_fe_sqr(&t, &t);

*r = t;

/* Check that a square root was actually calculated */

secp256k1_fe_sqr(&t1, r);
return secp256k1_fe_equal_var(&t1, a);
secp256k1_fe_sqr(&t, &t);
return secp256k1_fe_equal_var(&t, &x);
}

static void secp256k1_fe_inv(secp256k1_fe *r, const secp256k1_fe *a) {
secp256k1_fe x2, x3, x6, x9, x11, x22, x44, x88, x176, x220, x223, t1;
static int secp256k1_fe_rsqrt_var(secp256k1_fe *rs, secp256k1_fe *rr, const secp256k1_fe *a) {
secp256k1_fe t, x, x2;
int j;

/** The binary representation of (p - 2) has 5 blocks of 1s, with lengths in
* { 1, 2, 22, 223 }. Use an addition chain to calculate 2^n - 1 for each block:
* [1], [2], 3, 6, 9, 11, [22], 44, 88, 176, 220, [223]
*/
CHECK(rs != rr);

secp256k1_fe_sqr(&x2, a);
secp256k1_fe_mul(&x2, &x2, a);
x = *a;

secp256k1_fe_sqr(&x3, &x2);
secp256k1_fe_mul(&x3, &x3, a);
secp256k1_fe_common_exp(&t, &x2, &x);

x6 = x3;
secp256k1_fe_mul(&t, &t, &x);
for (j=0; j<3; j++) {
secp256k1_fe_sqr(&x6, &x6);
secp256k1_fe_sqr(&t, &t);
}
secp256k1_fe_mul(&x6, &x6, &x3);
secp256k1_fe_mul(&t, &t, &x2);

x9 = x6;
for (j=0; j<3; j++) {
secp256k1_fe_sqr(&x9, &x9);
}
secp256k1_fe_mul(&x9, &x9, &x3);
*rr = t;

x11 = x9;
for (j=0; j<2; j++) {
secp256k1_fe_sqr(&x11, &x11);
}
secp256k1_fe_mul(&x11, &x11, &x2);
secp256k1_fe_mul(&t, &t, &x);

x22 = x11;
for (j=0; j<11; j++) {
secp256k1_fe_sqr(&x22, &x22);
}
secp256k1_fe_mul(&x22, &x22, &x11);
*rs = t;

x44 = x22;
for (j=0; j<22; j++) {
secp256k1_fe_sqr(&x44, &x44);
}
secp256k1_fe_mul(&x44, &x44, &x22);
/* Check that a square root was actually calculated */

x88 = x44;
for (j=0; j<44; j++) {
secp256k1_fe_sqr(&x88, &x88);
}
secp256k1_fe_mul(&x88, &x88, &x44);
secp256k1_fe_sqr(&t, &t);
return secp256k1_fe_equal_var(&t, &x);
}

x176 = x88;
for (j=0; j<88; j++) {
secp256k1_fe_sqr(&x176, &x176);
}
secp256k1_fe_mul(&x176, &x176, &x88);
static int secp256k1_fe_par_rsqrt_inv_var(secp256k1_fe *rr, secp256k1_fe *ri, const secp256k1_fe *a, const secp256k1_fe *b) {

x220 = x176;
for (j=0; j<44; j++) {
secp256k1_fe_sqr(&x220, &x220);
}
secp256k1_fe_mul(&x220, &x220, &x44);
secp256k1_fe b2, ab2, ab4, sqrt, recip, t;
int ret;

x223 = x220;
for (j=0; j<3; j++) {
secp256k1_fe_sqr(&x223, &x223);
}
secp256k1_fe_mul(&x223, &x223, &x3);
CHECK(rr != ri);

/* The final result is then assembled using a sliding window over the blocks. */
/* Zero inputs could possibly be handled with conditional moves, if necessary */
CHECK(!secp256k1_fe_normalizes_to_zero(a) && !secp256k1_fe_normalizes_to_zero(b));

t1 = x223;
for (j=0; j<23; j++) {
secp256k1_fe_sqr(&t1, &t1);
}
secp256k1_fe_mul(&t1, &t1, &x22);
for (j=0; j<5; j++) {
secp256k1_fe_sqr(&t1, &t1);
}
secp256k1_fe_mul(&t1, &t1, a);
/* Calculate the reciprocal sqrt of a.b^4 */

secp256k1_fe_sqr(&b2, b);
secp256k1_fe_mul(&ab2, &b2, a);
secp256k1_fe_mul(&ab4, &ab2, &b2);

ret = secp256k1_fe_rsqrt_var(&sqrt, &recip, &ab4);

/* Inverse */
secp256k1_fe_sqr(&t, &recip);
secp256k1_fe_mul(&t, &t, &ab2);
secp256k1_fe_mul(&t, &t, b);
secp256k1_fe_sqr(&t, &t);
secp256k1_fe_mul(ri, b, &t);

/* Reciprocal */
secp256k1_fe_mul(rr, &recip, &b2);

return ret;
}

static void secp256k1_fe_inv(secp256k1_fe *r, const secp256k1_fe *a) {
secp256k1_fe t, x2;
int j;

secp256k1_fe_common_exp(&t, &x2, a);

secp256k1_fe_mul(&t, &t, a);
for (j=0; j<3; j++) {
secp256k1_fe_sqr(&t1, &t1);
secp256k1_fe_sqr(&t, &t);
}
secp256k1_fe_mul(&t1, &t1, &x2);
secp256k1_fe_mul(&t, &t, &x2);
for (j=0; j<2; j++) {
secp256k1_fe_sqr(&t1, &t1);
secp256k1_fe_sqr(&t, &t);
}
secp256k1_fe_mul(r, a, &t1);
secp256k1_fe_mul(r, a, &t);
}

static void secp256k1_fe_inv_var(secp256k1_fe *r, const secp256k1_fe *a) {
Expand Down
Loading