From 6cd3777b7d9c818e473e27f2eb5d1f5007b8816b Mon Sep 17 00:00:00 2001 From: "Diego F. Aranha" Date: Mon, 12 Aug 2024 16:37:07 +0200 Subject: [PATCH] Speedup for G1 in other curves. --- src/epx/relic_ep4_mul.c | 57 ++++++++++++++++----------- src/epx/relic_ep8_mul.c | 53 +++++++++++++++---------- src/low/x64-asm-8l/relic_bn_mul_low.c | 2 +- 3 files changed, 67 insertions(+), 45 deletions(-) diff --git a/src/epx/relic_ep4_mul.c b/src/epx/relic_ep4_mul.c index a6cb8a5ba..e010ac30f 100644 --- a/src/epx/relic_ep4_mul.c +++ b/src/epx/relic_ep4_mul.c @@ -82,23 +82,27 @@ static void ep4_psi(ep4_t r, const ep4_t p) { #if EP_MUL == LWNAF || !defined(STRIP) -static void ep4_mul_glv_imp(ep4_t r, const ep4_t p, const bn_t k) { +static void ep4_mul_gls_imp(ep4_t r, const ep4_t p, const bn_t k) { size_t l, _l[8]; bn_t n, _k[8], u; int8_t naf[8][RLC_FP_BITS + 1]; - ep4_t q[8]; + ep4_t q, t[8][1 << (RLC_WIDTH - 2)]; bn_null(n); bn_null(u); + ep4_null(q); RLC_TRY { bn_new(n); bn_new(u); - for (int i = 0; i < 8; i++) { + ep4_new(q); + for (size_t i = 0; i < 8; i++) { bn_null(_k[i]); - ep4_null(q[i]); bn_new(_k[i]); - ep4_new(q[i]); + for (size_t j = 0; j < (1 << (RLC_WIDTH - 2)); j++) { + ep4_null(t[i][j]); + ep4_new(t[i][j]); + } } ep4_curve_get_ord(n); @@ -106,34 +110,37 @@ static void ep4_mul_glv_imp(ep4_t r, const ep4_t p, const bn_t k) { bn_mod(_k[0], k, n); bn_rec_frb(_k, 8, _k[0], u, n, ep_curve_is_pairf() == EP_BN); - ep4_norm(q[0], p); - for (size_t i = 1; i < 8; i++) { - ep4_psi(q[i], q[i - 1]); - } -#if defined(EP_MIXED) - ep4_norm_sim(q + 1, q + 1, 7); -#endif - l = 0; for (size_t i = 0; i < 8; i++) { - if (bn_sign(_k[i]) == RLC_NEG) { - ep4_neg(q[i], q[i]); - } _l[i] = RLC_FP_BITS + 1; - bn_rec_naf(naf[i], &_l[i], _k[i], 2); + bn_rec_naf(naf[i], &_l[i], _k[i], RLC_WIDTH); l = RLC_MAX(l, _l[i]); + if (i == 0) { + ep4_norm(q, p); + if (bn_sign(_k[0]) == RLC_NEG) { + ep4_neg(q, q); + } + ep4_tab(t[0], q, RLC_WIDTH); + } else { + for (size_t j = 0; j < (1 << (RLC_WIDTH - 2)); j++) { + ep4_frb(t[i][j], t[i - 1][j], 1); + if (bn_sign(_k[i]) != bn_sign(_k[i - 1])) { + ep4_neg(t[i][j], t[i][j]); + } + } + } } ep4_set_infty(r); for (int j = l - 1; j >= 0; j--) { ep4_dbl(r, r); - for (int i = 0; i < 8; i++) { + for (size_t i = 0; i < 8; i++) { if (naf[i][j] > 0) { - ep4_add(r, r, q[i]); + ep4_add(r, r, t[i][naf[i][j] / 2]); } if (naf[i][j] < 0) { - ep4_sub(r, r, q[i]); + ep4_sub(r, r, t[i][-naf[i][j] / 2]); } } } @@ -147,11 +154,13 @@ static void ep4_mul_glv_imp(ep4_t r, const ep4_t p, const bn_t k) { RLC_FINALLY { bn_free(n); bn_free(u); - for (int i = 0; i < 8; i++) { + ep4_free(q); + for (size_t i = 0; i < 8; i++) { bn_free(_k[i]); - ep4_free(q[i]); + for (size_t j = 0; j < (1 << (RLC_WIDTH - 2)); j++) { + ep4_free(t[i][j]); + } } - } } @@ -647,7 +656,7 @@ void ep4_mul_lwnaf(ep4_t r, const ep4_t p, const bn_t k) { #if defined(EP_ENDOM) if (ep_curve_is_endom()) { - ep4_mul_glv_imp(r, p, k); + ep4_mul_gls_imp(r, p, k); return; } #endif diff --git a/src/epx/relic_ep8_mul.c b/src/epx/relic_ep8_mul.c index 5300e8933..c741b7c68 100644 --- a/src/epx/relic_ep8_mul.c +++ b/src/epx/relic_ep8_mul.c @@ -40,23 +40,27 @@ #if EP_MUL == LWNAF || !defined(STRIP) -static void ep8_mul_glv_imp(ep8_t r, const ep8_t p, const bn_t k) { +static void ep8_mul_gls_imp(ep8_t r, const ep8_t p, const bn_t k) { size_t l, _l[16]; bn_t n, _k[16], u; int8_t naf[16][RLC_FP_BITS + 1]; - ep8_t q[16]; + ep8_t q, t[16][1 << (RLC_WIDTH - 2)]; bn_null(n); bn_null(u); + ep8_null(q); RLC_TRY { bn_new(n); bn_new(u); - for (int i = 0; i < 16; i++) { + ep8_new(q); + for (size_t i = 0; i < 16; i++) { bn_null(_k[i]); - ep8_null(q[i]); bn_new(_k[i]); - ep8_new(q[i]); + for (size_t j = 0; j < (1 << (RLC_WIDTH - 2)); j++) { + ep8_null(t[i][j]); + ep8_new(t[i][j]); + } } ep8_curve_get_ord(n); @@ -64,31 +68,37 @@ static void ep8_mul_glv_imp(ep8_t r, const ep8_t p, const bn_t k) { bn_mod(_k[0], k, n); bn_rec_frb(_k, 16, _k[0], u, n, ep_curve_is_pairf() == EP_BN); - ep8_norm(q[0], p); - for (size_t i = 1; i < 16; i++) { - ep8_frb(q[i], q[i - 1], 1); - } - l = 0; for (size_t i = 0; i < 16; i++) { - if (bn_sign(_k[i]) == RLC_NEG) { - ep8_neg(q[i], q[i]); - } _l[i] = RLC_FP_BITS + 1; - bn_rec_naf(naf[i], &_l[i], _k[i], 2); + bn_rec_naf(naf[i], &_l[i], _k[i], RLC_WIDTH); l = RLC_MAX(l, _l[i]); + if (i == 0) { + ep8_norm(q, p); + if (bn_sign(_k[0]) == RLC_NEG) { + ep8_neg(q, q); + } + ep8_tab(t[0], q, RLC_WIDTH); + } else { + for (size_t j = 0; j < (1 << (RLC_WIDTH - 2)); j++) { + ep8_frb(t[i][j], t[i - 1][j], 1); + if (bn_sign(_k[i]) != bn_sign(_k[i - 1])) { + ep8_neg(t[i][j], t[i][j]); + } + } + } } ep8_set_infty(r); for (int j = l - 1; j >= 0; j--) { ep8_dbl(r, r); - for (int i = 0; i < 16; i++) { + for (size_t i = 0; i < 16; i++) { if (naf[i][j] > 0) { - ep8_add(r, r, q[i]); + ep8_add(r, r, t[i][naf[i][j] / 2]); } if (naf[i][j] < 0) { - ep8_sub(r, r, q[i]); + ep8_sub(r, r, t[i][-naf[i][j] / 2]); } } } @@ -102,9 +112,12 @@ static void ep8_mul_glv_imp(ep8_t r, const ep8_t p, const bn_t k) { RLC_FINALLY { bn_free(n); bn_free(u); - for (int i = 0; i < 16; i++) { + ep8_free(q); + for (size_t i = 0; i < 16; i++) { bn_free(_k[i]); - ep8_free(q[i]); + for (size_t j = 0; j < (1 << (RLC_WIDTH - 2)); j++) { + ep8_free(t[i][j]); + } } } } @@ -595,7 +608,7 @@ void ep8_mul_lwnaf(ep8_t r, const ep8_t p, const bn_t k) { #if defined(EP_ENDOM) if (ep_curve_is_endom()) { - ep8_mul_glv_imp(r, p, k); + ep8_mul_gls_imp(r, p, k); return; } #endif diff --git a/src/low/x64-asm-8l/relic_bn_mul_low.c b/src/low/x64-asm-8l/relic_bn_mul_low.c index 0839f9010..2c8c26e29 100644 --- a/src/low/x64-asm-8l/relic_bn_mul_low.c +++ b/src/low/x64-asm-8l/relic_bn_mul_low.c @@ -53,7 +53,7 @@ void bn_muln_low(dig_t *c, const dig_t *a, const dig_t *b, size_t size) { } void bn_muld_low(dig_t *c, const dig_t *a, size_t sa, const dig_t *b, size_t sb, - int low, int high) { + uint_t low, uint_t high) { (void)low; (void)high; mpn_mul(c, a, sa, b, sb);