forked from bitcoin-core/secp256k1
-
Notifications
You must be signed in to change notification settings - Fork 20
/
ecmult_gen_impl.h
273 lines (251 loc) · 13 KB
/
ecmult_gen_impl.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
/*******************************************************************************
* Copyright (c) 2013-2015, 2021 Pieter Wuille, Gregory Maxwell, Peter Dettman *
* Distributed under the MIT software license, see the accompanying *
* file COPYING or https://www.opensource.org/licenses/mit-license.php. *
*******************************************************************************/
#ifndef SECP256K1_ECMULT_GEN_IMPL_H
#define SECP256K1_ECMULT_GEN_IMPL_H
#include "util.h"
#include "scalar.h"
#include "group.h"
#include "ecmult_gen.h"
#include "hash_impl.h"
#include "precomputed_ecmult_gen.h"
static void secp256k1_ecmult_gen_context_build(secp256k1_ecmult_gen_context *ctx) {
secp256k1_ecmult_gen_blind(ctx, NULL);
ctx->built = 1;
}
static int secp256k1_ecmult_gen_context_is_built(const secp256k1_ecmult_gen_context* ctx) {
return ctx->built;
}
static void secp256k1_ecmult_gen_context_clear(secp256k1_ecmult_gen_context *ctx) {
ctx->built = 0;
secp256k1_scalar_clear(&ctx->scalar_offset);
secp256k1_ge_clear(&ctx->final_point_add);
}
static void secp256k1_ecmult_gen(const secp256k1_ecmult_gen_context *ctx, secp256k1_gej *r, const secp256k1_scalar *gn) {
uint32_t comb_off;
secp256k1_ge add;
secp256k1_fe neg;
secp256k1_ge_storage adds;
secp256k1_scalar recoded;
int first = 1;
memset(&adds, 0, sizeof(adds));
/* We want to compute R = gn*G.
*
* To blind the scalar used in the computation, we rewrite this to be R = (gn-b)*G + b*G.
*
* Next, we write (gn-b)*G as a sum of values (2*bit_i-1) * 2^i * G, for i=0..COMB_BITS-1.
* The values bit_i can be found as the binary representation of recoded =
* (gn + 2^COMB_BITS - 1 - b)/2 (mod order).
*
* The value (2^COMB_BITS - 1 - b) is precomputed as ctx->scalar_offset, and bG is
* precomputed as ctx->final_point_add. Thus recoded can be written as
* recoded = (gn + scalar_offset)/2, and R becomes the sum of (2*bit_i-1)*2^i*G
* values plus final_point_add. */
/* Compute the recoded value as a scalar. */
secp256k1_scalar_add(&recoded, gn, &ctx->scalar_offset);
secp256k1_scalar_half(&recoded, &recoded);
/* In secp256k1_ecmult_gen_prec_table we have precomputed sums of the
* (2*bit_i-1) * 2^i * G points, for various combinations of i positions.
* We will rewrite our equation in terms of these table entries, as explained
* in Section 3.3 of "Fast and compact elliptic-curve cryptography" by
* Mike Hamburg (see https://eprint.iacr.org/2012/309).
*
* Let mask(b) = sum(2^(b*COMB_TEETH + t)*COMB_SPACING for t=0..COMB_TEETH-1),
* with b ranging from 0 to COMB_BLOCKS-1. So for example with COMB_BLOCKS=11,
* COMB_TEETH=6, COMB_SPACING=4, we would have:
* mask(0) = 2^0 + 2^4 + 2^8 + 2^12 + 2^16 + 2^20,
* mask(1) = 2^24 + 2^28 + 2^32 + 2^36 + 2^40 + 2^44,
* mask(2) = 2^48 + 2^52 + 2^56 + 2^60 + 2^64 + 2^68,
* ...
* mask(10) = 2^240 + 2^244 + 2^248 + 2^252 + 2^256 + 2^260
*
* Imagine we have a table(b,m) function which can look up, given b and
* m=(recoded & mask(b)), the sum of (2*bit_i-1)*2^i*G for all bit positions
* i set in mask(b). In our example, table(1, 2^28 + 2^44) would be equal to
* (-2^24 + 2^28 + -2^32 + -2^36 + -2^40 + 2^244)*G.
*
* With that, we can rewrite R as:
* 1*(table(0, recoded & mask(0)) + table(1, recoded & mask(1)) + ...)
* + 2*(table(0, (recoded/2) & mask(0)) + table(1, (recoded/2) & mask(1)) + ...)
* + 4*(table(0, (recoded/4) & mask(0)) + table(1, (recoded/4) & mask(1)) + ...)
* + ...
* + 2^(COMB_SPACING-1)*(table(0, (recoded/2^(COMB_SPACING-1)) & mask(0)) + ...)
* + ctx->final_point_add.
*
* This is implemented using an outer loop that runs in reverse order over the lines
* of this equation, which in each iteration runs an inner loop that adds the terms
* of that line and the doubles the result before proceeding to the next line.
* In pseudocode:
* R = infinity
* for comb_off in range(COMB_SPACING - 1, -1, -1):
* for block in range(COMB_BLOCKS):
* R += table(block, (recoded >> comb_off) & mask(block))
* if comb_off > 0:
* R = 2*R
* R += final_point_add
* return R
*
* The last question is how to implement the table(b,m) function. For any value of
* b, m=(recoded & mask(b)) can only take on at most 2^COMB_TEETH possible values
* (the last one may have fewer as there mask(b) may the curve order). So we could
* create COMB_BLOCK tables which contain a value for each such m value.
*
* Due to the fact that every table entry is a sum of positive and negative powers
* of two multiplied by G, every table will contains pairs of negated points:
* if all the masked bits in m flip, the table value is negated. We can exploit this
* to only store the first half of every table. If an entry from the second half is
* needed, we look up its bit-flipped version instead, and conditionally negate it.
*/
/* Outer loop: iterate over comb_off from COMB_SPACING - 1 down to 0. */
comb_off = COMB_SPACING - 1;
while (1) {
uint32_t block;
uint32_t bit_pos = comb_off;
/* Inner loop: for each block, add table entries to the result. */
for (block = 0; block < COMB_BLOCKS; ++block) {
/* Gather the mask(block)-selected bits of recoded into bits. They're packed
* together: bit (tooth) of bits = bit
* ((block*COMB_TEETH + tooth)*COMB_SPACING + comb_off) of recoded. */
uint32_t bits = 0, index, tooth = 0;
while (1) {
if (EXPECT(tooth == COMB_TEETH, 0)) {
/* All bits have been gathered.
*
* If the top bit of bits is 1, conditionally flip them all (corresponding
* to looking up the negated table value), and remember to negate the
* result in sign. */
uint32_t sign = (bits >> (COMB_TEETH - 1)) & 1;
uint32_t abs = (bits ^ -sign) & (COMB_POINTS - 1);
VERIFY_CHECK(sign == 0 || sign == 1);
VERIFY_CHECK(abs < COMB_POINTS);
/* This uses a conditional move to avoid any secret data in array indexes.
* _Any_ use of secret indexes has been demonstrated to result in timing
* sidechannels, even when the cache-line access patterns are uniform.
* See also:
* - "A word of warning", CHES 2013 Rump Session, by Daniel J. Bernstein and
* Peter Schwabe (https://cryptojedi.org/peter/data/chesrump-20130822.pdf)
* - "Cache Attacks and Countermeasures: the Case of AES", RSA 2006, by Dag
* Arne Osvik, Adi Shamir, and Eran Tromer
* (https://www.tau.ac.il/~tromer/papers/cache.pdf)
*/
for (index = 0; index < COMB_POINTS; ++index) {
secp256k1_ge_storage_cmov(&adds, &secp256k1_ecmult_gen_prec_table[block][index], index == abs);
}
/* Set add=adds or add=-adds, in constant time, based on sign. */
secp256k1_ge_from_storage(&add, &adds);
secp256k1_fe_negate(&neg, &add.y, 1);
secp256k1_fe_cmov(&add.y, &neg, sign);
break;
#if COMB_BITS > 256
} else if (EXPECT(bit_pos >= 256, 0)) {
/* Some bit(s) of (mask(block) << comb_off) are outside of [0,256). This means
* we are also done constructing bits, but know its top bit should be zero, and no
* flipping/negating is needed. The table lookup can also be done over a
* smaller number of entries. */
/* Mask out junk in bits variable. */
bits &= ((1U << tooth) - 1);
VERIFY_CHECK(bits < COMB_POINTS);
for (index = 0; (index >> tooth) == 0; ++index) {
secp256k1_ge_storage_cmov(&adds, &secp256k1_ecmult_gen_prec_table[block][index], index == bits);
}
secp256k1_ge_from_storage(&add, &adds);
break;
#endif
} else {
/* Gather another bit. To reduce side-channels from single-bit reads, don't
* actually fetch a single bit, but read higher bits too, which are XOR'ed
* into the upper bits of bits. On every iteration, an addition bits is
* made correct, starting at the bottom. The bits above that contain junk.
* See https://www.usenix.org/system/files/conference/usenixsecurity18/sec18-alam.pdf
*/
uint32_t bitdata = secp256k1_scalar_get_bits(&recoded, bit_pos & ~0x1f, 32) >> (bit_pos & 0x1f);
VERIFY_CHECK(bit_pos < COMB_BITS && bit_pos < 256);
bits &= ~(1 << tooth);
bits ^= bitdata << tooth;
bit_pos += COMB_SPACING;
++tooth;
}
}
/* Add the looked up and conditionally negated value to r. */
if (EXPECT(first, 0)) {
/* If this is the first table lookup, we can skip addition. */
secp256k1_gej_set_ge(r, &add);
secp256k1_gej_rescale(r, &ctx->proj_blind);
first = 0;
} else {
secp256k1_gej_add_ge(r, r, &add);
}
}
/* Double the result, except in the last iteration. */
if (comb_off-- == 0) break;
secp256k1_gej_double(r, r);
}
/* Correct for the scalar_offset added at the start (final_point_add = b*G, while b was
* subtracted from the input scalar gn). */
secp256k1_gej_add_ge(r, r, &ctx->final_point_add);
/* Cleanup. */
secp256k1_fe_clear(&neg);
secp256k1_ge_clear(&add);
memset(&adds, 0, sizeof(adds));
secp256k1_scalar_clear(&recoded);
}
/* Setup blinding values for secp256k1_ecmult_gen. */
static void secp256k1_ecmult_gen_blind(secp256k1_ecmult_gen_context *ctx, const unsigned char *seed32) {
secp256k1_scalar b;
secp256k1_scalar base_offset, negone;
unsigned i;
secp256k1_gej gb;
secp256k1_fe f;
unsigned char nonce32[32];
secp256k1_rfc6979_hmac_sha256 rng;
unsigned char keydata[64] = {0};
/* Compute base_offset = 2^COMB_BITS - 1. This could be precomputed. */
base_offset = secp256k1_scalar_one;
secp256k1_scalar_negate(&negone, &base_offset);
for (i = 0; i < COMB_BITS; ++i) {
secp256k1_scalar_add(&base_offset, &base_offset, &base_offset);
}
secp256k1_scalar_add(&base_offset, &base_offset, &negone);
if (seed32 == NULL) {
/* When seed is NULL, reset the final point and blinding value. */
secp256k1_ge_neg(&ctx->final_point_add, &secp256k1_ge_const_g);
ctx->scalar_offset = secp256k1_scalar_one;
secp256k1_scalar_add(&ctx->scalar_offset, &ctx->scalar_offset, &base_offset);
ctx->proj_blind = secp256k1_fe_one;
}
/* The prior blinding value (if not reset) is chained forward by including it in the hash. */
secp256k1_scalar_get_b32(nonce32, &ctx->scalar_offset);
/** Using a CSPRNG allows a failure free interface, avoids needing large amounts of random data,
* and guards against weak or adversarial seeds. This is a simpler and safer interface than
* asking the caller for blinding values directly and expecting them to retry on failure.
*/
memcpy(keydata, nonce32, 32);
if (seed32 != NULL) {
memcpy(keydata + 32, seed32, 32);
}
secp256k1_rfc6979_hmac_sha256_initialize(&rng, keydata, seed32 ? 64 : 32);
memset(keydata, 0, sizeof(keydata));
/* Compute projective blinding factor (cannot be 0). */
secp256k1_rfc6979_hmac_sha256_generate(&rng, nonce32, 32);
secp256k1_fe_set_b32(&f, nonce32);
secp256k1_fe_cmov(&f, &secp256k1_fe_one, secp256k1_fe_is_zero(&f));
ctx->proj_blind = f;
/* For a random blinding value b, set scalar_offset=base_offset-n, final_point_add=bG */
secp256k1_rfc6979_hmac_sha256_generate(&rng, nonce32, 32);
secp256k1_scalar_set_b32(&b, nonce32, NULL);
/* The blinding value cannot be zero, as that would mean final_point_add = infinity,
* which secp256k1_gej_add_ge cannot handle. */
secp256k1_scalar_cmov(&b, &secp256k1_scalar_one, secp256k1_scalar_is_zero(&b));
secp256k1_rfc6979_hmac_sha256_finalize(&rng);
memset(nonce32, 0, 32);
secp256k1_ecmult_gen(ctx, &gb, &b);
secp256k1_scalar_negate(&b, &b);
secp256k1_scalar_add(&ctx->scalar_offset, &b, &base_offset);
secp256k1_ge_set_gej(&ctx->final_point_add, &gb);
/* Clean up. */
secp256k1_scalar_clear(&b);
secp256k1_gej_clear(&gb);
}
#endif /* SECP256K1_ECMULT_GEN_IMPL_H */