Skip to content

Commit 3465893

Browse files
committed
Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Merge crypto-2.6 to pick up NEON yield revert.
2 parents d6e4379 + f10dc56 commit 3465893

File tree

3 files changed: 86 additions and 148 deletions.

3 files changed: 86 additions and 148 deletions.

arch/arm64/crypto/aes-ce-ccm-core.S

Lines changed: 55 additions & 95 deletions
Original file line number | Diff line number | Diff line change
@@ -19,33 +19,24 @@
1919
* u32 *macp, u8 const rk[], u32 rounds);
2020
*/
2121
ENTRY(ce_aes_ccm_auth_data)
22-
frame_push 7
23-
24-
mov x19, x0
25-
mov x20, x1
26-
mov x21, x2
27-
mov x22, x3
28-
mov x23, x4
29-
mov x24, x5
30-
31-
ldr w25, [x22] /* leftover from prev round? */
22+
ldr w8, [x3] /* leftover from prev round? */
3223
ld1 {v0.16b}, [x0] /* load mac */
33-
cbz w25, 1f
34-
sub w25, w25, #16
24+
cbz w8, 1f
25+
sub w8, w8, #16
3526
eor v1.16b, v1.16b, v1.16b
36-
0: ldrb w7, [x20], #1 /* get 1 byte of input */
37-
subs w21, w21, #1
38-
add w25, w25, #1
27+
0: ldrb w7, [x1], #1 /* get 1 byte of input */
28+
subs w2, w2, #1
29+
add w8, w8, #1
3930
ins v1.b[0], w7
4031
ext v1.16b, v1.16b, v1.16b, #1 /* rotate in the input bytes */
4132
beq 8f /* out of input? */
42-
cbnz w25, 0b
33+
cbnz w8, 0b
4334
eor v0.16b, v0.16b, v1.16b
44-
1: ld1 {v3.4s}, [x23] /* load first round key */
45-
prfm pldl1strm, [x20]
46-
cmp w24, #12 /* which key size? */
47-
add x6, x23, #16
48-
sub w7, w24, #2 /* modified # of rounds */
35+
1: ld1 {v3.4s}, [x4] /* load first round key */
36+
prfm pldl1strm, [x1]
37+
cmp w5, #12 /* which key size? */
38+
add x6, x4, #16
39+
sub w7, w5, #2 /* modified # of rounds */
4940
bmi 2f
5041
bne 5f
5142
mov v5.16b, v3.16b
@@ -64,43 +55,33 @@ ENTRY(ce_aes_ccm_auth_data)
6455
ld1 {v5.4s}, [x6], #16 /* load next round key */
6556
bpl 3b
6657
aese v0.16b, v4.16b
67-
subs w21, w21, #16 /* last data? */
58+
subs w2, w2, #16 /* last data? */
6859
eor v0.16b, v0.16b, v5.16b /* final round */
6960
bmi 6f
70-
ld1 {v1.16b}, [x20], #16 /* load next input block */
61+
ld1 {v1.16b}, [x1], #16 /* load next input block */
7162
eor v0.16b, v0.16b, v1.16b /* xor with mac */
72-
beq 6f
73-
74-
if_will_cond_yield_neon
75-
st1 {v0.16b}, [x19] /* store mac */
76-
do_cond_yield_neon
77-
ld1 {v0.16b}, [x19] /* reload mac */
78-
endif_yield_neon
79-
80-
b 1b
81-
6: st1 {v0.16b}, [x19] /* store mac */
63+
bne 1b
64+
6: st1 {v0.16b}, [x0] /* store mac */
8265
beq 10f
83-
adds w21, w21, #16
66+
adds w2, w2, #16
8467
beq 10f
85-
mov w25, w21
86-
7: ldrb w7, [x20], #1
68+
mov w8, w2
69+
7: ldrb w7, [x1], #1
8770
umov w6, v0.b[0]
8871
eor w6, w6, w7
89-
strb w6, [x19], #1
90-
subs w21, w21, #1
72+
strb w6, [x0], #1
73+
subs w2, w2, #1
9174
beq 10f
9275
ext v0.16b, v0.16b, v0.16b, #1 /* rotate out the mac bytes */
9376
b 7b
94-
8: mov w7, w25
95-
add w25, w25, #16
77+
8: mov w7, w8
78+
add w8, w8, #16
9679
9: ext v1.16b, v1.16b, v1.16b, #1
9780
adds w7, w7, #1
9881
bne 9b
9982
eor v0.16b, v0.16b, v1.16b
100-
st1 {v0.16b}, [x19]
101-
10: str w25, [x22]
102-
103-
frame_pop
83+
st1 {v0.16b}, [x0]
84+
10: str w8, [x3]
10485
ret
10586
ENDPROC(ce_aes_ccm_auth_data)
10687

@@ -145,29 +126,19 @@ ENTRY(ce_aes_ccm_final)
145126
ENDPROC(ce_aes_ccm_final)
146127

147128
.macro aes_ccm_do_crypt,enc
148-
frame_push 8
149-
150-
mov x19, x0
151-
mov x20, x1
152-
mov x21, x2
153-
mov x22, x3
154-
mov x23, x4
155-
mov x24, x5
156-
mov x25, x6
157-
158-
ldr x26, [x25, #8] /* load lower ctr */
159-
ld1 {v0.16b}, [x24] /* load mac */
160-
CPU_LE( rev x26, x26 ) /* keep swabbed ctr in reg */
129+
ldr x8, [x6, #8] /* load lower ctr */
130+
ld1 {v0.16b}, [x5] /* load mac */
131+
CPU_LE( rev x8, x8 ) /* keep swabbed ctr in reg */
161132
0: /* outer loop */
162-
ld1 {v1.8b}, [x25] /* load upper ctr */
163-
prfm pldl1strm, [x20]
164-
add x26, x26, #1
165-
rev x9, x26
166-
cmp w23, #12 /* which key size? */
167-
sub w7, w23, #2 /* get modified # of rounds */
133+
ld1 {v1.8b}, [x6] /* load upper ctr */
134+
prfm pldl1strm, [x1]
135+
add x8, x8, #1
136+
rev x9, x8
137+
cmp w4, #12 /* which key size? */
138+
sub w7, w4, #2 /* get modified # of rounds */
168139
ins v1.d[1], x9 /* no carry in lower ctr */
169-
ld1 {v3.4s}, [x22] /* load first round key */
170-
add x10, x22, #16
140+
ld1 {v3.4s}, [x3] /* load first round key */
141+
add x10, x3, #16
171142
bmi 1f
172143
bne 4f
173144
mov v5.16b, v3.16b
@@ -194,9 +165,9 @@ CPU_LE( rev x26, x26 ) /* keep swabbed ctr in reg */
194165
bpl 2b
195166
aese v0.16b, v4.16b
196167
aese v1.16b, v4.16b
197-
subs w21, w21, #16
198-
bmi 7f /* partial block? */
199-
ld1 {v2.16b}, [x20], #16 /* load next input block */
168+
subs w2, w2, #16
169+
bmi 6f /* partial block? */
170+
ld1 {v2.16b}, [x1], #16 /* load next input block */
200171
.if \enc == 1
201172
eor v2.16b, v2.16b, v5.16b /* final round enc+mac */
202173
eor v1.16b, v1.16b, v2.16b /* xor with crypted ctr */
@@ -205,29 +176,18 @@ CPU_LE( rev x26, x26 ) /* keep swabbed ctr in reg */
205176
eor v1.16b, v2.16b, v5.16b /* final round enc */
206177
.endif
207178
eor v0.16b, v0.16b, v2.16b /* xor mac with pt ^ rk[last] */
208-
st1 {v1.16b}, [x19], #16 /* write output block */
209-
beq 5f
210-
211-
if_will_cond_yield_neon
212-
st1 {v0.16b}, [x24] /* store mac */
213-
do_cond_yield_neon
214-
ld1 {v0.16b}, [x24] /* reload mac */
215-
endif_yield_neon
216-
217-
b 0b
218-
5:
219-
CPU_LE( rev x26, x26 )
220-
st1 {v0.16b}, [x24] /* store mac */
221-
str x26, [x25, #8] /* store lsb end of ctr (BE) */
222-
223-
6: frame_pop
224-
ret
225-
226-
7: eor v0.16b, v0.16b, v5.16b /* final round mac */
179+
st1 {v1.16b}, [x0], #16 /* write output block */
180+
bne 0b
181+
CPU_LE( rev x8, x8 )
182+
st1 {v0.16b}, [x5] /* store mac */
183+
str x8, [x6, #8] /* store lsb end of ctr (BE) */
184+
5: ret
185+
186+
6: eor v0.16b, v0.16b, v5.16b /* final round mac */
227187
eor v1.16b, v1.16b, v5.16b /* final round enc */
228-
st1 {v0.16b}, [x24] /* store mac */
229-
add w21, w21, #16 /* process partial tail block */
230-
8: ldrb w9, [x20], #1 /* get 1 byte of input */
188+
st1 {v0.16b}, [x5] /* store mac */
189+
add w2, w2, #16 /* process partial tail block */
190+
7: ldrb w9, [x1], #1 /* get 1 byte of input */
231191
umov w6, v1.b[0] /* get top crypted ctr byte */
232192
umov w7, v0.b[0] /* get top mac byte */
233193
.if \enc == 1
@@ -237,13 +197,13 @@ CPU_LE( rev x26, x26 )
237197
eor w9, w9, w6
238198
eor w7, w7, w9
239199
.endif
240-
strb w9, [x19], #1 /* store out byte */
241-
strb w7, [x24], #1 /* store mac byte */
242-
subs w21, w21, #1
243-
beq 6b
200+
strb w9, [x0], #1 /* store out byte */
201+
strb w7, [x5], #1 /* store mac byte */
202+
subs w2, w2, #1
203+
beq 5b
244204
ext v0.16b, v0.16b, v0.16b, #1 /* shift out mac byte */
245205
ext v1.16b, v1.16b, v1.16b, #1 /* shift out ctr byte */
246-
b 8b
206+
b 7b
247207
.endm
248208

249209
/*

arch/arm64/crypto/ghash-ce-core.S

Lines changed: 25 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -322,55 +322,41 @@ ENDPROC(pmull_ghash_update_p8)
322322
.endm
323323

324324
.macro pmull_gcm_do_crypt, enc
325-
frame_push 10
325+
ld1 {SHASH.2d}, [x4]
326+
ld1 {XL.2d}, [x1]
327+
ldr x8, [x5, #8] // load lower counter
326328

327-
mov x19, x0
328-
mov x20, x1
329-
mov x21, x2
330-
mov x22, x3
331-
mov x23, x4
332-
mov x24, x5
333-
mov x25, x6
334-
mov x26, x7
335-
.if \enc == 1
336-
ldr x27, [sp, #96] // first stacked arg
337-
.endif
338-
339-
ldr x28, [x24, #8] // load lower counter
340-
CPU_LE( rev x28, x28 )
341-
342-
0: mov x0, x25
343-
load_round_keys w26, x0
344-
ld1 {SHASH.2d}, [x23]
345-
ld1 {XL.2d}, [x20]
329+
load_round_keys w7, x6
346330

347331
movi MASK.16b, #0xe1
348332
ext SHASH2.16b, SHASH.16b, SHASH.16b, #8
333+
CPU_LE( rev x8, x8 )
349334
shl MASK.2d, MASK.2d, #57
350335
eor SHASH2.16b, SHASH2.16b, SHASH.16b
351336

352337
.if \enc == 1
353-
ld1 {KS.16b}, [x27]
338+
ldr x10, [sp]
339+
ld1 {KS.16b}, [x10]
354340
.endif
355341

356-
1: ld1 {CTR.8b}, [x24] // load upper counter
357-
ld1 {INP.16b}, [x22], #16
358-
rev x9, x28
359-
add x28, x28, #1
360-
sub w19, w19, #1
342+
0: ld1 {CTR.8b}, [x5] // load upper counter
343+
ld1 {INP.16b}, [x3], #16
344+
rev x9, x8
345+
add x8, x8, #1
346+
sub w0, w0, #1
361347
ins CTR.d[1], x9 // set lower counter
362348

363349
.if \enc == 1
364350
eor INP.16b, INP.16b, KS.16b // encrypt input
365-
st1 {INP.16b}, [x21], #16
351+
st1 {INP.16b}, [x2], #16
366352
.endif
367353

368354
rev64 T1.16b, INP.16b
369355

370-
cmp w26, #12
371-
b.ge 4f // AES-192/256?
356+
cmp w7, #12
357+
b.ge 2f // AES-192/256?
372358

373-
2: enc_round CTR, v21
359+
1: enc_round CTR, v21
374360

375361
ext T2.16b, XL.16b, XL.16b, #8
376362
ext IN1.16b, T1.16b, T1.16b, #8
@@ -425,39 +411,27 @@ CPU_LE( rev x28, x28 )
425411

426412
.if \enc == 0
427413
eor INP.16b, INP.16b, KS.16b
428-
st1 {INP.16b}, [x21], #16
414+
st1 {INP.16b}, [x2], #16
429415
.endif
430416

431-
cbz w19, 3f
417+
cbnz w0, 0b
432418

433-
if_will_cond_yield_neon
434-
st1 {XL.2d}, [x20]
435-
.if \enc == 1
436-
st1 {KS.16b}, [x27]
437-
.endif
438-
do_cond_yield_neon
439-
b 0b
440-
endif_yield_neon
419+
CPU_LE( rev x8, x8 )
420+
st1 {XL.2d}, [x1]
421+
str x8, [x5, #8] // store lower counter
441422

442-
b 1b
443-
444-
3: st1 {XL.2d}, [x20]
445423
.if \enc == 1
446-
st1 {KS.16b}, [x27]
424+
st1 {KS.16b}, [x10]
447425
.endif
448426

449-
CPU_LE( rev x28, x28 )
450-
str x28, [x24, #8] // store lower counter
451-
452-
frame_pop
453427
ret
454428

455-
4: b.eq 5f // AES-192?
429+
2: b.eq 3f // AES-192?
456430
enc_round CTR, v17
457431
enc_round CTR, v18
458-
5: enc_round CTR, v19
432+
3: enc_round CTR, v19
459433
enc_round CTR, v20
460-
b 2b
434+
b 1b
461435
.endm
462436

463437
/*

drivers/crypto/padlock-aes.c

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -266,14 +266,16 @@ static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key,
266266
return;
267267
}
268268

269+
count -= initial;
270+
269271
if (initial)
270272
asm volatile (".byte 0xf3,0x0f,0xa7,0xc8" /* rep xcryptecb */
271273
: "+S"(input), "+D"(output)
272274
: "d"(control_word), "b"(key), "c"(initial));
273275

274276
asm volatile (".byte 0xf3,0x0f,0xa7,0xc8" /* rep xcryptecb */
275277
: "+S"(input), "+D"(output)
276-
: "d"(control_word), "b"(key), "c"(count - initial));
278+
: "d"(control_word), "b"(key), "c"(count));
277279
}
278280

279281
static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key,
@@ -284,14 +286,16 @@ static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key,
284286
if (count < cbc_fetch_blocks)
285287
return cbc_crypt(input, output, key, iv, control_word, count);
286288

289+
count -= initial;
290+
287291
if (initial)
288292
asm volatile (".byte 0xf3,0x0f,0xa7,0xd0" /* rep xcryptcbc */
289293
: "+S" (input), "+D" (output), "+a" (iv)
290294
: "d" (control_word), "b" (key), "c" (initial));
291295

292296
asm volatile (".byte 0xf3,0x0f,0xa7,0xd0" /* rep xcryptcbc */
293297
: "+S" (input), "+D" (output), "+a" (iv)
294-
: "d" (control_word), "b" (key), "c" (count-initial));
298+
: "d" (control_word), "b" (key), "c" (count));
295299
return iv;
296300
}
297301

0 commit comments

Comments (0)