1919 * u32 * macp , u8 const rk [], u32 rounds) ;
2020 * /
2121ENTRY(ce_aes_ccm_auth_data)
22- frame_push 7
23-
24- mov x19 , x0
25- mov x20 , x1
26- mov x21 , x2
27- mov x22 , x3
28- mov x23 , x4
29- mov x24 , x5
30-
31- ldr w25 , [ x22 ] / * leftover from prev round? * /
22+ ldr w8 , [ x3 ] / * leftover from prev round? * /
3223 ld1 {v0.16b} , [ x0 ] / * load mac * /
33- cbz w25 , 1f
34- sub w25 , w25 , # 16
24+ cbz w8 , 1f
25+ sub w8 , w8 , # 16
3526 eor v1.16b , v1.16b , v1.16b
36- 0 : ldrb w7 , [ x20 ], # 1 / * get 1 byte of input * /
37- subs w21 , w21 , # 1
38- add w25 , w25 , # 1
27+ 0 : ldrb w7 , [ x1 ], # 1 / * get 1 byte of input * /
28+ subs w2 , w2 , # 1
29+ add w8 , w8 , # 1
3930 ins v1.b [ 0 ], w7
4031 ext v1.16b , v1.16b , v1.16b , # 1 / * rotate in the input bytes * /
4132 beq 8f / * out of input? * /
42- cbnz w25 , 0b
33+ cbnz w8 , 0b
4334 eor v0.16b , v0.16b , v1.16b
44- 1 : ld1 {v3.4s} , [ x23 ] / * load first round key * /
45- prfm pldl1strm , [ x20 ]
46- cmp w24 , # 12 / * which key size? * /
47- add x6 , x23 , # 16
48- sub w7 , w24 , # 2 / * modified # of rounds * /
35+ 1 : ld1 {v3.4s} , [ x4 ] / * load first round key * /
36+ prfm pldl1strm , [ x1 ]
37+ cmp w5 , # 12 /* which key size? * /
38+ add x6 , x4 , # 16
39+ sub w7 , w5 , # 2 / * modified # of rounds * /
4940 bmi 2f
5041 bne 5f
5142 mov v5.16b , v3.16b
@@ -64,43 +55,33 @@ ENTRY(ce_aes_ccm_auth_data)
6455 ld1 {v5.4s} , [ x6 ], # 16 / * load next round key * /
6556 bpl 3b
6657 aese v0.16b , v4.16b
67- subs w21 , w21 , # 16 / * last data? * /
58+ subs w2 , w2 , # 16 / * last data? * /
6859 eor v0.16b , v0.16b , v5.16b / * final round * /
6960 bmi 6f
70- ld1 {v1.16b} , [ x20 ], # 16 / * load next input block * /
61+ ld1 {v1.16b} , [ x1 ], # 16 / * load next input block * /
7162 eor v0.16b , v0.16b , v1.16b / * xor with mac * /
72- beq 6f
73-
74- if_will_cond_yield_neon
75- st1 {v0.16b} , [ x19 ] / * store mac * /
76- do_cond_yield_neon
77- ld1 {v0.16b} , [ x19 ] / * reload mac * /
78- endif_yield_neon
79-
80- b 1b
81- 6 : st1 {v0.16b} , [ x19 ] / * store mac * /
63+ bne 1b
64+ 6 : st1 {v0.16b} , [ x0 ] / * store mac * /
8265 beq 10f
83- adds w21 , w21 , # 16
66+ adds w2 , w2 , # 16
8467 beq 10f
85- mov w25 , w21
86- 7 : ldrb w7 , [ x20 ], # 1
68+ mov w8 , w2
69+ 7 : ldrb w7 , [ x1 ], # 1
8770 umov w6 , v0.b [ 0 ]
8871 eor w6 , w6 , w7
89- strb w6 , [ x19 ], # 1
90- subs w21 , w21 , # 1
72+ strb w6 , [ x0 ], # 1
73+ subs w2 , w2 , # 1
9174 beq 10f
9275 ext v0.16b , v0.16b , v0.16b , # 1 / * rotate out the mac bytes * /
9376 b 7b
94- 8 : mov w7 , w25
95- add w25 , w25 , # 16
77+ 8 : mov w7 , w8
78+ add w8 , w8 , # 16
96799 : ext v1.16b , v1.16b , v1.16b , # 1
9780 adds w7 , w7 , # 1
9881 bne 9b
9982 eor v0.16b , v0.16b , v1.16b
100- st1 {v0.16b} , [ x19 ]
101- 10 : str w25 , [ x22 ]
102-
103- frame_pop
83+ st1 {v0.16b} , [ x0 ]
84+ 10 : str w8 , [ x3 ]
10485 ret
10586ENDPROC(ce_aes_ccm_auth_data)
10687
@@ -145,29 +126,19 @@ ENTRY(ce_aes_ccm_final)
145126ENDPROC(ce_aes_ccm_final)
146127
147128 .macro aes_ccm_do_crypt , enc
148- frame_push 8
149-
150- mov x19 , x0
151- mov x20 , x1
152- mov x21 , x2
153- mov x22 , x3
154- mov x23 , x4
155- mov x24 , x5
156- mov x25 , x6
157-
158- ldr x26 , [ x25 , # 8 ] / * load lower ctr * /
159- ld1 {v0.16b} , [ x24 ] / * load mac * /
160- CPU_LE( rev x26 , x26 ) / * keep swabbed ctr in reg * /
129+ ldr x8 , [ x6 , # 8 ] / * load lower ctr * /
130+ ld1 {v0.16b} , [ x5 ] / * load mac * /
131+ CPU_LE( rev x8 , x8 ) / * keep swabbed ctr in reg * /
1611320 : / * outer loop * /
162- ld1 {v1.8b} , [ x25 ] / * load upper ctr * /
163- prfm pldl1strm , [ x20 ]
164- add x26 , x26 , # 1
165- rev x9 , x26
166- cmp w23 , # 12 / * which key size? * /
167- sub w7 , w23 , # 2 / * get modified # of rounds * /
133+ ld1 {v1.8b} , [ x6 ] / * load upper ctr * /
134+ prfm pldl1strm , [ x1 ]
135+ add x8 , x8 , # 1
136+ rev x9 , x8
137+ cmp w4 , # 12 /* which key size? * /
138+ sub w7 , w4 , # 2 / * get modified # of rounds * /
168139 ins v1.d [ 1 ], x9 / * no carry in lower ctr * /
169- ld1 {v3.4s} , [ x22 ] / * load first round key * /
170- add x10 , x22 , # 16
140+ ld1 {v3.4s} , [ x3 ] / * load first round key * /
141+ add x10 , x3 , # 16
171142 bmi 1f
172143 bne 4f
173144 mov v5.16b , v3.16b
@@ -194,9 +165,9 @@ CPU_LE( rev x26, x26 ) /* keep swabbed ctr in reg */
194165 bpl 2b
195166 aese v0.16b , v4.16b
196167 aese v1.16b , v4.16b
197- subs w21 , w21 , # 16
198- bmi 7f / * partial block? * /
199- ld1 {v2.16b} , [ x20 ], # 16 / * load next input block * /
168+ subs w2 , w2 , # 16
169+ bmi 6f / * partial block? * /
170+ ld1 {v2.16b} , [ x1 ], # 16 / * load next input block * /
200171 .if \enc == 1
201172 eor v2.16b , v2.16b , v5.16b / * final round enc + mac * /
202173 eor v1.16b , v1.16b , v2.16b / * xor with crypted ctr * /
@@ -205,29 +176,18 @@ CPU_LE( rev x26, x26 ) /* keep swabbed ctr in reg */
205176 eor v1.16b , v2.16b , v5.16b / * final round enc * /
206177 .endif
207178 eor v0.16b , v0.16b , v2.16b / * xor mac with pt ^ rk [ last ] * /
208- st1 {v1.16b} , [ x19 ], # 16 / * write output block * /
209- beq 5f
210-
211- if_will_cond_yield_neon
212- st1 {v0.16b} , [ x24 ] / * store mac * /
213- do_cond_yield_neon
214- ld1 {v0.16b} , [ x24 ] / * reload mac * /
215- endif_yield_neon
216-
217- b 0b
218- 5 :
219- CPU_LE( rev x26 , x26 )
220- st1 {v0.16b} , [ x24 ] / * store mac * /
221- str x26 , [ x25 , # 8 ] / * store lsb end of ctr (BE) * /
222-
223- 6 : frame_pop
224- ret
225-
226- 7 : eor v0.16b , v0.16b , v5.16b / * final round mac * /
179+ st1 {v1.16b} , [ x0 ], # 16 / * write output block * /
180+ bne 0b
181+ CPU_LE( rev x8 , x8 )
182+ st1 {v0.16b} , [ x5 ] / * store mac * /
183+ str x8 , [ x6 , # 8 ] / * store lsb end of ctr (BE) * /
184+ 5 : ret
185+
186+ 6 : eor v0.16b , v0.16b , v5.16b / * final round mac * /
227187 eor v1.16b , v1.16b , v5.16b / * final round enc * /
228- st1 {v0.16b} , [ x24 ] / * store mac * /
229- add w21 , w21 , # 16 / * process partial tail block * /
230- 8 : ldrb w9 , [ x20 ], # 1 / * get 1 byte of input * /
188+ st1 {v0.16b} , [ x5 ] / * store mac * /
189+ add w2 , w2 , # 16 / * process partial tail block * /
190+ 7 : ldrb w9 , [ x1 ], # 1 / * get 1 byte of input * /
231191 umov w6 , v1.b [ 0 ] / * get top crypted ctr byte * /
232192 umov w7 , v0.b [ 0 ] / * get top mac byte * /
233193 .if \enc == 1
@@ -237,13 +197,13 @@ CPU_LE( rev x26, x26 )
237197 eor w9 , w9 , w6
238198 eor w7 , w7 , w9
239199 .endif
240- strb w9 , [ x19 ], # 1 / * store out byte * /
241- strb w7 , [ x24 ], # 1 / * store mac byte * /
242- subs w21 , w21 , # 1
243- beq 6b
200+ strb w9 , [ x0 ], # 1 / * store out byte * /
201+ strb w7 , [ x5 ], # 1 / * store mac byte * /
202+ subs w2 , w2 , # 1
203+ beq 5b
244204 ext v0.16b , v0.16b , v0.16b , # 1 / * shift out mac byte * /
245205 ext v1.16b , v1.16b , v1.16b , # 1 / * shift out ctr byte * /
246- b 8b
206+ b 7b
247207 .endm
248208
249209 / *
0 commit comments