Skip to content

Commit e0be006

Browse files
author
yavtuk
committed
Move read only data to .rodata for armv8
1 parent 260ecea commit e0be006

File tree

7 files changed

+178
-70
lines changed

7 files changed

+178
-70
lines changed

crypto/aes/asm/vpaes-armv8.pl

Lines changed: 35 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
5858
.text
5959
60+
.section .rodata
6061
.type _vpaes_consts,%object
6162
.align 7 // totally strategic alignment
6263
_vpaes_consts:
@@ -146,6 +147,7 @@
146147
.asciz "Vector Permutation AES for ARMv8, Mike Hamburg (Stanford University)"
147148
.size _vpaes_consts,.-_vpaes_consts
148149
.align 6
150+
.previous
149151
___
150152

151153
{
@@ -165,7 +167,8 @@
165167
.type _vpaes_encrypt_preheat,%function
166168
.align 4
167169
_vpaes_encrypt_preheat:
168-
adr x10, .Lk_inv
170+
adrp x10, .Lk_inv // page of .Lk_inv itself: the table is only 128-byte aligned and may straddle a 4 KiB page
171+
add x10, x10, #:lo12:.Lk_inv
169172
movi v17.16b, #0x0f
170173
ld1 {v18.2d-v19.2d}, [x10],#32 // .Lk_inv
171174
ld1 {v20.2d-v23.2d}, [x10],#64 // .Lk_ipt, .Lk_sbo
@@ -193,7 +196,8 @@
193196
_vpaes_encrypt_core:
194197
mov x9, $key
195198
ldr w8, [$key,#240] // pull rounds
196-
adr x11, .Lk_mc_forward+16
199+
adrp x11, .Lk_mc_forward+16 // same expression as the :lo12: operand below, so page and offset always agree
200+
add x11, x11, #:lo12:.Lk_mc_forward+16
197201
// vmovdqa .Lk_ipt(%rip), %xmm2 # iptlo
198202
ld1 {v16.2d}, [x9], #16 // vmovdqu (%r9), %xmm5 # round0 key
199203
and v1.16b, v7.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1
@@ -280,7 +284,8 @@
280284
_vpaes_encrypt_2x:
281285
mov x9, $key
282286
ldr w8, [$key,#240] // pull rounds
283-
adr x11, .Lk_mc_forward+16
287+
adrp x11, .Lk_mc_forward+16 // same expression as the :lo12: operand below, so page and offset always agree
288+
add x11, x11, #:lo12:.Lk_mc_forward+16
284289
// vmovdqa .Lk_ipt(%rip), %xmm2 # iptlo
285290
ld1 {v16.2d}, [x9], #16 // vmovdqu (%r9), %xmm5 # round0 key
286291
and v1.16b, v14.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1
@@ -383,9 +388,11 @@
383388
.type _vpaes_decrypt_preheat,%function
384389
.align 4
385390
_vpaes_decrypt_preheat:
386-
adr x10, .Lk_inv
391+
adrp x10, .Lk_inv // page of .Lk_inv itself: the table is only 128-byte aligned and may straddle a 4 KiB page
392+
add x10, x10, #:lo12:.Lk_inv
387393
movi v17.16b, #0x0f
388-
adr x11, .Lk_dipt
394+
adrp x11, .Lk_dipt // page of .Lk_dipt itself; it need not share a 4 KiB page with _vpaes_consts
395+
add x11, x11, #:lo12:.Lk_dipt
389396
ld1 {v18.2d-v19.2d}, [x10],#32 // .Lk_inv
390397
ld1 {v20.2d-v23.2d}, [x11],#64 // .Lk_dipt, .Lk_dsbo
391398
ld1 {v24.2d-v27.2d}, [x11],#64 // .Lk_dsb9, .Lk_dsbd
@@ -407,10 +414,12 @@
407414
// vmovdqa .Lk_dipt(%rip), %xmm2 # iptlo
408415
lsl x11, x8, #4 // mov %rax, %r11; shl \$4, %r11
409416
eor x11, x11, #0x30 // xor \$0x30, %r11
410-
adr x10, .Lk_sr
417+
adrp x10, .Lk_sr // page of .Lk_sr itself; it need not share a 4 KiB page with _vpaes_consts
418+
add x10, x10, #:lo12:.Lk_sr
411419
and x11, x11, #0x30 // and \$0x30, %r11
412420
add x11, x11, x10
413-
adr x10, .Lk_mc_forward+48
421+
adrp x10, .Lk_mc_forward+48 // same expression as the :lo12: operand below, so page and offset always agree
422+
add x10, x10, #:lo12:.Lk_mc_forward+48
414423
415424
ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm4 # round0 key
416425
and v1.16b, v7.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1
@@ -518,10 +527,12 @@
518527
// vmovdqa .Lk_dipt(%rip), %xmm2 # iptlo
519528
lsl x11, x8, #4 // mov %rax, %r11; shl \$4, %r11
520529
eor x11, x11, #0x30 // xor \$0x30, %r11
521-
adr x10, .Lk_sr
530+
adrp x10, .Lk_sr // page of .Lk_sr itself; it need not share a 4 KiB page with _vpaes_consts
531+
add x10, x10, #:lo12:.Lk_sr
522532
and x11, x11, #0x30 // and \$0x30, %r11
523533
add x11, x11, x10
524-
adr x10, .Lk_mc_forward+48
534+
adrp x10, .Lk_mc_forward+48 // same expression as the :lo12: operand below, so page and offset always agree
535+
add x10, x10, #:lo12:.Lk_mc_forward+48
525536
526537
ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm4 # round0 key
527538
and v1.16b, v14.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1
@@ -657,14 +668,18 @@
657668
.type _vpaes_key_preheat,%function
658669
.align 4
659670
_vpaes_key_preheat:
660-
adr x10, .Lk_inv
671+
adrp x10, .Lk_inv // page of .Lk_inv itself: the table is only 128-byte aligned and may straddle a 4 KiB page
672+
add x10, x10, #:lo12:.Lk_inv
661673
movi v16.16b, #0x5b // .Lk_s63
662-
adr x11, .Lk_sb1
674+
adrp x11, .Lk_sb1 // page of .Lk_sb1 itself; it need not share a 4 KiB page with _vpaes_consts
675+
add x11, x11, #:lo12:.Lk_sb1
663676
movi v17.16b, #0x0f // .Lk_s0F
664677
ld1 {v18.2d-v21.2d}, [x10] // .Lk_inv, .Lk_ipt
665-
adr x10, .Lk_dksd
678+
adrp x10, .Lk_dksd // page of .Lk_dksd itself; it need not share a 4 KiB page with _vpaes_consts
679+
add x10, x10, #:lo12:.Lk_dksd
666680
ld1 {v22.2d-v23.2d}, [x11] // .Lk_sb1
667-
adr x11, .Lk_mc_forward
681+
adrp x11, .Lk_mc_forward // same symbol as the :lo12: operand below, so page and offset always agree
682+
add x11, x11, #:lo12:.Lk_mc_forward
668683
ld1 {v24.2d-v27.2d}, [x10],#64 // .Lk_dksd, .Lk_dksb
669684
ld1 {v28.2d-v31.2d}, [x10],#64 // .Lk_dkse, .Lk_dks9
670685
ld1 {v8.2d}, [x10] // .Lk_rcon
@@ -688,7 +703,8 @@
688703
bl _vpaes_schedule_transform
689704
mov v7.16b, v0.16b // vmovdqa %xmm0, %xmm7
690705
691-
adr x10, .Lk_sr // lea .Lk_sr(%rip),%r10
706+
adrp x10, .Lk_sr // page of .Lk_sr itself; it need not share a 4 KiB page with _vpaes_consts
707+
add x10, x10, #:lo12:.Lk_sr // lea .Lk_sr(%rip),%r10
692708
add x8, x8, x10
693709
cbnz $dir, .Lschedule_am_decrypting
694710
@@ -814,12 +830,15 @@
814830
.align 4
815831
.Lschedule_mangle_last:
816832
// schedule last round key from xmm0
817-
adr x11, .Lk_deskew // lea .Lk_deskew(%rip),%r11 # prepare to deskew
833+
adrp x11, .Lk_deskew // page of .Lk_deskew itself; it need not share a 4 KiB page with _vpaes_consts
834+
add x11, x11, #:lo12:.Lk_deskew // lea .Lk_deskew(%rip),%r11 # prepare to deskew
835+
818836
cbnz $dir, .Lschedule_mangle_last_dec
819837
820838
// encrypting
821839
ld1 {v1.2d}, [x8] // vmovdqa (%r8,%r10),%xmm1
822-
adr x11, .Lk_opt // lea .Lk_opt(%rip), %r11 # prepare to output transform
840+
adrp x11, .Lk_opt // page of .Lk_opt itself; it need not share a 4 KiB page with _vpaes_consts
841+
add x11, x11, #:lo12:.Lk_opt // lea .Lk_opt(%rip), %r11 # prepare to output transform
823842
add $out, $out, #32 // add \$32, %rdx
824843
tbl v0.16b, {v0.16b}, v1.16b // vpshufb %xmm1, %xmm0, %xmm0 # output permute
825844

crypto/ec/asm/ecp_nistz256-armv8.pl

Lines changed: 84 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@
7878
die "insane number of elements" if ($#arr != 64*16*37-1);
7979

8080
$code.=<<___;
81+
.section .rodata
8182
.globl ecp_nistz256_precomputed
8283
.type ecp_nistz256_precomputed,%object
8384
.align 12
@@ -103,19 +104,38 @@
103104
$code.=<<___;
104105
.size ecp_nistz256_precomputed,.-ecp_nistz256_precomputed
105106
.align 5
107+
.type .Lpoly,%object
106108
.Lpoly:
107109
.quad 0xffffffffffffffff,0x00000000ffffffff,0x0000000000000000,0xffffffff00000001
110+
.size .Lpoly,.-.Lpoly
111+
112+
.type .LRR,%object
108113
.LRR: // 2^512 mod P precomputed for NIST P256 polynomial
109114
.quad 0x0000000000000003,0xfffffffbffffffff,0xfffffffffffffffe,0x00000004fffffffd
115+
.size .LRR,.-.LRR
116+
117+
.type .Lone_mont,%object
110118
.Lone_mont:
111119
.quad 0x0000000000000001,0xffffffff00000000,0xffffffffffffffff,0x00000000fffffffe
120+
.size .Lone_mont,.-.Lone_mont
121+
122+
.type .Lone,%object
112123
.Lone:
113124
.quad 1,0,0,0
125+
.size .Lone,.-.Lone
126+
127+
.type .Lord,%object
114128
.Lord:
115129
.quad 0xf3b9cac2fc632551,0xbce6faada7179e84,0xffffffffffffffff,0xffffffff00000000
130+
.size .Lord,.-.Lord
131+
132+
.type .LordK,%object
116133
.LordK:
117134
.quad 0xccd1c8aaee00bc4f
135+
.size .LordK,.-.LordK
136+
118137
.asciz "ECP_NISTZ256 for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
138+
.previous
119139
120140
// void ecp_nistz256_to_mont(BN_ULONG x0[4],const BN_ULONG x1[4]);
121141
.globl ecp_nistz256_to_mont
@@ -127,12 +147,16 @@
127147
add x29,sp,#0
128148
stp x19,x20,[sp,#16]
129149
130-
ldr $bi,.LRR // bp[0]
150+
adrp $bi, .LRR
151+
ldr $bi, [$bi, #:lo12:.LRR] // bp[0]
131152
ldp $a0,$a1,[$ap]
132153
ldp $a2,$a3,[$ap,#16]
133-
ldr $poly1,.Lpoly+8
134-
ldr $poly3,.Lpoly+24
135-
adr $bp,.LRR // &bp[0]
154+
adrp $poly1, .Lpoly
155+
ldr $poly1, [$poly1, #:lo12:.Lpoly+8]
156+
adrp $poly3, .Lpoly
157+
ldr $poly3, [$poly3, #:lo12:.Lpoly+24]
158+
adrp $bp, .LRR
159+
add $bp, $bp, #:lo12:.LRR // &bp[0]
136160
137161
bl __ecp_nistz256_mul_mont
138162
@@ -155,9 +179,12 @@
155179
mov $bi,#1 // bp[0]
156180
ldp $a0,$a1,[$ap]
157181
ldp $a2,$a3,[$ap,#16]
158-
ldr $poly1,.Lpoly+8
159-
ldr $poly3,.Lpoly+24
160-
adr $bp,.Lone // &bp[0]
182+
adrp $poly1, .Lpoly
183+
ldr $poly1, [$poly1, #:lo12:.Lpoly+8]
184+
adrp $poly3, .Lpoly
185+
ldr $poly3, [$poly3, #:lo12:.Lpoly+24]
186+
adrp $bp, .Lone
187+
add $bp, $bp, #:lo12:.Lone // &bp[0]
161188
162189
bl __ecp_nistz256_mul_mont
163190
@@ -181,8 +208,10 @@
181208
ldr $bi,[$bp] // bp[0]
182209
ldp $a0,$a1,[$ap]
183210
ldp $a2,$a3,[$ap,#16]
184-
ldr $poly1,.Lpoly+8
185-
ldr $poly3,.Lpoly+24
211+
adrp $poly1, .Lpoly
212+
ldr $poly1, [$poly1, #:lo12:.Lpoly+8]
213+
adrp $poly3, .Lpoly
214+
ldr $poly3, [$poly3, #:lo12:.Lpoly+24]
186215
187216
bl __ecp_nistz256_mul_mont
188217
@@ -204,8 +233,10 @@
204233
205234
ldp $a0,$a1,[$ap]
206235
ldp $a2,$a3,[$ap,#16]
207-
ldr $poly1,.Lpoly+8
208-
ldr $poly3,.Lpoly+24
236+
adrp $poly1, .Lpoly
237+
ldr $poly1, [$poly1, #:lo12:.Lpoly+8]
238+
adrp $poly3, .Lpoly
239+
ldr $poly3, [$poly3, #:lo12:.Lpoly+24]
209240
210241
bl __ecp_nistz256_sqr_mont
211242
@@ -229,8 +260,10 @@
229260
ldp $t0,$t1,[$bp]
230261
ldp $acc2,$acc3,[$ap,#16]
231262
ldp $t2,$t3,[$bp,#16]
232-
ldr $poly1,.Lpoly+8
233-
ldr $poly3,.Lpoly+24
263+
adrp $poly1, .Lpoly
264+
ldr $poly1, [$poly1, #:lo12:.Lpoly+8]
265+
adrp $poly3, .Lpoly
266+
ldr $poly3, [$poly3, #:lo12:.Lpoly+24]
234267
235268
bl __ecp_nistz256_add
236269
@@ -250,8 +283,10 @@
250283
251284
ldp $acc0,$acc1,[$ap]
252285
ldp $acc2,$acc3,[$ap,#16]
253-
ldr $poly1,.Lpoly+8
254-
ldr $poly3,.Lpoly+24
286+
adrp $poly1, .Lpoly
287+
ldr $poly1, [$poly1, #:lo12:.Lpoly+8]
288+
adrp $poly3, .Lpoly
289+
ldr $poly3, [$poly3, #:lo12:.Lpoly+24]
255290
256291
bl __ecp_nistz256_div_by_2
257292
@@ -271,8 +306,10 @@
271306
272307
ldp $acc0,$acc1,[$ap]
273308
ldp $acc2,$acc3,[$ap,#16]
274-
ldr $poly1,.Lpoly+8
275-
ldr $poly3,.Lpoly+24
309+
adrp $poly1, .Lpoly
310+
ldr $poly1, [$poly1, #:lo12:.Lpoly+8]
311+
adrp $poly3, .Lpoly
312+
ldr $poly3, [$poly3, #:lo12:.Lpoly+24]
276313
mov $t0,$acc0
277314
mov $t1,$acc1
278315
mov $t2,$acc2
@@ -296,8 +333,10 @@
296333
297334
ldp $acc0,$acc1,[$ap]
298335
ldp $acc2,$acc3,[$ap,#16]
299-
ldr $poly1,.Lpoly+8
300-
ldr $poly3,.Lpoly+24
336+
adrp $poly1, .Lpoly
337+
ldr $poly1, [$poly1, #:lo12:.Lpoly+8]
338+
adrp $poly3, .Lpoly
339+
ldr $poly3, [$poly3, #:lo12:.Lpoly+24]
301340
mov $t0,$acc0
302341
mov $t1,$acc1
303342
mov $t2,$acc2
@@ -333,8 +372,10 @@
333372
334373
ldp $acc0,$acc1,[$ap]
335374
ldp $acc2,$acc3,[$ap,#16]
336-
ldr $poly1,.Lpoly+8
337-
ldr $poly3,.Lpoly+24
375+
adrp $poly1, .Lpoly
376+
ldr $poly1, [$poly1, #:lo12:.Lpoly+8]
377+
adrp $poly3, .Lpoly
378+
ldr $poly3, [$poly3, #:lo12:.Lpoly+24]
338379
339380
bl __ecp_nistz256_sub_from
340381
@@ -357,8 +398,10 @@
357398
mov $acc1,xzr
358399
mov $acc2,xzr
359400
mov $acc3,xzr
360-
ldr $poly1,.Lpoly+8
361-
ldr $poly3,.Lpoly+24
401+
adrp $poly1, .Lpoly
402+
ldr $poly1, [$poly1, #:lo12:.Lpoly+8]
403+
adrp $poly3, .Lpoly
404+
ldr $poly3, [$poly3, #:lo12:.Lpoly+24]
362405
363406
bl __ecp_nistz256_sub_from
364407
@@ -736,9 +779,11 @@
736779
mov $rp_real,$rp
737780
ldp $acc2,$acc3,[$ap,#48]
738781
mov $ap_real,$ap
739-
ldr $poly1,.Lpoly+8
782+
adrp $poly1, .Lpoly
783+
ldr $poly1, [$poly1, #:lo12:.Lpoly+8]
740784
mov $t0,$acc0
741-
ldr $poly3,.Lpoly+24
785+
adrp $poly3, .Lpoly
786+
ldr $poly3, [$poly3, #:lo12:.Lpoly+24]
742787
mov $t1,$acc1
743788
ldp $a0,$a1,[$ap_real,#64] // forward load for p256_sqr_mont
744789
mov $t2,$acc2
@@ -897,8 +942,10 @@
897942
mov $rp_real,$rp
898943
mov $ap_real,$ap
899944
mov $bp_real,$bp
900-
ldr $poly1,.Lpoly+8
901-
ldr $poly3,.Lpoly+24
945+
adrp $poly1, .Lpoly
946+
ldr $poly1, [$poly1, #:lo12:.Lpoly+8]
947+
adrp $poly3, .Lpoly
948+
ldr $poly3, [$poly3, #:lo12:.Lpoly+24]
902949
orr $t0,$a0,$a1
903950
orr $t2,$a2,$a3
904951
orr $in2infty,$t0,$t2
@@ -1151,8 +1198,10 @@
11511198
mov $rp_real,$rp
11521199
mov $ap_real,$ap
11531200
mov $bp_real,$bp
1154-
ldr $poly1,.Lpoly+8
1155-
ldr $poly3,.Lpoly+24
1201+
adrp $poly1, .Lpoly
1202+
ldr $poly1, [$poly1, #:lo12:.Lpoly+8]
1203+
adrp $poly3, .Lpoly
1204+
ldr $poly3, [$poly3, #:lo12:.Lpoly+24]
11561205
11571206
ldp $a0,$a1,[$ap,#64] // in1_z
11581207
ldp $a2,$a3,[$ap,#64+16]
@@ -1303,7 +1352,8 @@
13031352
stp $acc2,$acc3,[$rp_real,#$i+16]
13041353
___
13051354
$code.=<<___ if ($i == 0);
1306-
adr $bp_real,.Lone_mont-64
1355+
adrp $bp_real, .Lone_mont-64 // page of the address actually loaded; it sits 64 bytes before .Lone_mont and may be on the previous 4 KiB page
1356+
add $bp_real, $bp_real, #:lo12:.Lone_mont-64
13071357
___
13081358
}
13091359
$code.=<<___;
@@ -1354,7 +1404,8 @@
13541404
stp x21,x22,[sp,#32]
13551405
stp x23,x24,[sp,#48]
13561406
1357-
adr $ordk,.Lord
1407+
adrp $ordk,.Lord
1408+
add $ordk, $ordk, #:lo12:.Lord
13581409
ldr $bi,[$bp] // bp[0]
13591410
ldp $a0,$a1,[$ap]
13601411
ldp $a2,$a3,[$ap,#16]
@@ -1497,7 +1548,8 @@
14971548
stp x21,x22,[sp,#32]
14981549
stp x23,x24,[sp,#48]
14991550
1500-
adr $ordk,.Lord
1551+
adrp $ordk,.Lord
1552+
add $ordk, $ordk, #:lo12:.Lord
15011553
ldp $a0,$a1,[$ap]
15021554
ldp $a2,$a3,[$ap,#16]
15031555

0 commit comments

Comments
 (0)