Skip to content

Commit 85bfc49

Browse files
authored
Merge pull request #9355 from SparkiDev/aes_arm_asm_fix
AES ARM ASM: user data loaded 1 reg at a time
2 parents e6af5bc + d883a95 commit 85bfc49

File tree

6 files changed

+110
-152
lines changed

6 files changed

+110
-152
lines changed

wolfcrypt/src/port/arm/armv8-32-aes-asm.S

Lines changed: 6 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -8596,17 +8596,10 @@ AES_set_encrypt_key:
85968596
beq L_AES_set_encrypt_key_start_128
85978597
cmp r1, #0xc0
85988598
beq L_AES_set_encrypt_key_start_192
8599-
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
8600-
ldm r0, {r4, r5}
8601-
#else
8602-
ldrd r4, r5, [r0]
8603-
#endif
8604-
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
8599+
ldr r4, [r0]
8600+
ldr r5, [r0, #4]
86058601
ldr r6, [r0, #8]
86068602
ldr r7, [r0, #12]
8607-
#else
8608-
ldrd r6, r7, [r0, #8]
8609-
#endif
86108603
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)
86118604
# REV r4, r4
86128605
eor r3, r4, r4, ror #16
@@ -8635,18 +8628,10 @@ AES_set_encrypt_key:
86358628
rev r7, r7
86368629
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */
86378630
stm r2!, {r4, r5, r6, r7}
8638-
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
86398631
ldr r4, [r0, #16]
86408632
ldr r5, [r0, #20]
8641-
#else
8642-
ldrd r4, r5, [r0, #16]
8643-
#endif
8644-
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
86458633
ldr r6, [r0, #24]
86468634
ldr r7, [r0, #28]
8647-
#else
8648-
ldrd r6, r7, [r0, #24]
8649-
#endif
86508635
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)
86518636
# REV r4, r4
86528637
eor r3, r4, r4, ror #16
@@ -8825,23 +8810,12 @@ L_AES_set_encrypt_key_loop_256:
88258810
sub r2, r2, #16
88268811
b L_AES_set_encrypt_key_end
88278812
L_AES_set_encrypt_key_start_192:
8828-
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
8829-
ldm r0, {r4, r5}
8830-
#else
8831-
ldrd r4, r5, [r0]
8832-
#endif
8833-
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
8813+
ldr r4, [r0]
8814+
ldr r5, [r0, #4]
88348815
ldr r6, [r0, #8]
88358816
ldr r7, [r0, #12]
8836-
#else
8837-
ldrd r6, r7, [r0, #8]
8838-
#endif
8839-
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
88408817
ldr r1, [r0, #20]
88418818
ldr r0, [r0, #16]
8842-
#else
8843-
ldrd r0, r1, [r0, #16]
8844-
#endif
88458819
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)
88468820
# REV r4, r4
88478821
eor r3, r4, r4, ror #16
@@ -8989,17 +8963,10 @@ L_AES_set_encrypt_key_loop_192:
89898963
stm r2, {r0, r1, r4, r5}
89908964
b L_AES_set_encrypt_key_end
89918965
L_AES_set_encrypt_key_start_128:
8992-
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
8993-
ldm r0, {r4, r5}
8994-
#else
8995-
ldrd r4, r5, [r0]
8996-
#endif
8997-
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
8966+
ldr r4, [r0]
8967+
ldr r5, [r0, #4]
89988968
ldr r6, [r0, #8]
89998969
ldr r7, [r0, #12]
9000-
#else
9001-
ldrd r6, r7, [r0, #8]
9002-
#endif
90038970
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)
90048971
# REV r4, r4
90058972
eor r3, r4, r4, ror #16

wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c

Lines changed: 6 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -8876,17 +8876,10 @@ WC_OMIT_FRAME_POINTER void AES_set_encrypt_key(const unsigned char* key,
88768876
"beq L_AES_set_encrypt_key_start_128_%=\n\t"
88778877
"cmp %[len], #0xc0\n\t"
88788878
"beq L_AES_set_encrypt_key_start_192_%=\n\t"
8879-
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
8880-
"ldm r0, {r4, r5}\n\t"
8881-
#else
8882-
"ldrd r4, r5, [%[key]]\n\t"
8883-
#endif
8884-
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
8879+
"ldr r4, [%[key]]\n\t"
8880+
"ldr r5, [%[key], #4]\n\t"
88858881
"ldr r6, [%[key], #8]\n\t"
88868882
"ldr r7, [%[key], #12]\n\t"
8887-
#else
8888-
"ldrd r6, r7, [%[key], #8]\n\t"
8889-
#endif
88908883
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)
88918884
/* REV r4, r4 */
88928885
"eor r3, r4, r4, ror #16\n\t"
@@ -8915,18 +8908,10 @@ WC_OMIT_FRAME_POINTER void AES_set_encrypt_key(const unsigned char* key,
89158908
"rev r7, r7\n\t"
89168909
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */
89178910
"stm %[ks]!, {r4, r5, r6, r7}\n\t"
8918-
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
89198911
"ldr r4, [%[key], #16]\n\t"
89208912
"ldr r5, [%[key], #20]\n\t"
8921-
#else
8922-
"ldrd r4, r5, [%[key], #16]\n\t"
8923-
#endif
8924-
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
89258913
"ldr r6, [%[key], #24]\n\t"
89268914
"ldr r7, [%[key], #28]\n\t"
8927-
#else
8928-
"ldrd r6, r7, [%[key], #24]\n\t"
8929-
#endif
89308915
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)
89318916
/* REV r4, r4 */
89328917
"eor r3, r4, r4, ror #16\n\t"
@@ -9107,23 +9092,12 @@ WC_OMIT_FRAME_POINTER void AES_set_encrypt_key(const unsigned char* key,
91079092
"b L_AES_set_encrypt_key_end_%=\n\t"
91089093
"\n"
91099094
"L_AES_set_encrypt_key_start_192_%=: \n\t"
9110-
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
9111-
"ldm r0, {r4, r5}\n\t"
9112-
#else
9113-
"ldrd r4, r5, [%[key]]\n\t"
9114-
#endif
9115-
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
9095+
"ldr r4, [%[key]]\n\t"
9096+
"ldr r5, [%[key], #4]\n\t"
91169097
"ldr r6, [%[key], #8]\n\t"
91179098
"ldr r7, [%[key], #12]\n\t"
9118-
#else
9119-
"ldrd r6, r7, [%[key], #8]\n\t"
9120-
#endif
9121-
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
91229099
"ldr %[len], [%[key], #20]\n\t"
91239100
"ldr %[key], [%[key], #16]\n\t"
9124-
#else
9125-
"ldrd %[key], %[len], [%[key], #16]\n\t"
9126-
#endif
91279101
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)
91289102
/* REV r4, r4 */
91299103
"eor r3, r4, r4, ror #16\n\t"
@@ -9273,17 +9247,10 @@ WC_OMIT_FRAME_POINTER void AES_set_encrypt_key(const unsigned char* key,
92739247
"b L_AES_set_encrypt_key_end_%=\n\t"
92749248
"\n"
92759249
"L_AES_set_encrypt_key_start_128_%=: \n\t"
9276-
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
9277-
"ldm r0, {r4, r5}\n\t"
9278-
#else
9279-
"ldrd r4, r5, [%[key]]\n\t"
9280-
#endif
9281-
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
9250+
"ldr r4, [%[key]]\n\t"
9251+
"ldr r5, [%[key], #4]\n\t"
92829252
"ldr r6, [%[key], #8]\n\t"
92839253
"ldr r7, [%[key], #12]\n\t"
9284-
#else
9285-
"ldrd r6, r7, [%[key], #8]\n\t"
9286-
#endif
92879254
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)
92889255
/* REV r4, r4 */
92899256
"eor r3, r4, r4, ror #16\n\t"

wolfcrypt/src/port/arm/armv8-aes-asm.S

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,9 @@ _AES_set_key_AARCH64:
4646
cmp x1, #24
4747
blt L_aes_set_key_arm64_crypto_start_128
4848
bgt L_aes_set_key_arm64_crypto_start_256
49-
ldp x4, x6, [x0], #16
50-
ldr x8, [x0]
49+
ldr x4, [x0], #8
50+
ldr x6, [x0], #8
51+
ldr x8, [x0], #8
5152
stp x4, x6, [x2], #16
5253
str x8, [x2], #8
5354
lsr x5, x4, #32
@@ -212,8 +213,10 @@ _AES_set_key_AARCH64:
212213
stur q0, [x2, #96]
213214
b L_aes_set_key_arm64_crypto_done
214215
L_aes_set_key_arm64_crypto_start_256:
215-
ldp x4, x6, [x0], #16
216-
ldp x8, x10, [x0], #16
216+
ldr x4, [x0], #8
217+
ldr x6, [x0], #8
218+
ldr x8, [x0], #8
219+
ldr x10, [x0], #8
217220
stp x4, x6, [x2], #16
218221
stp x8, x10, [x2], #16
219222
lsr x5, x4, #32
@@ -412,7 +415,8 @@ L_aes_set_key_arm64_crypto_start_256:
412415
stur q0, [x2, #112]
413416
b L_aes_set_key_arm64_crypto_done
414417
L_aes_set_key_arm64_crypto_start_128:
415-
ldp x4, x6, [x0], #16
418+
ldr x4, [x0], #8
419+
ldr x6, [x0], #8
416420
stp x4, x6, [x2], #16
417421
lsr x5, x4, #32
418422
lsr x7, x6, #32

wolfcrypt/src/port/arm/armv8-aes-asm_c.c

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,9 @@ void AES_set_key_AARCH64(const byte* userKey, int keylen, byte* key, int dir)
4040
"cmp %x[keylen], #24\n\t"
4141
"b.lt L_aes_set_key_arm64_crypto_start_128_%=\n\t"
4242
"b.gt L_aes_set_key_arm64_crypto_start_256_%=\n\t"
43-
"ldp x4, x6, [%x[userKey]], #16\n\t"
44-
"ldr x8, [%x[userKey]]\n\t"
43+
"ldr x4, [%x[userKey]], #8\n\t"
44+
"ldr x6, [%x[userKey]], #8\n\t"
45+
"ldr x8, [%x[userKey]], #8\n\t"
4546
"stp x4, x6, [%x[key]], #16\n\t"
4647
"str x8, [%x[key]], #8\n\t"
4748
"lsr x5, x4, #32\n\t"
@@ -207,8 +208,10 @@ void AES_set_key_AARCH64(const byte* userKey, int keylen, byte* key, int dir)
207208
"b L_aes_set_key_arm64_crypto_done_%=\n\t"
208209
"\n"
209210
"L_aes_set_key_arm64_crypto_start_256_%=: \n\t"
210-
"ldp x4, x6, [%x[userKey]], #16\n\t"
211-
"ldp x8, x10, [%x[userKey]], #16\n\t"
211+
"ldr x4, [%x[userKey]], #8\n\t"
212+
"ldr x6, [%x[userKey]], #8\n\t"
213+
"ldr x8, [%x[userKey]], #8\n\t"
214+
"ldr x10, [%x[userKey]], #8\n\t"
212215
"stp x4, x6, [%x[key]], #16\n\t"
213216
"stp x8, x10, [%x[key]], #16\n\t"
214217
"lsr x5, x4, #32\n\t"
@@ -408,7 +411,8 @@ void AES_set_key_AARCH64(const byte* userKey, int keylen, byte* key, int dir)
408411
"b L_aes_set_key_arm64_crypto_done_%=\n\t"
409412
"\n"
410413
"L_aes_set_key_arm64_crypto_start_128_%=: \n\t"
411-
"ldp x4, x6, [%x[userKey]], #16\n\t"
414+
"ldr x4, [%x[userKey]], #8\n\t"
415+
"ldr x6, [%x[userKey]], #8\n\t"
412416
"stp x4, x6, [%x[key]], #16\n\t"
413417
"lsr x5, x4, #32\n\t"
414418
"lsr x7, x6, #32\n\t"

wolfcrypt/src/port/arm/armv8-mlkem-asm.S

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10074,6 +10074,13 @@ _mlkem_shake128_blocksx3_seed_neon:
1007410074
stp d10, d11, [x29, #176]
1007510075
stp d12, d13, [x29, #192]
1007610076
stp d14, d15, [x29, #208]
10077+
#ifndef __APPLE__
10078+
adrp x28, L_sha3_aarch64_r
10079+
add x28, x28, :lo12:L_sha3_aarch64_r
10080+
#else
10081+
adrp x28, L_sha3_aarch64_r@PAGE
10082+
add x28, x28, :lo12:L_sha3_aarch64_r@PAGEOFF
10083+
#endif /* __APPLE__ */
1007710084
str x0, [x29, #40]
1007810085
add x0, x0, #32
1007910086
ld1 {v4.d}[0], [x0]
@@ -10414,6 +10421,13 @@ _mlkem_shake256_blocksx3_seed_neon:
1041410421
stp d10, d11, [x29, #176]
1041510422
stp d12, d13, [x29, #192]
1041610423
stp d14, d15, [x29, #208]
10424+
#ifndef __APPLE__
10425+
adrp x28, L_sha3_aarch64_r
10426+
add x28, x28, :lo12:L_sha3_aarch64_r
10427+
#else
10428+
adrp x28, L_sha3_aarch64_r@PAGE
10429+
add x28, x28, :lo12:L_sha3_aarch64_r@PAGEOFF
10430+
#endif /* __APPLE__ */
1041710431
str x0, [x29, #40]
1041810432
add x0, x0, #32
1041910433
ld1 {v4.d}[0], [x0]

0 commit comments

Comments
 (0)