
Commit 19b9a90

lundman authored and mcmilk committed
Restore ASMABI and other Unify work
Make sure all SHA2 transform functions have wrappers. For ASMABI to
work, the calling convention must be consistent.

Reviewed-by: Tino Reichardt <milky-zfs@mcmilk.de>
Reviewed-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Signed-off-by: Joergen Lundman <lundman@lundman.net>
Closes openzfs#14569
1 parent 4f4252c commit 19b9a90
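
Background: the hand-written x86-64 routines in module/icp follow the System V AMD64 calling convention, while some toolchains targeted by the cross-platform "Unify" work (notably Windows) default to the Microsoft x64 ABI, which passes arguments in different registers. Annotating the extern declarations with ASMABI tells the compiler to call these routines with the convention the assembly expects. A minimal sketch of such a macro, assuming GCC/Clang attribute support; the real definition lives in sys/asm_linkage.h and may differ:

/*
 * Hedged sketch, not the verbatim sys/asm_linkage.h definition:
 * force the System V convention on declarations of .S routines so
 * Microsoft-ABI callers pass arguments in %rdi/%rsi/%rdx as the
 * assembly expects, instead of %rcx/%rdx/%r8.
 */
#if defined(__x86_64__) && defined(_WIN32)
#define	ASMABI	__attribute__((sysv_abi))
#else
#define	ASMABI
#endif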

File tree

5 files changed: +56 −37 lines changed

module/icp/algs/blake3/blake3_impl.c

Lines changed: 10 additions & 10 deletions
@@ -34,15 +34,15 @@
     (defined(__x86_64) && defined(HAVE_SSE2)) || \
     (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
 
-extern void zfs_blake3_compress_in_place_sse2(uint32_t cv[8],
+extern void ASMABI zfs_blake3_compress_in_place_sse2(uint32_t cv[8],
     const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
     uint64_t counter, uint8_t flags);
 
-extern void zfs_blake3_compress_xof_sse2(const uint32_t cv[8],
+extern void ASMABI zfs_blake3_compress_xof_sse2(const uint32_t cv[8],
     const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
     uint64_t counter, uint8_t flags, uint8_t out[64]);
 
-extern void zfs_blake3_hash_many_sse2(const uint8_t * const *inputs,
+extern void ASMABI zfs_blake3_hash_many_sse2(const uint8_t * const *inputs,
     size_t num_inputs, size_t blocks, const uint32_t key[8],
     uint64_t counter, boolean_t increment_counter, uint8_t flags,
     uint8_t flags_start, uint8_t flags_end, uint8_t *out);

@@ -100,15 +100,15 @@ const blake3_ops_t blake3_sse2_impl = {
     (defined(__x86_64) && defined(HAVE_SSE2)) || \
     (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
 
-extern void zfs_blake3_compress_in_place_sse41(uint32_t cv[8],
+extern void ASMABI zfs_blake3_compress_in_place_sse41(uint32_t cv[8],
     const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
     uint64_t counter, uint8_t flags);
 
-extern void zfs_blake3_compress_xof_sse41(const uint32_t cv[8],
+extern void ASMABI zfs_blake3_compress_xof_sse41(const uint32_t cv[8],
     const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
     uint64_t counter, uint8_t flags, uint8_t out[64]);
 
-extern void zfs_blake3_hash_many_sse41(const uint8_t * const *inputs,
+extern void ASMABI zfs_blake3_hash_many_sse41(const uint8_t * const *inputs,
     size_t num_inputs, size_t blocks, const uint32_t key[8],
     uint64_t counter, boolean_t increment_counter, uint8_t flags,
     uint8_t flags_start, uint8_t flags_end, uint8_t *out);

@@ -163,7 +163,7 @@ const blake3_ops_t blake3_sse41_impl = {
 #endif
 
 #if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2)
-extern void zfs_blake3_hash_many_avx2(const uint8_t * const *inputs,
+extern void ASMABI zfs_blake3_hash_many_avx2(const uint8_t * const *inputs,
     size_t num_inputs, size_t blocks, const uint32_t key[8],
     uint64_t counter, boolean_t increment_counter, uint8_t flags,
     uint8_t flags_start, uint8_t flags_end, uint8_t *out);

@@ -196,15 +196,15 @@ blake3_avx2_impl = {
 #endif
 
 #if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)
-extern void zfs_blake3_compress_in_place_avx512(uint32_t cv[8],
+extern void ASMABI zfs_blake3_compress_in_place_avx512(uint32_t cv[8],
     const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
     uint64_t counter, uint8_t flags);
 
-extern void zfs_blake3_compress_xof_avx512(const uint32_t cv[8],
+extern void ASMABI zfs_blake3_compress_xof_avx512(const uint32_t cv[8],
     const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
     uint64_t counter, uint8_t flags, uint8_t out[64]);
 
-extern void zfs_blake3_hash_many_avx512(const uint8_t * const *inputs,
+extern void ASMABI zfs_blake3_hash_many_avx512(const uint8_t * const *inputs,
     size_t num_inputs, size_t blocks, const uint32_t key[8],
     uint64_t counter, boolean_t increment_counter, uint8_t flags,
     uint8_t flags_start, uint8_t flags_end, uint8_t *out);
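
All of these externs execute SIMD instructions, so in-kernel callers must bracket them with the OpenZFS FPU guards kfpu_begin()/kfpu_end(), as the wrappers in this file do. A hedged usage sketch; the helper name and argument values are illustrative only, and the flag semantics follow the declaration above, not this commit:

/* Illustrative only: hash `blocks` contiguous blocks of one input
 * with the SSE2 backend, counter starting at 0. */
static void
example_blake3_hash_sse2(const uint8_t *in, size_t blocks,
    const uint32_t key[8], uint8_t *out)
{
	const uint8_t *const inputs[1] = { in };

	kfpu_begin();			/* save kernel FPU/SIMD state */
	zfs_blake3_hash_many_sse2(inputs, 1, blocks, key, 0, B_TRUE,
	    0, 0, 0, out);
	kfpu_end();			/* restore it */
}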

module/icp/algs/sha2/sha256_impl.c

Lines changed: 13 additions & 3 deletions
@@ -29,9 +29,10 @@
 #include <sys/simd.h>
 
 #include <sha2/sha2_impl.h>
+#include <sys/asm_linkage.h>
 
 #define TF(E, N) \
-extern void E(uint32_t s[8], const void *, size_t); \
+extern void ASMABI E(uint32_t s[8], const void *, size_t); \
 static inline void N(uint32_t s[8], const void *d, size_t b) { \
 	kfpu_begin(); E(s, d, b); kfpu_end(); \
 }

@@ -44,10 +45,19 @@ static inline boolean_t sha2_is_supported(void)
 
 #if defined(__x86_64)
 
-extern void zfs_sha256_transform_x64(uint32_t s[8], const void *, size_t);
+/* Users of ASMABI require all calls to be from wrappers */
+extern void ASMABI
+zfs_sha256_transform_x64(uint32_t s[8], const void *, size_t);
+
+static inline void
+tf_sha256_transform_x64(uint32_t s[8], const void *d, size_t b)
+{
+	zfs_sha256_transform_x64(s, d, b);
+}
+
 const sha256_ops_t sha256_x64_impl = {
 	.is_supported = sha2_is_supported,
-	.transform = zfs_sha256_transform_x64,
+	.transform = tf_sha256_transform_x64,
 	.name = "x64"
 };
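
For the SIMD variants in this file, the TF() macro above stamps out the same wrapper shape with the FPU guards included. As a sketch, an invocation such as TF(zfs_sha256_transform_shani, tf_sha256_shani), where the wrapper name is assumed for illustration while the extern symbol is real, expands to roughly:

/* Approximate expansion; real preprocessor output is one line each. */
extern void ASMABI zfs_sha256_transform_shani(uint32_t s[8],
    const void *, size_t);

static inline void
tf_sha256_shani(uint32_t s[8], const void *d, size_t b)
{
	kfpu_begin();
	zfs_sha256_transform_shani(s, d, b);
	kfpu_end();
}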

module/icp/algs/sha2/sha512_impl.c

Lines changed: 12 additions & 3 deletions
@@ -29,9 +29,10 @@
 #include <sys/simd.h>
 
 #include <sha2/sha2_impl.h>
+#include <sys/asm_linkage.h>
 
 #define TF(E, N) \
-extern void E(uint64_t s[8], const void *, size_t); \
+extern void ASMABI E(uint64_t s[8], const void *, size_t); \
 static inline void N(uint64_t s[8], const void *d, size_t b) { \
 	kfpu_begin(); E(s, d, b); kfpu_end(); \
 }

@@ -44,10 +45,18 @@ static inline boolean_t sha2_is_supported(void)
 
 #if defined(__x86_64)
 
-extern void zfs_sha512_transform_x64(uint64_t s[8], const void *, size_t);
+/* Users of ASMABI require all calls to be from wrappers */
+extern void ASMABI
+zfs_sha512_transform_x64(uint64_t s[8], const void *, size_t);
+
+static inline void
+tf_sha512_transform_x64(uint64_t s[8], const void *d, size_t b)
+{
+	zfs_sha512_transform_x64(s, d, b);
+}
 const sha512_ops_t sha512_x64_impl = {
 	.is_supported = sha2_is_supported,
-	.transform = zfs_sha512_transform_x64,
+	.transform = tf_sha512_transform_x64,
 	.name = "x64"
 };
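
The plain x64 transform touches no SIMD state, so its wrapper body is a bare call. It exists because .transform is invoked indirectly through a native-ABI function pointer, and the rule in the comment is that ASMABI symbols are only ever called directly from wrappers. A sketch of the hazard being avoided, using a hypothetical ops type for illustration:

/*
 * Hypothetical illustration: on a Microsoft-ABI target, storing the
 * sysv_abi symbol itself in the table would make every indirect call
 * through .transform use the wrong argument registers. Storing the
 * native-ABI wrapper instead confines the ABI switch to one direct
 * call site, where the compiler emits the correct shim.
 */
typedef struct {
	void (*transform)(uint64_t s[8], const void *, size_t);
} example_ops_t;

static const example_ops_t ok = { .transform = tf_sha512_transform_x64 };
/* not: { .transform = zfs_sha512_transform_x64 }  -- ABI mismatch */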

module/icp/asm-x86_64/sha2/sha256-x86_64.S

Lines changed: 12 additions & 12 deletions
@@ -26,8 +26,8 @@
 
 SECTION_STATIC
 
-.align 64
-.type K256,@object
+.balign 64
+SET_OBJ(K256)
 K256:
 .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
 .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5

@@ -105,7 +105,7 @@ ENTRY_ALIGN(zfs_sha256_transform_x64, 16)
 movl 24(%rdi),%r10d
 movl 28(%rdi),%r11d
 jmp .Lloop
-.align 16
+.balign 16
 .Lloop:
 movl %ebx,%edi
 leaq K256(%rip),%rbp

@@ -622,7 +622,7 @@ ENTRY_ALIGN(zfs_sha256_transform_x64, 16)
 addl %r12d,%eax
 leaq 20(%rbp),%rbp
 jmp .Lrounds_16_xx
-.align 16
+.balign 16
 .Lrounds_16_xx:
 movl 4(%rsp),%r13d
 movl 56(%rsp),%r15d

@@ -1436,7 +1436,7 @@ ENTRY_ALIGN(zfs_sha256_transform_shani, 64)
 punpcklqdq %xmm0,%xmm2
 jmp .Loop_shani
 
-.align 16
+.balign 16
 .Loop_shani:
 movdqu (%rsi),%xmm3
 movdqu 16(%rsi),%xmm4

@@ -1666,7 +1666,7 @@ ENTRY_ALIGN(zfs_sha256_transform_ssse3, 64)
 movl 28(%rdi),%r11d
 
 jmp .Lloop_ssse3
-.align 16
+.balign 16
 .Lloop_ssse3:
 movdqa K256+512(%rip),%xmm7
 movdqu 0(%rsi),%xmm0

@@ -1696,7 +1696,7 @@ ENTRY_ALIGN(zfs_sha256_transform_ssse3, 64)
 movl %r8d,%r13d
 jmp .Lssse3_00_47
 
-.align 16
+.balign 16
 .Lssse3_00_47:
 subq $-128,%rbp
 rorl $14,%r13d

@@ -2779,7 +2779,7 @@ ENTRY_ALIGN(zfs_sha256_transform_avx, 64)
 vmovdqa K256+512+32(%rip),%xmm8
 vmovdqa K256+512+64(%rip),%xmm9
 jmp .Lloop_avx
-.align 16
+.balign 16
 .Lloop_avx:
 vmovdqa K256+512(%rip),%xmm7
 vmovdqu 0(%rsi),%xmm0

@@ -2805,7 +2805,7 @@ ENTRY_ALIGN(zfs_sha256_transform_avx, 64)
 movl %r8d,%r13d
 jmp .Lavx_00_47
 
-.align 16
+.balign 16
 .Lavx_00_47:
 subq $-128,%rbp
 vpalignr $4,%xmm0,%xmm1,%xmm4

@@ -3858,7 +3858,7 @@ ENTRY_ALIGN(zfs_sha256_transform_avx2, 64)
 vmovdqa K256+512+32(%rip),%ymm8
 vmovdqa K256+512+64(%rip),%ymm9
 jmp .Loop_avx2
-.align 16
+.balign 16
 .Loop_avx2:
 vmovdqa K256+512(%rip),%ymm7
 vmovdqu -64+0(%rsi),%xmm0

@@ -3900,7 +3900,7 @@ ENTRY_ALIGN(zfs_sha256_transform_avx2, 64)
 subq $-32*4,%rbp
 jmp .Lavx2_00_47
 
-.align 16
+.balign 16
 .Lavx2_00_47:
 leaq -64(%rsp),%rsp
 .cfi_escape 0x0f,0x05,0x77,0x38,0x06,0x23,0x08

@@ -4842,7 +4842,7 @@ ENTRY_ALIGN(zfs_sha256_transform_avx2, 64)
 xorl %ecx,%edi
 movl %r9d,%r12d
 jmp .Lower_avx2
-.align 16
+.balign 16
 .Lower_avx2:
 addl 0+16(%rbp),%r11d
 andl %r8d,%r12d
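
Two mechanical substitutions run through both assembly files. First, .align is target-dependent in GNU as (a byte count on x86 ELF, a power of two on some other targets), while .balign always takes a byte count, so .balign 16 means the same thing everywhere. Second, .type sym,@object is an ELF-only directive that Mach-O and PE-COFF assemblers reject, so it moves behind a SET_OBJ() macro. A hedged sketch of what that macro can look like; the real definition is in sys/asm_linkage.h:

/* Hedged sketch, not the verbatim header: emit the ELF .type
 * directive only where the object format supports it. */
#if defined(__ELF__)
#define	SET_OBJ(x)	.type x, @object
#else
#define	SET_OBJ(x)	/* Mach-O / PE-COFF: nothing to emit */
#endif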

module/icp/asm-x86_64/sha2/sha512-x86_64.S

Lines changed: 9 additions & 9 deletions
@@ -26,8 +26,8 @@
 
 SECTION_STATIC
 
-.align 64
-.type K512,@object
+.balign 64
+SET_OBJ(K512)
 K512:
 .quad 0x428a2f98d728ae22,0x7137449123ef65cd
 .quad 0x428a2f98d728ae22,0x7137449123ef65cd

@@ -148,7 +148,7 @@ ENTRY_ALIGN(zfs_sha512_transform_x64, 16)
 movq 48(%rdi),%r10
 movq 56(%rdi),%r11
 jmp .Lloop
-.align 16
+.balign 16
 .Lloop:
 movq %rbx,%rdi
 leaq K512(%rip),%rbp

@@ -665,7 +665,7 @@ ENTRY_ALIGN(zfs_sha512_transform_x64, 16)
 addq %r12,%rax
 leaq 24(%rbp),%rbp
 jmp .Lrounds_16_xx
-.align 16
+.balign 16
 .Lrounds_16_xx:
 movq 8(%rsp),%r13
 movq 112(%rsp),%r15

@@ -1501,7 +1501,7 @@ ENTRY_ALIGN(zfs_sha512_transform_avx, 64)
 movq 48(%rdi),%r10
 movq 56(%rdi),%r11
 jmp .Lloop_avx
-.align 16
+.balign 16
 .Lloop_avx:
 vmovdqa K512+1280(%rip),%xmm11
 vmovdqu 0(%rsi),%xmm0

@@ -1543,7 +1543,7 @@ ENTRY_ALIGN(zfs_sha512_transform_avx, 64)
 movq %r8,%r13
 jmp .Lavx_00_47
 
-.align 16
+.balign 16
 .Lavx_00_47:
 addq $256,%rbp
 vpalignr $8,%xmm0,%xmm1,%xmm8

@@ -2670,7 +2670,7 @@ ENTRY_ALIGN(zfs_sha512_transform_avx2, 64)
 movq 48(%rdi),%r10
 movq 56(%rdi),%r11
 jmp .Loop_avx2
-.align 16
+.balign 16
 .Loop_avx2:
 vmovdqu -128(%rsi),%xmm0
 vmovdqu -128+16(%rsi),%xmm1

@@ -2732,7 +2732,7 @@ ENTRY_ALIGN(zfs_sha512_transform_avx2, 64)
 addq $32*8,%rbp
 jmp .Lavx2_00_47
 
-.align 16
+.balign 16
 .Lavx2_00_47:
 leaq -128(%rsp),%rsp
 .cfi_escape 0x0f,0x06,0x77,0xf8,0x00,0x06,0x23,0x08

@@ -3750,7 +3750,7 @@ ENTRY_ALIGN(zfs_sha512_transform_avx2, 64)
 xorq %rcx,%rdi
 movq %r9,%r12
 jmp .Lower_avx2
-.align 16
+.balign 16
 .Lower_avx2:
 addq 0+16(%rbp),%r11
 andq %r8,%r12
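
The same .balign and SET_OBJ() substitutions apply here as in sha256-x86_64.S. Both files already route their entry points through ENTRY_ALIGN(); a hedged sketch of that convention follows, with the caveat that the real sys/asm_linkage.h macro also handles symbol typing, size, and visibility per object format:

/* Hedged sketch of an ENTRY_ALIGN-style macro: a global function
 * symbol emitted at a given byte alignment. */
#define	ENTRY_ALIGN(name, a)	\
	.balign	a;		\
	.globl	name;		\
name: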
