[X86] Add atomic vector tests for unaligned >1 sizes. #120387
base: users/jofrn/spr/main/f9d761c5
Conversation
@llvm/pr-subscribers-backend-x86

Author: None (jofrn)

Changes: Atomic vectors with size >1 are lowered to calls. Adding their tests separately here.

Stack:

Full diff: https://github.com/llvm/llvm-project/pull/120387.diff

1 file affected:
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 2bde0d2ffd06ad..435e58bef6642d 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -110,3 +110,226 @@ define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) {
ret <1 x bfloat> %ret
}
+define <1 x i64> @atomic_vec1_i64(ptr %x) {
+; CHECK-LABEL: atomic_vec1_i64:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movq %rdi, %rsi
+; CHECK-NEXT: movq %rsp, %rdx
+; CHECK-NEXT: movl $8, %edi
+; CHECK-NEXT: movl $2, %ecx
+; CHECK-NEXT: callq ___atomic_load
+; CHECK-NEXT: movq (%rsp), %rax
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: retq
+;
+; CHECK0-LABEL: atomic_vec1_i64:
+; CHECK0: ## %bb.0:
+; CHECK0-NEXT: pushq %rax
+; CHECK0-NEXT: .cfi_def_cfa_offset 16
+; CHECK0-NEXT: movq %rdi, %rsi
+; CHECK0-NEXT: movl $8, %edi
+; CHECK0-NEXT: movq %rsp, %rdx
+; CHECK0-NEXT: movl $2, %ecx
+; CHECK0-NEXT: callq ___atomic_load
+; CHECK0-NEXT: movq (%rsp), %rax
+; CHECK0-NEXT: popq %rcx
+; CHECK0-NEXT: retq
+ %ret = load atomic <1 x i64>, ptr %x acquire, align 4
+ ret <1 x i64> %ret
+}
+
+define <1 x double> @atomic_vec1_double(ptr %x) {
+; CHECK-LABEL: atomic_vec1_double:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movq %rdi, %rsi
+; CHECK-NEXT: movq %rsp, %rdx
+; CHECK-NEXT: movl $8, %edi
+; CHECK-NEXT: movl $2, %ecx
+; CHECK-NEXT: callq ___atomic_load
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+;
+; CHECK0-LABEL: atomic_vec1_double:
+; CHECK0: ## %bb.0:
+; CHECK0-NEXT: pushq %rax
+; CHECK0-NEXT: .cfi_def_cfa_offset 16
+; CHECK0-NEXT: movq %rdi, %rsi
+; CHECK0-NEXT: movl $8, %edi
+; CHECK0-NEXT: movq %rsp, %rdx
+; CHECK0-NEXT: movl $2, %ecx
+; CHECK0-NEXT: callq ___atomic_load
+; CHECK0-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK0-NEXT: popq %rax
+; CHECK0-NEXT: retq
+ %ret = load atomic <1 x double>, ptr %x acquire, align 4
+ ret <1 x double> %ret
+}
+
+define <2 x i32> @atomic_vec2_i32(ptr %x) {
+; CHECK-LABEL: atomic_vec2_i32:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movq %rdi, %rsi
+; CHECK-NEXT: movq %rsp, %rdx
+; CHECK-NEXT: movl $8, %edi
+; CHECK-NEXT: movl $2, %ecx
+; CHECK-NEXT: callq ___atomic_load
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+;
+; CHECK0-LABEL: atomic_vec2_i32:
+; CHECK0: ## %bb.0:
+; CHECK0-NEXT: pushq %rax
+; CHECK0-NEXT: .cfi_def_cfa_offset 16
+; CHECK0-NEXT: movq %rdi, %rsi
+; CHECK0-NEXT: movl $8, %edi
+; CHECK0-NEXT: movq %rsp, %rdx
+; CHECK0-NEXT: movl $2, %ecx
+; CHECK0-NEXT: callq ___atomic_load
+; CHECK0-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; CHECK0-NEXT: popq %rax
+; CHECK0-NEXT: retq
+ %ret = load atomic <2 x i32>, ptr %x acquire, align 4
+ ret <2 x i32> %ret
+}
+
+define <4 x float> @atomic_vec4_float(ptr %x) {
+; CHECK-LABEL: atomic_vec4_float:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movq %rdi, %rsi
+; CHECK-NEXT: movq %rsp, %rdx
+; CHECK-NEXT: movl $16, %edi
+; CHECK-NEXT: movl $2, %ecx
+; CHECK-NEXT: callq ___atomic_load
+; CHECK-NEXT: movaps (%rsp), %xmm0
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: retq
+;
+; CHECK0-LABEL: atomic_vec4_float:
+; CHECK0: ## %bb.0:
+; CHECK0-NEXT: subq $24, %rsp
+; CHECK0-NEXT: .cfi_def_cfa_offset 32
+; CHECK0-NEXT: movq %rdi, %rsi
+; CHECK0-NEXT: movl $16, %edi
+; CHECK0-NEXT: movq %rsp, %rdx
+; CHECK0-NEXT: movl $2, %ecx
+; CHECK0-NEXT: callq ___atomic_load
+; CHECK0-NEXT: movaps (%rsp), %xmm0
+; CHECK0-NEXT: addq $24, %rsp
+; CHECK0-NEXT: retq
+ %ret = load atomic <4 x float>, ptr %x acquire, align 4
+ ret <4 x float> %ret
+}
+
+define <8 x double> @atomic_vec8_double(ptr %x) {
+; CHECK-LABEL: atomic_vec8_double:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: subq $72, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 80
+; CHECK-NEXT: movq %rdi, %rsi
+; CHECK-NEXT: movq %rsp, %rdx
+; CHECK-NEXT: movl $64, %edi
+; CHECK-NEXT: movl $2, %ecx
+; CHECK-NEXT: callq ___atomic_load
+; CHECK-NEXT: movaps (%rsp), %xmm0
+; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
+; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2
+; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3
+; CHECK-NEXT: addq $72, %rsp
+; CHECK-NEXT: retq
+;
+; CHECK0-LABEL: atomic_vec8_double:
+; CHECK0: ## %bb.0:
+; CHECK0-NEXT: subq $72, %rsp
+; CHECK0-NEXT: .cfi_def_cfa_offset 80
+; CHECK0-NEXT: movq %rdi, %rsi
+; CHECK0-NEXT: movl $64, %edi
+; CHECK0-NEXT: movq %rsp, %rdx
+; CHECK0-NEXT: movl $2, %ecx
+; CHECK0-NEXT: callq ___atomic_load
+; CHECK0-NEXT: movapd (%rsp), %xmm0
+; CHECK0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm1
+; CHECK0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm2
+; CHECK0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm3
+; CHECK0-NEXT: addq $72, %rsp
+; CHECK0-NEXT: retq
+ %ret = load atomic <8 x double>, ptr %x acquire, align 4
+ ret <8 x double> %ret
+}
+
+define <16 x bfloat> @atomic_vec16_bfloat(ptr %x) {
+; CHECK-LABEL: atomic_vec16_bfloat:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movq %rdi, %rsi
+; CHECK-NEXT: movq %rsp, %rdx
+; CHECK-NEXT: movl $32, %edi
+; CHECK-NEXT: movl $2, %ecx
+; CHECK-NEXT: callq ___atomic_load
+; CHECK-NEXT: movaps (%rsp), %xmm0
+; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: retq
+;
+; CHECK0-LABEL: atomic_vec16_bfloat:
+; CHECK0: ## %bb.0:
+; CHECK0-NEXT: subq $40, %rsp
+; CHECK0-NEXT: .cfi_def_cfa_offset 48
+; CHECK0-NEXT: movq %rdi, %rsi
+; CHECK0-NEXT: movl $32, %edi
+; CHECK0-NEXT: movq %rsp, %rdx
+; CHECK0-NEXT: movl $2, %ecx
+; CHECK0-NEXT: callq ___atomic_load
+; CHECK0-NEXT: movaps (%rsp), %xmm0
+; CHECK0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
+; CHECK0-NEXT: addq $40, %rsp
+; CHECK0-NEXT: retq
+ %ret = load atomic <16 x bfloat>, ptr %x acquire, align 4
+ ret <16 x bfloat> %ret
+}
+
+define <32 x half> @atomic_vec32_half(ptr %x) {
+; CHECK-LABEL: atomic_vec32_half:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: subq $72, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 80
+; CHECK-NEXT: movq %rdi, %rsi
+; CHECK-NEXT: movq %rsp, %rdx
+; CHECK-NEXT: movl $64, %edi
+; CHECK-NEXT: movl $2, %ecx
+; CHECK-NEXT: callq ___atomic_load
+; CHECK-NEXT: movaps (%rsp), %xmm0
+; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
+; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2
+; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3
+; CHECK-NEXT: addq $72, %rsp
+; CHECK-NEXT: retq
+;
+; CHECK0-LABEL: atomic_vec32_half:
+; CHECK0: ## %bb.0:
+; CHECK0-NEXT: subq $72, %rsp
+; CHECK0-NEXT: .cfi_def_cfa_offset 80
+; CHECK0-NEXT: movq %rdi, %rsi
+; CHECK0-NEXT: movl $64, %edi
+; CHECK0-NEXT: movq %rsp, %rdx
+; CHECK0-NEXT: movl $2, %ecx
+; CHECK0-NEXT: callq ___atomic_load
+; CHECK0-NEXT: movaps (%rsp), %xmm0
+; CHECK0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
+; CHECK0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2
+; CHECK0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3
+; CHECK0-NEXT: addq $72, %rsp
+; CHECK0-NEXT: retq
+ %ret = load atomic <32 x half>, ptr %x acquire, align 4
+ ret <32 x half> %ret
+}
@@ -110,3 +110,226 @@ define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) {
  ret <1 x bfloat> %ret
}

define <1 x i64> @atomic_vec1_i64(ptr %x) {
add nounwind to get rid of cfi noise
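As a rough sketch of that suggestion (editor's illustration, reusing one of the test bodies from the diff above): marking the function nounwind means no unwind information is required, so the .cfi_def_cfa_offset lines disappear from the generated checks.

; nounwind: codegen emits no .cfi_* directives for this function, so the
; regenerated CHECK lines only need to match the actual load lowering.
define <1 x i64> @atomic_vec1_i64(ptr %x) nounwind {
  %ret = load atomic <1 x i64>, ptr %x acquire, align 4
  ret <1 x i64> %ret
}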
LGTM - cheers
That's not true; they're only lowered to calls when the alignment is not known to be sufficient (e.g. …)
; CHECK0-NEXT:    movq (%rsp), %rax
; CHECK0-NEXT:    popq %rcx
; CHECK0-NEXT:    retq
  %ret = load atomic <1 x ptr>, ptr %x acquire, align 4
Should test the naturally aligned case; this under-aligned one is a separate test.
Added that one here: https://github.com/llvm/llvm-project/pull/120385/files.
Probably should just add these kinds of cases with that PR. The set of strange under-aligned cases happens to work now, but I'm not sure that's enough reason to push them separately.
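For context, a minimal sketch of the distinction drawn in this thread (editor's illustration with hypothetical function names, assuming an x86-64 target): the same <1 x i64> load either stays inline or becomes a __atomic_load libcall depending on whether the stated alignment reaches the natural 8-byte alignment.

; Naturally aligned: 8-byte alignment is sufficient for an 8-byte atomic
; load on x86-64, so no libcall is expected here.
define <1 x i64> @atomic_vec1_i64_aligned(ptr %x) nounwind {
  %ret = load atomic <1 x i64>, ptr %x acquire, align 8
  ret <1 x i64> %ret
}

; Under-aligned: align 4 is below the natural alignment, so this is the
; form that lowers to a call to __atomic_load, as in the tests above.
define <1 x i64> @atomic_vec1_i64_underaligned(ptr %x) nounwind {
  %ret = load atomic <1 x i64>, ptr %x acquire, align 4
  ret <1 x i64> %ret
}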
; CHECK0-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK0-NEXT:    popq %rax
; CHECK0-NEXT:    retq
  %ret = load atomic <1 x double>, ptr %x acquire, align 4
these are all under aligned
Added this one here: https://github.com/llvm/llvm-project/pull/120386/files.
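A hedged sketch of the naturally aligned counterpart covered by the linked PR (hypothetical function name; the actual test lives in #120386): with align 8, matching the natural alignment of the 8-byte element, no __atomic_load call should be needed.

define <1 x double> @atomic_vec1_double_aligned(ptr %x) nounwind {
  ; align 8 matches the natural alignment, so this load is expected to stay
  ; inline rather than go through the __atomic_load libcall.
  %ret = load atomic <1 x double>, ptr %x acquire, align 8
  ret <1 x double> %ret
}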
Unaligned atomic vectors with size >1 are lowered to calls. Adding their tests separately here. commit-id:a06a5cc6
Unaligned atomic vectors with size >1 are lowered to calls.
Adding their tests separately here.
Stack: