Skip to content

[X86] Add atomic vector tests for unaligned >1 sizes. #120387

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
jofrn wants to merge 1 commit into
base: users/jofrn/spr/main/f9d761c5
Choose a base branch
from

Conversation

jofrn
Copy link
Contributor

@jofrn jofrn commented Dec 18, 2024

@llvmbot
Copy link
Member

llvmbot commented Dec 18, 2024

@llvm/pr-subscribers-backend-x86

Author: None (jofrn)

Changes

Unaligned atomic vectors with size >1 are lowered to calls.
Adding their tests separately here.


Stack:

  • #120387 ⬅
  • #120386
  • #120385
  • #120384

⚠️ Part of a stack created by spr. Do not merge manually using the UI - doing so may have unexpected results.


Full diff: https://github.com/llvm/llvm-project/pull/120387.diff

1 File Affected:

  • (modified) llvm/test/CodeGen/X86/atomic-load-store.ll (+223)
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 2bde0d2ffd06ad..435e58bef6642d 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -110,3 +110,226 @@ define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) {
   ret <1 x bfloat> %ret
 }
 
+define <1 x i64> @atomic_vec1_i64(ptr %x) {
+; CHECK-LABEL: atomic_vec1_i64:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    movq %rdi, %rsi
+; CHECK-NEXT:    movq %rsp, %rdx
+; CHECK-NEXT:    movl $8, %edi
+; CHECK-NEXT:    movl $2, %ecx
+; CHECK-NEXT:    callq ___atomic_load
+; CHECK-NEXT:    movq (%rsp), %rax
+; CHECK-NEXT:    popq %rcx
+; CHECK-NEXT:    retq
+;
+; CHECK0-LABEL: atomic_vec1_i64:
+; CHECK0:       ## %bb.0:
+; CHECK0-NEXT:    pushq %rax
+; CHECK0-NEXT:    .cfi_def_cfa_offset 16
+; CHECK0-NEXT:    movq %rdi, %rsi
+; CHECK0-NEXT:    movl $8, %edi
+; CHECK0-NEXT:    movq %rsp, %rdx
+; CHECK0-NEXT:    movl $2, %ecx
+; CHECK0-NEXT:    callq ___atomic_load
+; CHECK0-NEXT:    movq (%rsp), %rax
+; CHECK0-NEXT:    popq %rcx
+; CHECK0-NEXT:    retq
+  %ret = load atomic <1 x i64>, ptr %x acquire, align 4
+  ret <1 x i64> %ret
+}
+
+define <1 x double> @atomic_vec1_double(ptr %x) {
+; CHECK-LABEL: atomic_vec1_double:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    movq %rdi, %rsi
+; CHECK-NEXT:    movq %rsp, %rdx
+; CHECK-NEXT:    movl $8, %edi
+; CHECK-NEXT:    movl $2, %ecx
+; CHECK-NEXT:    callq ___atomic_load
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    retq
+;
+; CHECK0-LABEL: atomic_vec1_double:
+; CHECK0:       ## %bb.0:
+; CHECK0-NEXT:    pushq %rax
+; CHECK0-NEXT:    .cfi_def_cfa_offset 16
+; CHECK0-NEXT:    movq %rdi, %rsi
+; CHECK0-NEXT:    movl $8, %edi
+; CHECK0-NEXT:    movq %rsp, %rdx
+; CHECK0-NEXT:    movl $2, %ecx
+; CHECK0-NEXT:    callq ___atomic_load
+; CHECK0-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK0-NEXT:    popq %rax
+; CHECK0-NEXT:    retq
+  %ret = load atomic <1 x double>, ptr %x acquire, align 4
+  ret <1 x double> %ret
+}
+
+define <2 x i32> @atomic_vec2_i32(ptr %x) {
+; CHECK-LABEL: atomic_vec2_i32:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    movq %rdi, %rsi
+; CHECK-NEXT:    movq %rsp, %rdx
+; CHECK-NEXT:    movl $8, %edi
+; CHECK-NEXT:    movl $2, %ecx
+; CHECK-NEXT:    callq ___atomic_load
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    retq
+;
+; CHECK0-LABEL: atomic_vec2_i32:
+; CHECK0:       ## %bb.0:
+; CHECK0-NEXT:    pushq %rax
+; CHECK0-NEXT:    .cfi_def_cfa_offset 16
+; CHECK0-NEXT:    movq %rdi, %rsi
+; CHECK0-NEXT:    movl $8, %edi
+; CHECK0-NEXT:    movq %rsp, %rdx
+; CHECK0-NEXT:    movl $2, %ecx
+; CHECK0-NEXT:    callq ___atomic_load
+; CHECK0-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
+; CHECK0-NEXT:    popq %rax
+; CHECK0-NEXT:    retq
+  %ret = load atomic <2 x i32>, ptr %x acquire, align 4
+  ret <2 x i32> %ret
+}
+
+define <4 x float> @atomic_vec4_float(ptr %x) {
+; CHECK-LABEL: atomic_vec4_float:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    subq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    movq %rdi, %rsi
+; CHECK-NEXT:    movq %rsp, %rdx
+; CHECK-NEXT:    movl $16, %edi
+; CHECK-NEXT:    movl $2, %ecx
+; CHECK-NEXT:    callq ___atomic_load
+; CHECK-NEXT:    movaps (%rsp), %xmm0
+; CHECK-NEXT:    addq $24, %rsp
+; CHECK-NEXT:    retq
+;
+; CHECK0-LABEL: atomic_vec4_float:
+; CHECK0:       ## %bb.0:
+; CHECK0-NEXT:    subq $24, %rsp
+; CHECK0-NEXT:    .cfi_def_cfa_offset 32
+; CHECK0-NEXT:    movq %rdi, %rsi
+; CHECK0-NEXT:    movl $16, %edi
+; CHECK0-NEXT:    movq %rsp, %rdx
+; CHECK0-NEXT:    movl $2, %ecx
+; CHECK0-NEXT:    callq ___atomic_load
+; CHECK0-NEXT:    movaps (%rsp), %xmm0
+; CHECK0-NEXT:    addq $24, %rsp
+; CHECK0-NEXT:    retq
+  %ret = load atomic <4 x float>, ptr %x acquire, align 4
+  ret <4 x float> %ret
+}
+
+define <8 x double> @atomic_vec8_double(ptr %x) {
+; CHECK-LABEL: atomic_vec8_double:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    subq $72, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 80
+; CHECK-NEXT:    movq %rdi, %rsi
+; CHECK-NEXT:    movq %rsp, %rdx
+; CHECK-NEXT:    movl $64, %edi
+; CHECK-NEXT:    movl $2, %ecx
+; CHECK-NEXT:    callq ___atomic_load
+; CHECK-NEXT:    movaps (%rsp), %xmm0
+; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
+; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2
+; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm3
+; CHECK-NEXT:    addq $72, %rsp
+; CHECK-NEXT:    retq
+;
+; CHECK0-LABEL: atomic_vec8_double:
+; CHECK0:       ## %bb.0:
+; CHECK0-NEXT:    subq $72, %rsp
+; CHECK0-NEXT:    .cfi_def_cfa_offset 80
+; CHECK0-NEXT:    movq %rdi, %rsi
+; CHECK0-NEXT:    movl $64, %edi
+; CHECK0-NEXT:    movq %rsp, %rdx
+; CHECK0-NEXT:    movl $2, %ecx
+; CHECK0-NEXT:    callq ___atomic_load
+; CHECK0-NEXT:    movapd (%rsp), %xmm0
+; CHECK0-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm1
+; CHECK0-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm2
+; CHECK0-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm3
+; CHECK0-NEXT:    addq $72, %rsp
+; CHECK0-NEXT:    retq
+  %ret = load atomic <8 x double>, ptr %x acquire, align 4
+  ret <8 x double> %ret
+}
+
+define <16 x bfloat> @atomic_vec16_bfloat(ptr %x) {
+; CHECK-LABEL: atomic_vec16_bfloat:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    subq $40, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    movq %rdi, %rsi
+; CHECK-NEXT:    movq %rsp, %rdx
+; CHECK-NEXT:    movl $32, %edi
+; CHECK-NEXT:    movl $2, %ecx
+; CHECK-NEXT:    callq ___atomic_load
+; CHECK-NEXT:    movaps (%rsp), %xmm0
+; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
+; CHECK-NEXT:    addq $40, %rsp
+; CHECK-NEXT:    retq
+;
+; CHECK0-LABEL: atomic_vec16_bfloat:
+; CHECK0:       ## %bb.0:
+; CHECK0-NEXT:    subq $40, %rsp
+; CHECK0-NEXT:    .cfi_def_cfa_offset 48
+; CHECK0-NEXT:    movq %rdi, %rsi
+; CHECK0-NEXT:    movl $32, %edi
+; CHECK0-NEXT:    movq %rsp, %rdx
+; CHECK0-NEXT:    movl $2, %ecx
+; CHECK0-NEXT:    callq ___atomic_load
+; CHECK0-NEXT:    movaps (%rsp), %xmm0
+; CHECK0-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
+; CHECK0-NEXT:    addq $40, %rsp
+; CHECK0-NEXT:    retq
+  %ret = load atomic <16 x bfloat>, ptr %x acquire, align 4
+  ret <16 x bfloat> %ret
+}
+
+define <32 x half> @atomic_vec32_half(ptr %x) {
+; CHECK-LABEL: atomic_vec32_half:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    subq $72, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 80
+; CHECK-NEXT:    movq %rdi, %rsi
+; CHECK-NEXT:    movq %rsp, %rdx
+; CHECK-NEXT:    movl $64, %edi
+; CHECK-NEXT:    movl $2, %ecx
+; CHECK-NEXT:    callq ___atomic_load
+; CHECK-NEXT:    movaps (%rsp), %xmm0
+; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
+; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2
+; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm3
+; CHECK-NEXT:    addq $72, %rsp
+; CHECK-NEXT:    retq
+;
+; CHECK0-LABEL: atomic_vec32_half:
+; CHECK0:       ## %bb.0:
+; CHECK0-NEXT:    subq $72, %rsp
+; CHECK0-NEXT:    .cfi_def_cfa_offset 80
+; CHECK0-NEXT:    movq %rdi, %rsi
+; CHECK0-NEXT:    movl $64, %edi
+; CHECK0-NEXT:    movq %rsp, %rdx
+; CHECK0-NEXT:    movl $2, %ecx
+; CHECK0-NEXT:    callq ___atomic_load
+; CHECK0-NEXT:    movaps (%rsp), %xmm0
+; CHECK0-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
+; CHECK0-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2
+; CHECK0-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm3
+; CHECK0-NEXT:    addq $72, %rsp
+; CHECK0-NEXT:    retq
+  %ret = load atomic <32 x half>, ptr %x acquire, align 4
+  ret <32 x half> %ret
+}

@jofrn jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch from 441e75a to 3773e66 Compare December 18, 2024 08:54
@@ -110,3 +110,226 @@ define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) {
ret <1 x bfloat> %ret
}

define <1 x i64> @atomic_vec1_i64(ptr %x) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add nounwind to get rid of cfi noise

@jofrn jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch from 3773e66 to 813fffe Compare December 18, 2024 11:45
@jofrn jofrn force-pushed the users/jofrn/spr/main/f9d761c5 branch from 141279f to 70bb5b9 Compare December 18, 2024 11:45
Copy link
Collaborator

@RKSimon RKSimon left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM - cheers

@jofrn jofrn force-pushed the users/jofrn/spr/main/f9d761c5 branch from 70bb5b9 to dac7f1e Compare December 18, 2024 19:11
@jofrn jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch 2 times, most recently from 05a76cf to bb71e93 Compare December 18, 2024 20:47
@jofrn jofrn force-pushed the users/jofrn/spr/main/f9d761c5 branch from dac7f1e to df5e28c Compare December 18, 2024 20:47
@jyknight
Copy link
Member

Atomic vectors with size >1 are lowered to calls.

That's not true; they're only lowered to calls when the alignment is not known to be sufficient (e.g. <2 x i32> must have align 8, not align 4).

; CHECK0-NEXT: movq (%rsp), %rax
; CHECK0-NEXT: popq %rcx
; CHECK0-NEXT: retq
%ret = load atomic <1 x ptr>, ptr %x acquire, align 4
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should test the naturally aligned case, this under-aligned one is a separate test

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably should just add these kinds of cases with that PR. The set of strange under-aligned cases happen to work now, but I'm not sure that's enough reason to separately push them

@jofrn jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch from bb71e93 to 5e8da05 Compare December 19, 2024 02:29
@jofrn jofrn force-pushed the users/jofrn/spr/main/f9d761c5 branch 2 times, most recently from 255a011 to e3dd939 Compare December 19, 2024 02:31
@jofrn jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch from 5e8da05 to e71ac05 Compare December 19, 2024 02:31
; CHECK0-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK0-NEXT: popq %rax
; CHECK0-NEXT: retq
%ret = load atomic <1 x double>, ptr %x acquire, align 4
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

these are all under aligned

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jofrn jofrn changed the title [X86] Add atomic vector tests for >1 sizes. [X86] Add atomic vector tests for unaligned >1 sizes. Dec 19, 2024
@jofrn jofrn force-pushed the users/jofrn/spr/main/f9d761c5 branch from e3dd939 to b336c25 Compare December 19, 2024 13:16
@jofrn jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch 2 times, most recently from 0564ecb to 454b8e6 Compare December 19, 2024 16:01
@jofrn jofrn force-pushed the users/jofrn/spr/main/f9d761c5 branch from b336c25 to 7ef2576 Compare December 19, 2024 16:01
@jofrn jofrn force-pushed the users/jofrn/spr/main/f9d761c5 branch from 6078905 to fdc2107 Compare May 6, 2025 03:50
@jofrn jofrn changed the base branch from users/jofrn/spr/main/f9d761c5 to main May 6, 2025 06:03
@jofrn jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch from acfcbcc to 93e8bef Compare May 6, 2025 06:03
@jofrn jofrn changed the base branch from main to users/jofrn/spr/main/f9d761c5 May 6, 2025 06:04
@jofrn jofrn force-pushed the users/jofrn/spr/main/f9d761c5 branch from 5005b94 to c7d4433 Compare May 6, 2025 15:04
@jofrn jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch 2 times, most recently from a9729ee to 5ce8ea6 Compare May 7, 2025 12:53
@jofrn jofrn force-pushed the users/jofrn/spr/main/f9d761c5 branch from c7d4433 to 531bc05 Compare May 7, 2025 12:53
@jofrn jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch from 5ce8ea6 to 01f388d Compare May 8, 2025 01:53
@jofrn jofrn force-pushed the users/jofrn/spr/main/f9d761c5 branch from 531bc05 to 70a0cad Compare May 8, 2025 01:53
@jofrn jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch from 01f388d to d09c5a1 Compare May 8, 2025 23:38
@jofrn jofrn force-pushed the users/jofrn/spr/main/f9d761c5 branch from 0219785 to e99bf25 Compare May 9, 2025 12:53
@jofrn jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch from d09c5a1 to 315e1fc Compare May 9, 2025 12:53
@jofrn jofrn force-pushed the users/jofrn/spr/main/f9d761c5 branch from e99bf25 to 4843957 Compare May 9, 2025 19:43
@jofrn jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch 2 times, most recently from 81e34f8 to 0a2f8f2 Compare May 9, 2025 20:03
@jofrn jofrn force-pushed the users/jofrn/spr/main/f9d761c5 branch from 4843957 to 5d4d774 Compare May 9, 2025 20:03
@jofrn jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch 3 times, most recently from 63a3178 to d212710 Compare May 12, 2025 05:34
@jofrn jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch from d212710 to 7b4708f Compare May 27, 2025 17:34
@jofrn jofrn force-pushed the users/jofrn/spr/main/f9d761c5 branch from ce52d52 to 87d478c Compare May 27, 2025 17:34
@jofrn jofrn changed the base branch from users/jofrn/spr/main/f9d761c5 to main June 1, 2025 20:46
@jofrn jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch from 7b4708f to 66ad4f4 Compare June 1, 2025 20:46
@jofrn jofrn changed the base branch from main to users/jofrn/spr/main/f9d761c5 June 1, 2025 20:46
Unaligned atomic vectors with size >1 are lowered to calls.
Adding their tests separately here.

commit-id:a06a5cc6
@jofrn jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch from 66ad4f4 to 620e182 Compare June 2, 2025 04:15
@jofrn jofrn force-pushed the users/jofrn/spr/main/f9d761c5 branch from 1e14381 to 9a088dc Compare June 2, 2025 04:15
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

5 participants