Skip to content

Commit 70a2b52

Browse files
committed
[AtomicExpand] Add bitcasts when expanding load atomic vector
AtomicExpand fails for aligned `load atomic <n x T>` because it does not find a compatible library call. This change adds appropriate bitcasts so that the call can be lowered. It also adds support for 128-bit lowering in tablegen to support SSE/AVX. commit-id:f430c1af
1 parent 6eb10b5 commit 70a2b52

File tree

6 files changed

+359
-82
lines changed

6 files changed

+359
-82
lines changed

llvm/include/llvm/Target/TargetSelectionDAG.td

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1904,6 +1904,20 @@ def atomic_load_64 :
19041904
let MemoryVT = i64;
19051905
}
19061906

1907+
// Pattern fragment that matches an `atomic_load` of $ptr whose memory value
// type is v2i64 (two i64 lanes, 128 bits in total). It is flagged as atomic so
// that only atomic memory operations satisfy the fragment.
def atomic_load_128_v2i64 :
1908+
PatFrag<(ops node:$ptr),
1909+
(atomic_load node:$ptr)> {
1910+
let IsAtomic = true;
1911+
let MemoryVT = v2i64;
1912+
}
1913+
1914+
// Pattern fragment that matches an `atomic_load` of $ptr whose memory value
// type is v4i32 (four i32 lanes, 128 bits in total). It is flagged as atomic so
// that only atomic memory operations satisfy the fragment.
def atomic_load_128_v4i32 :
1915+
PatFrag<(ops node:$ptr),
1916+
(atomic_load node:$ptr)> {
1917+
let IsAtomic = true;
1918+
let MemoryVT = v4i32;
1919+
}
1920+
19071921
def atomic_load_nonext_8 :
19081922
PatFrag<(ops node:$ptr), (atomic_load_nonext node:$ptr)> {
19091923
let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic?

llvm/lib/CodeGen/AtomicExpandPass.cpp

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2066,9 +2066,18 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall(
20662066
I->replaceAllUsesWith(V);
20672067
} else if (HasResult) {
20682068
Value *V;
2069-
if (UseSizedLibcall)
2070-
V = Builder.CreateBitOrPointerCast(Result, I->getType());
2071-
else {
2069+
if (UseSizedLibcall) {
2070+
// Add bitcasts from Result's scalar type to I's <n x ptr> vector type
2071+
auto *PtrTy = dyn_cast<PointerType>(I->getType()->getScalarType());
2072+
auto *VTy = dyn_cast<VectorType>(I->getType());
2073+
if (VTy && PtrTy && !Result->getType()->isVectorTy()) {
2074+
unsigned AS = PtrTy->getAddressSpace();
2075+
Value *BC = Builder.CreateBitCast(
2076+
Result, VTy->getWithNewType(DL.getIntPtrType(Ctx, AS)));
2077+
V = Builder.CreateIntToPtr(BC, I->getType());
2078+
} else
2079+
V = Builder.CreateBitOrPointerCast(Result, I->getType());
2080+
} else {
20722081
V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
20732082
AllocaAlignment);
20742083
Builder.CreateLifetimeEnd(AllocaResult, SizeVal64);

llvm/lib/Target/X86/X86InstrCompiler.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1211,6 +1211,11 @@ def : Pat<(v4i32 (scalar_to_vector (i32 (atomic_load_32 addr:$src)))),
12111211
def : Pat<(v2i64 (scalar_to_vector (i64 (atomic_load_64 addr:$src)))),
12121212
(MOV64toPQIrm addr:$src)>; // load atomic <2 x i32,float>
12131213

1214+
// Select VMOVAPDrm for atomic loads matched by the atomic_load_128_v2i64 and
// atomic_load_128_v4i32 fragments (result types v2i64 and v4i32).
// NOTE(review): unlike the floating-point store pattern further down, which
// carries Requires<[UseSSE1]>, these two patterns have no Requires<[...]>
// predicate — confirm VMOVAPDrm is legal on every subtarget where they can
// match.
def : Pat<(v2i64 (atomic_load_128_v2i64 addr:$src)),
1215+
(VMOVAPDrm addr:$src)>; // load atomic <2 x i64>
1216+
def : Pat<(v4i32 (atomic_load_128_v4i32 addr:$src)),
1217+
(VMOVAPDrm addr:$src)>; // load atomic <4 x i32>
1218+
12141219
// Floating point loads/stores.
12151220
def : Pat<(atomic_store_32 (i32 (bitconvert (f32 FR32:$src))), addr:$dst),
12161221
(MOVSSmr addr:$dst, FR32:$src)>, Requires<[UseSSE1]>;

llvm/test/CodeGen/ARM/atomic-load-store.ll

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -983,3 +983,54 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) {
983983
store atomic double %val1, ptr %ptr seq_cst, align 8
984984
ret void
985985
}
986+
987+
; Atomically loads a <1 x ptr> vector from %x (acquire ordering, align 4) and
; returns it. The THUMBONE and ARMV4 expectations below lower to the library
; calls __sync_val_compare_and_swap_4 and __atomic_load_4 respectively; every
; other prefix expects an inline `ldr`.
define <1 x ptr> @atomic_vec1_ptr(ptr %x) #0 {
988+
; ARM-LABEL: atomic_vec1_ptr:
989+
; ARM: @ %bb.0:
990+
; ARM-NEXT: ldr r0, [r0]
991+
; ARM-NEXT: dmb ish
992+
; ARM-NEXT: bx lr
993+
;
994+
; ARMOPTNONE-LABEL: atomic_vec1_ptr:
995+
; ARMOPTNONE: @ %bb.0:
996+
; ARMOPTNONE-NEXT: ldr r0, [r0]
997+
; ARMOPTNONE-NEXT: dmb ish
998+
; ARMOPTNONE-NEXT: bx lr
999+
;
1000+
; THUMBTWO-LABEL: atomic_vec1_ptr:
1001+
; THUMBTWO: @ %bb.0:
1002+
; THUMBTWO-NEXT: ldr r0, [r0]
1003+
; THUMBTWO-NEXT: dmb ish
1004+
; THUMBTWO-NEXT: bx lr
1005+
;
1006+
; THUMBONE-LABEL: atomic_vec1_ptr:
1007+
; THUMBONE: @ %bb.0:
1008+
; THUMBONE-NEXT: push {r7, lr}
1009+
; THUMBONE-NEXT: movs r1, #0
1010+
; THUMBONE-NEXT: mov r2, r1
1011+
; THUMBONE-NEXT: bl __sync_val_compare_and_swap_4
1012+
; THUMBONE-NEXT: pop {r7, pc}
1013+
;
1014+
; ARMV4-LABEL: atomic_vec1_ptr:
1015+
; ARMV4: @ %bb.0:
1016+
; ARMV4-NEXT: push {r11, lr}
1017+
; ARMV4-NEXT: mov r1, #2
1018+
; ARMV4-NEXT: bl __atomic_load_4
1019+
; ARMV4-NEXT: pop {r11, lr}
1020+
; ARMV4-NEXT: mov pc, lr
1021+
;
1022+
; ARMV6-LABEL: atomic_vec1_ptr:
1023+
; ARMV6: @ %bb.0:
1024+
; ARMV6-NEXT: ldr r0, [r0]
1025+
; ARMV6-NEXT: mov r1, #0
1026+
; ARMV6-NEXT: mcr p15, #0, r1, c7, c10, #5
1027+
; ARMV6-NEXT: bx lr
1028+
;
1029+
; THUMBM-LABEL: atomic_vec1_ptr:
1030+
; THUMBM: @ %bb.0:
1031+
; THUMBM-NEXT: ldr r0, [r0]
1032+
; THUMBM-NEXT: dmb sy
1033+
; THUMBM-NEXT: bx lr
1034+
%ret = load atomic <1 x ptr>, ptr %x acquire, align 4
1035+
ret <1 x ptr> %ret
1036+
}

llvm/test/CodeGen/X86/atomic-load-store.ll

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -860,6 +860,53 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind {
860860
ret <2 x i32> %ret
861861
}
862862

863+
; Move td records to AtomicExpand
864+
; Atomically loads a <2 x ptr> vector from %x (acquire ordering, align 16) and
; returns it. The CHECK-O3/CHECK-O0 expectations call __atomic_load_16 and
; rebuild the vector from %rax/%rdx; the SSE and AVX prefixes expect a single
; vmovaps/vmovapd load from (%rdi).
define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind {
865+
; CHECK-O3-LABEL: atomic_vec2_ptr_align:
866+
; CHECK-O3: # %bb.0:
867+
; CHECK-O3-NEXT: pushq %rax
868+
; CHECK-O3-NEXT: movl $2, %esi
869+
; CHECK-O3-NEXT: callq __atomic_load_16@PLT
870+
; CHECK-O3-NEXT: movq %rdx, %xmm1
871+
; CHECK-O3-NEXT: movq %rax, %xmm0
872+
; CHECK-O3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
873+
; CHECK-O3-NEXT: popq %rax
874+
; CHECK-O3-NEXT: retq
875+
;
876+
; CHECK-SSE-O3-LABEL: atomic_vec2_ptr_align:
877+
; CHECK-SSE-O3: # %bb.0:
878+
; CHECK-SSE-O3-NEXT: vmovaps (%rdi), %xmm0
879+
; CHECK-SSE-O3-NEXT: retq
880+
;
881+
; CHECK-AVX-O3-LABEL: atomic_vec2_ptr_align:
882+
; CHECK-AVX-O3: # %bb.0:
883+
; CHECK-AVX-O3-NEXT: vmovaps (%rdi), %xmm0
884+
; CHECK-AVX-O3-NEXT: retq
885+
;
886+
; CHECK-O0-LABEL: atomic_vec2_ptr_align:
887+
; CHECK-O0: # %bb.0:
888+
; CHECK-O0-NEXT: pushq %rax
889+
; CHECK-O0-NEXT: movl $2, %esi
890+
; CHECK-O0-NEXT: callq __atomic_load_16@PLT
891+
; CHECK-O0-NEXT: movq %rdx, %xmm1
892+
; CHECK-O0-NEXT: movq %rax, %xmm0
893+
; CHECK-O0-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
894+
; CHECK-O0-NEXT: popq %rax
895+
; CHECK-O0-NEXT: retq
896+
;
897+
; CHECK-SSE-O0-LABEL: atomic_vec2_ptr_align:
898+
; CHECK-SSE-O0: # %bb.0:
899+
; CHECK-SSE-O0-NEXT: vmovapd (%rdi), %xmm0
900+
; CHECK-SSE-O0-NEXT: retq
901+
;
902+
; CHECK-AVX-O0-LABEL: atomic_vec2_ptr_align:
903+
; CHECK-AVX-O0: # %bb.0:
904+
; CHECK-AVX-O0-NEXT: vmovapd (%rdi), %xmm0
905+
; CHECK-AVX-O0-NEXT: retq
906+
%ret = load atomic <2 x ptr>, ptr %x acquire, align 16
907+
ret <2 x ptr> %ret
908+
}
909+
863910
define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind {
864911
; CHECK-O3-LABEL: atomic_vec4_i8:
865912
; CHECK-O3: # %bb.0:
@@ -903,6 +950,52 @@ define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind {
903950
ret <4 x i16> %ret
904951
}
905952

953+
; Atomically loads a <4 x ptr addrspace(270)> vector from %x (acquire ordering,
; align 16) and returns it. The CHECK-O3/CHECK-O0 expectations call
; __atomic_load_16 and rebuild the vector from %rax/%rdx; the SSE and AVX
; prefixes expect a single vmovaps/vmovapd load from (%rdi).
define <4 x ptr addrspace(270)> @atomic_vec4_ptr270(ptr %x) nounwind {
954+
; CHECK-O3-LABEL: atomic_vec4_ptr270:
955+
; CHECK-O3: # %bb.0:
956+
; CHECK-O3-NEXT: pushq %rax
957+
; CHECK-O3-NEXT: movl $2, %esi
958+
; CHECK-O3-NEXT: callq __atomic_load_16@PLT
959+
; CHECK-O3-NEXT: movq %rdx, %xmm1
960+
; CHECK-O3-NEXT: movq %rax, %xmm0
961+
; CHECK-O3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
962+
; CHECK-O3-NEXT: popq %rax
963+
; CHECK-O3-NEXT: retq
964+
;
965+
; CHECK-SSE-O3-LABEL: atomic_vec4_ptr270:
966+
; CHECK-SSE-O3: # %bb.0:
967+
; CHECK-SSE-O3-NEXT: vmovaps (%rdi), %xmm0
968+
; CHECK-SSE-O3-NEXT: retq
969+
;
970+
; CHECK-AVX-O3-LABEL: atomic_vec4_ptr270:
971+
; CHECK-AVX-O3: # %bb.0:
972+
; CHECK-AVX-O3-NEXT: vmovaps (%rdi), %xmm0
973+
; CHECK-AVX-O3-NEXT: retq
974+
;
975+
; CHECK-O0-LABEL: atomic_vec4_ptr270:
976+
; CHECK-O0: # %bb.0:
977+
; CHECK-O0-NEXT: pushq %rax
978+
; CHECK-O0-NEXT: movl $2, %esi
979+
; CHECK-O0-NEXT: callq __atomic_load_16@PLT
980+
; CHECK-O0-NEXT: movq %rdx, %xmm1
981+
; CHECK-O0-NEXT: movq %rax, %xmm0
982+
; CHECK-O0-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
983+
; CHECK-O0-NEXT: popq %rax
984+
; CHECK-O0-NEXT: retq
985+
;
986+
; CHECK-SSE-O0-LABEL: atomic_vec4_ptr270:
987+
; CHECK-SSE-O0: # %bb.0:
988+
; CHECK-SSE-O0-NEXT: vmovapd (%rdi), %xmm0
989+
; CHECK-SSE-O0-NEXT: retq
990+
;
991+
; CHECK-AVX-O0-LABEL: atomic_vec4_ptr270:
992+
; CHECK-AVX-O0: # %bb.0:
993+
; CHECK-AVX-O0-NEXT: vmovapd (%rdi), %xmm0
994+
; CHECK-AVX-O0-NEXT: retq
995+
%ret = load atomic <4 x ptr addrspace(270)>, ptr %x acquire, align 16
996+
ret <4 x ptr addrspace(270)> %ret
997+
}
998+
906999
define <4 x half> @atomic_vec4_half(ptr %x) nounwind {
9071000
; CHECK-LABEL: atomic_vec4_half:
9081001
; CHECK: # %bb.0:

0 commit comments

Comments
 (0)