Skip to content

Commit 88e3dac

Browse files
committed
[AtomicExpand] Add bitcasts when expanding load atomic vector
AtomicExpand fails for aligned `load atomic <n x T>` because it does not find a compatible library call. This change adds appropriate bitcasts so that the call can be lowered. commit-id:f430c1af
1 parent 053d34b commit 88e3dac

File tree

4 files changed

+158
-3
lines changed

4 files changed

+158
-3
lines changed

llvm/lib/CodeGen/AtomicExpandPass.cpp

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2066,9 +2066,18 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall(
20662066
I->replaceAllUsesWith(V);
20672067
} else if (HasResult) {
20682068
Value *V;
2069-
if (UseSizedLibcall)
2070-
V = Builder.CreateBitOrPointerCast(Result, I->getType());
2071-
else {
2069+
if (UseSizedLibcall) {
2070+
// Add bitcasts from Result's scalar type to I's <n x ptr> vector type
2071+
auto *PtrTy = dyn_cast<PointerType>(I->getType()->getScalarType());
2072+
auto *VTy = dyn_cast<VectorType>(I->getType());
2073+
if (VTy && PtrTy && !Result->getType()->isVectorTy()) {
2074+
unsigned AS = PtrTy->getAddressSpace();
2075+
Value *BC = Builder.CreateBitCast(
2076+
Result, VTy->getWithNewType(DL.getIntPtrType(Ctx, AS)));
2077+
V = Builder.CreateIntToPtr(BC, I->getType());
2078+
} else
2079+
V = Builder.CreateBitOrPointerCast(Result, I->getType());
2080+
} else {
20722081
V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
20732082
AllocaAlignment);
20742083
Builder.CreateLifetimeEnd(AllocaResult, SizeVal64);

llvm/test/CodeGen/ARM/atomic-load-store.ll

Lines changed: 51 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -983,3 +983,54 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) {
983983
store atomic double %val1, ptr %ptr seq_cst, align 8
984984
ret void
985985
}
986+
987+
define <1 x ptr> @atomic_vec1_ptr(ptr %x) #0 {
988+
; ARM-LABEL: atomic_vec1_ptr:
989+
; ARM: @ %bb.0:
990+
; ARM-NEXT: ldr r0, [r0]
991+
; ARM-NEXT: dmb ish
992+
; ARM-NEXT: bx lr
993+
;
994+
; ARMOPTNONE-LABEL: atomic_vec1_ptr:
995+
; ARMOPTNONE: @ %bb.0:
996+
; ARMOPTNONE-NEXT: ldr r0, [r0]
997+
; ARMOPTNONE-NEXT: dmb ish
998+
; ARMOPTNONE-NEXT: bx lr
999+
;
1000+
; THUMBTWO-LABEL: atomic_vec1_ptr:
1001+
; THUMBTWO: @ %bb.0:
1002+
; THUMBTWO-NEXT: ldr r0, [r0]
1003+
; THUMBTWO-NEXT: dmb ish
1004+
; THUMBTWO-NEXT: bx lr
1005+
;
1006+
; THUMBONE-LABEL: atomic_vec1_ptr:
1007+
; THUMBONE: @ %bb.0:
1008+
; THUMBONE-NEXT: push {r7, lr}
1009+
; THUMBONE-NEXT: movs r1, #0
1010+
; THUMBONE-NEXT: mov r2, r1
1011+
; THUMBONE-NEXT: bl __sync_val_compare_and_swap_4
1012+
; THUMBONE-NEXT: pop {r7, pc}
1013+
;
1014+
; ARMV4-LABEL: atomic_vec1_ptr:
1015+
; ARMV4: @ %bb.0:
1016+
; ARMV4-NEXT: push {r11, lr}
1017+
; ARMV4-NEXT: mov r1, #2
1018+
; ARMV4-NEXT: bl __atomic_load_4
1019+
; ARMV4-NEXT: pop {r11, lr}
1020+
; ARMV4-NEXT: mov pc, lr
1021+
;
1022+
; ARMV6-LABEL: atomic_vec1_ptr:
1023+
; ARMV6: @ %bb.0:
1024+
; ARMV6-NEXT: ldr r0, [r0]
1025+
; ARMV6-NEXT: mov r1, #0
1026+
; ARMV6-NEXT: mcr p15, #0, r1, c7, c10, #5
1027+
; ARMV6-NEXT: bx lr
1028+
;
1029+
; THUMBM-LABEL: atomic_vec1_ptr:
1030+
; THUMBM: @ %bb.0:
1031+
; THUMBM-NEXT: ldr r0, [r0]
1032+
; THUMBM-NEXT: dmb sy
1033+
; THUMBM-NEXT: bx lr
1034+
%ret = load atomic <1 x ptr>, ptr %x acquire, align 4
1035+
ret <1 x ptr> %ret
1036+
}

llvm/test/CodeGen/X86/atomic-load-store.ll

Lines changed: 30 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -381,6 +381,21 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind {
381381
ret <2 x i32> %ret
382382
}
383383

384+
define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind {
385+
; CHECK-LABEL: atomic_vec2_ptr_align:
386+
; CHECK: ## %bb.0:
387+
; CHECK-NEXT: pushq %rax
388+
; CHECK-NEXT: movl $2, %esi
389+
; CHECK-NEXT: callq ___atomic_load_16
390+
; CHECK-NEXT: movq %rdx, %xmm1
391+
; CHECK-NEXT: movq %rax, %xmm0
392+
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
393+
; CHECK-NEXT: popq %rax
394+
; CHECK-NEXT: retq
395+
%ret = load atomic <2 x ptr>, ptr %x acquire, align 16
396+
ret <2 x ptr> %ret
397+
}
398+
384399
define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind {
385400
; CHECK3-LABEL: atomic_vec4_i8:
386401
; CHECK3: ## %bb.0:
@@ -404,6 +419,21 @@ define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind {
404419
ret <4 x i16> %ret
405420
}
406421

422+
define <4 x ptr addrspace(270)> @atomic_vec4_ptr270(ptr %x) nounwind {
423+
; CHECK-LABEL: atomic_vec4_ptr270:
424+
; CHECK: ## %bb.0:
425+
; CHECK-NEXT: pushq %rax
426+
; CHECK-NEXT: movl $2, %esi
427+
; CHECK-NEXT: callq ___atomic_load_16
428+
; CHECK-NEXT: movq %rdx, %xmm1
429+
; CHECK-NEXT: movq %rax, %xmm0
430+
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
431+
; CHECK-NEXT: popq %rax
432+
; CHECK-NEXT: retq
433+
%ret = load atomic <4 x ptr addrspace(270)>, ptr %x acquire, align 16
434+
ret <4 x ptr addrspace(270)> %ret
435+
}
436+
407437
define <4 x half> @atomic_vec4_half(ptr %x) nounwind {
408438
; CHECK-LABEL: atomic_vec4_half:
409439
; CHECK: ## %bb.0:

llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll

Lines changed: 65 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -151,3 +151,68 @@ define void @pointer_cmpxchg_expand6(ptr addrspace(1) %ptr,
151151
ret void
152152
}
153153

154+
define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind {
155+
; CHECK-LABEL: define <2 x ptr> @atomic_vec2_ptr_align(
156+
; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0:[0-9]+]] {
157+
; CHECK-NEXT: [[TMP1:%.*]] = call i128 @__atomic_load_16(ptr [[X]], i32 2)
158+
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i128 [[TMP1]] to <2 x i64>
159+
; CHECK-NEXT: [[TMP7:%.*]] = inttoptr <2 x i64> [[TMP6]] to <2 x ptr>
160+
; CHECK-NEXT: ret <2 x ptr> [[TMP7]]
161+
;
162+
%ret = load atomic <2 x ptr>, ptr %x acquire, align 16
163+
ret <2 x ptr> %ret
164+
}
165+
166+
define <4 x ptr addrspace(270)> @atomic_vec4_ptr_align(ptr %x) nounwind {
167+
; CHECK-LABEL: define <4 x ptr addrspace(270)> @atomic_vec4_ptr_align(
168+
; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
169+
; CHECK-NEXT: [[TMP1:%.*]] = call i128 @__atomic_load_16(ptr [[X]], i32 2)
170+
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <4 x i32>
171+
; CHECK-NEXT: [[TMP3:%.*]] = inttoptr <4 x i32> [[TMP2]] to <4 x ptr addrspace(270)>
172+
; CHECK-NEXT: ret <4 x ptr addrspace(270)> [[TMP3]]
173+
;
174+
%ret = load atomic <4 x ptr addrspace(270)>, ptr %x acquire, align 16
175+
ret <4 x ptr addrspace(270)> %ret
176+
}
177+
178+
define <2 x i16> @atomic_vec2_i16(ptr %x) nounwind {
179+
; CHECK-LABEL: define <2 x i16> @atomic_vec2_i16(
180+
; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
181+
; CHECK-NEXT: [[RET:%.*]] = load atomic <2 x i16>, ptr [[X]] acquire, align 8
182+
; CHECK-NEXT: ret <2 x i16> [[RET]]
183+
;
184+
%ret = load atomic <2 x i16>, ptr %x acquire, align 8
185+
ret <2 x i16> %ret
186+
}
187+
188+
define <2 x half> @atomic_vec2_half(ptr %x) nounwind {
189+
; CHECK-LABEL: define <2 x half> @atomic_vec2_half(
190+
; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
191+
; CHECK-NEXT: [[RET:%.*]] = load atomic <2 x half>, ptr [[X]] acquire, align 8
192+
; CHECK-NEXT: ret <2 x half> [[RET]]
193+
;
194+
%ret = load atomic <2 x half>, ptr %x acquire, align 8
195+
ret <2 x half> %ret
196+
}
197+
198+
define <4 x i32> @atomic_vec4_i32(ptr %x) nounwind {
199+
; CHECK-LABEL: define <4 x i32> @atomic_vec4_i32(
200+
; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
201+
; CHECK-NEXT: [[TMP1:%.*]] = call i128 @__atomic_load_16(ptr [[X]], i32 2)
202+
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <4 x i32>
203+
; CHECK-NEXT: ret <4 x i32> [[TMP2]]
204+
;
205+
%ret = load atomic <4 x i32>, ptr %x acquire, align 16
206+
ret <4 x i32> %ret
207+
}
208+
209+
define <4 x float> @atomic_vec4_float(ptr %x) nounwind {
210+
; CHECK-LABEL: define <4 x float> @atomic_vec4_float(
211+
; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
212+
; CHECK-NEXT: [[TMP1:%.*]] = call i128 @__atomic_load_16(ptr [[X]], i32 2)
213+
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <4 x float>
214+
; CHECK-NEXT: ret <4 x float> [[TMP2]]
215+
;
216+
%ret = load atomic <4 x float>, ptr %x acquire, align 16
217+
ret <4 x float> %ret
218+
}

0 commit comments

Comments (0)