-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[AMDGPU] Make getAssumedAddrSpace
return AS1 for pointer kernel arguments
#137488
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU] Make getAssumedAddrSpace
return AS1 for pointer kernel arguments
#137488
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking. |
[AMDGPU] Make `getAssumedAddrSpace` return AS1 for pointer kernel arguments
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-amdgpu Author: Shilei Tian (shiltian) ChangesPatch is 1.04 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/137488.diff 37 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index eb50617b281a1..150060e1b266c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -951,6 +951,10 @@ bool AMDGPUTargetMachine::isNoopAddrSpaceCast(unsigned SrcAS,
}
unsigned AMDGPUTargetMachine::getAssumedAddrSpace(const Value *V) const {
+ if (auto *Arg = dyn_cast<Argument>(V);
+ Arg && AMDGPU::isKernelCC(Arg->getParent()))
+ return AMDGPUAS::GLOBAL_ADDRESS;
+
const auto *LD = dyn_cast<LoadInst>(V);
if (!LD) // TODO: Handle invariant load like constant.
return AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 1032dede7cb36..b6ad73a6acc94 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -12597,29 +12597,18 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
}
ChangeStatus updateImpl(Attributor &A) override {
- unsigned FlatAS = A.getInfoCache().getFlatAddressSpace().value();
uint32_t OldAddressSpace = AssumedAddressSpace;
auto CheckAddressSpace = [&](Value &Obj) {
if (isa<UndefValue>(&Obj))
return true;
- // If an argument in flat address space only has addrspace cast uses, and
- // those casts are same, then we take the dst addrspace.
if (auto *Arg = dyn_cast<Argument>(&Obj)) {
- if (Arg->getType()->getPointerAddressSpace() == FlatAS) {
- unsigned CastAddrSpace = FlatAS;
- for (auto *U : Arg->users()) {
- auto *ASCI = dyn_cast<AddrSpaceCastInst>(U);
- if (!ASCI)
- return takeAddressSpace(Obj.getType()->getPointerAddressSpace());
- if (CastAddrSpace != FlatAS &&
- CastAddrSpace != ASCI->getDestAddressSpace())
- return false;
- CastAddrSpace = ASCI->getDestAddressSpace();
- }
- if (CastAddrSpace != FlatAS)
- return takeAddressSpace(CastAddrSpace);
- }
+ auto *TTI =
+ A.getInfoCache().getAnalysisResultForFunction<TargetIRAnalysis>(
+ *Arg->getParent());
+ unsigned AssumedAS = TTI->getAssumedAddrSpace(Arg);
+ if (AssumedAS != ~0U)
+ return takeAddressSpace(AssumedAS);
}
return takeAddressSpace(Obj.getType()->getPointerAddressSpace());
};
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_udec_wrap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_udec_wrap.ll
index aeb301939e986..0347d9e5c3110 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_udec_wrap.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_udec_wrap.ll
@@ -946,7 +946,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32(ptr %out, ptr %ptr) #1 {
; CI-NEXT: v_mov_b32_e32 v0, s2
; CI-NEXT: v_mov_b32_e32 v1, s3
; CI-NEXT: flat_atomic_dec v2, v[0:1], v2 glc
-; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_wbinvl1_vol
; CI-NEXT: v_mov_b32_e32 v0, s0
; CI-NEXT: v_mov_b32_e32 v1, s1
@@ -964,7 +964,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32(ptr %out, ptr %ptr) #1 {
; VI-NEXT: v_mov_b32_e32 v0, s2
; VI-NEXT: v_mov_b32_e32 v1, s3
; VI-NEXT: flat_atomic_dec v2, v[0:1], v2 glc
-; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_wbinvl1_vol
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: v_mov_b32_e32 v1, s1
@@ -974,52 +974,38 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32(ptr %out, ptr %ptr) #1 {
; GFX9-LABEL: flat_atomic_dec_ret_i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
-; GFX9-NEXT: s_add_u32 flat_scratch_lo, s12, s17
-; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
-; GFX9-NEXT: v_mov_b32_e32 v2, 42
+; GFX9-NEXT: v_mov_b32_e32 v0, 42
+; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v0, s2
-; GFX9-NEXT: v_mov_b32_e32 v1, s3
-; GFX9-NEXT: flat_atomic_dec v2, v[0:1], v2 glc
-; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_atomic_dec v0, v1, v0, s[2:3] glc
+; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_wbinvl1_vol
-; GFX9-NEXT: v_mov_b32_e32 v0, s0
-; GFX9-NEXT: v_mov_b32_e32 v1, s1
-; GFX9-NEXT: flat_store_dword v[0:1], v2
+; GFX9-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: flat_atomic_dec_ret_i32:
; GFX10: ; %bb.0:
-; GFX10-NEXT: s_add_u32 s12, s12, s17
-; GFX10-NEXT: s_addc_u32 s13, s13, 0
-; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
-; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
-; GFX10-NEXT: v_mov_b32_e32 v2, 42
+; GFX10-NEXT: v_mov_b32_e32 v0, 42
+; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: v_mov_b32_e32 v0, s2
-; GFX10-NEXT: v_mov_b32_e32 v1, s3
-; GFX10-NEXT: flat_atomic_dec v2, v[0:1], v2 glc
-; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10-NEXT: global_atomic_dec v0, v1, v0, s[2:3] glc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: buffer_gl1_inv
; GFX10-NEXT: buffer_gl0_inv
-; GFX10-NEXT: v_mov_b32_e32 v0, s0
-; GFX10-NEXT: v_mov_b32_e32 v1, s1
-; GFX10-NEXT: flat_store_dword v[0:1], v2
+; GFX10-NEXT: global_store_dword v1, v0, s[0:1]
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: flat_atomic_dec_ret_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
-; GFX11-NEXT: v_mov_b32_e32 v2, 42
+; GFX11-NEXT: v_dual_mov_b32 v0, 42 :: v_dual_mov_b32 v1, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
-; GFX11-NEXT: flat_atomic_dec_u32 v2, v[0:1], v2 glc
-; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-NEXT: global_atomic_dec_u32 v0, v1, v0, s[2:3] glc
+; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
-; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
-; GFX11-NEXT: flat_store_b32 v[0:1], v2
+; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX11-NEXT: s_endpgm
%result = atomicrmw udec_wrap ptr %ptr, i32 42 syncscope("agent") seq_cst, align 4
store i32 %result, ptr %out, align 4
@@ -1040,7 +1026,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset(ptr %out, ptr %ptr) #1
; CI-NEXT: v_mov_b32_e32 v0, s2
; CI-NEXT: v_mov_b32_e32 v1, s3
; CI-NEXT: flat_atomic_dec v2, v[0:1], v2 glc
-; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_wbinvl1_vol
; CI-NEXT: v_mov_b32_e32 v0, s0
; CI-NEXT: v_mov_b32_e32 v1, s1
@@ -1060,7 +1046,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset(ptr %out, ptr %ptr) #1
; VI-NEXT: v_mov_b32_e32 v0, s2
; VI-NEXT: v_mov_b32_e32 v1, s3
; VI-NEXT: flat_atomic_dec v2, v[0:1], v2 glc
-; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_wbinvl1_vol
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: v_mov_b32_e32 v1, s1
@@ -1070,54 +1056,38 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset(ptr %out, ptr %ptr) #1
; GFX9-LABEL: flat_atomic_dec_ret_i32_offset:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
-; GFX9-NEXT: s_add_u32 flat_scratch_lo, s12, s17
-; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
-; GFX9-NEXT: v_mov_b32_e32 v2, 42
+; GFX9-NEXT: v_mov_b32_e32 v0, 42
+; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v0, s2
-; GFX9-NEXT: v_mov_b32_e32 v1, s3
-; GFX9-NEXT: flat_atomic_dec v2, v[0:1], v2 offset:16 glc
-; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_atomic_dec v0, v1, v0, s[2:3] offset:16 glc
+; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_wbinvl1_vol
-; GFX9-NEXT: v_mov_b32_e32 v0, s0
-; GFX9-NEXT: v_mov_b32_e32 v1, s1
-; GFX9-NEXT: flat_store_dword v[0:1], v2
+; GFX9-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: flat_atomic_dec_ret_i32_offset:
; GFX10: ; %bb.0:
-; GFX10-NEXT: s_add_u32 s12, s12, s17
-; GFX10-NEXT: s_addc_u32 s13, s13, 0
-; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
-; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
-; GFX10-NEXT: v_mov_b32_e32 v2, 42
+; GFX10-NEXT: v_mov_b32_e32 v0, 42
+; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: s_add_u32 s2, s2, 16
-; GFX10-NEXT: s_addc_u32 s3, s3, 0
-; GFX10-NEXT: v_mov_b32_e32 v0, s2
-; GFX10-NEXT: v_mov_b32_e32 v1, s3
-; GFX10-NEXT: flat_atomic_dec v2, v[0:1], v2 glc
-; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10-NEXT: global_atomic_dec v0, v1, v0, s[2:3] offset:16 glc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: buffer_gl1_inv
; GFX10-NEXT: buffer_gl0_inv
-; GFX10-NEXT: v_mov_b32_e32 v0, s0
-; GFX10-NEXT: v_mov_b32_e32 v1, s1
-; GFX10-NEXT: flat_store_dword v[0:1], v2
+; GFX10-NEXT: global_store_dword v1, v0, s[0:1]
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: flat_atomic_dec_ret_i32_offset:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
-; GFX11-NEXT: v_mov_b32_e32 v2, 42
+; GFX11-NEXT: v_dual_mov_b32 v0, 42 :: v_dual_mov_b32 v1, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
-; GFX11-NEXT: flat_atomic_dec_u32 v2, v[0:1], v2 offset:16 glc
-; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-NEXT: global_atomic_dec_u32 v0, v1, v0, s[2:3] offset:16 glc
+; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
-; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
-; GFX11-NEXT: flat_store_b32 v[0:1], v2
+; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX11-NEXT: s_endpgm
%gep = getelementptr i32, ptr %ptr, i32 4
%result = atomicrmw udec_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4
@@ -1139,7 +1109,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_system(ptr %out, ptr %
; CI-NEXT: v_mov_b32_e32 v0, s2
; CI-NEXT: v_mov_b32_e32 v1, s3
; CI-NEXT: flat_atomic_dec v2, v[0:1], v2 glc
-; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_wbinvl1_vol
; CI-NEXT: v_mov_b32_e32 v0, s0
; CI-NEXT: v_mov_b32_e32 v1, s1
@@ -1159,7 +1129,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_system(ptr %out, ptr %
; VI-NEXT: v_mov_b32_e32 v0, s2
; VI-NEXT: v_mov_b32_e32 v1, s3
; VI-NEXT: flat_atomic_dec v2, v[0:1], v2 glc
-; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_wbinvl1_vol
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: v_mov_b32_e32 v1, s1
@@ -1169,54 +1139,38 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_system(ptr %out, ptr %
; GFX9-LABEL: flat_atomic_dec_ret_i32_offset_system:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
-; GFX9-NEXT: s_add_u32 flat_scratch_lo, s12, s17
-; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
-; GFX9-NEXT: v_mov_b32_e32 v2, 42
+; GFX9-NEXT: v_mov_b32_e32 v0, 42
+; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v0, s2
-; GFX9-NEXT: v_mov_b32_e32 v1, s3
-; GFX9-NEXT: flat_atomic_dec v2, v[0:1], v2 offset:16 glc
-; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_atomic_dec v0, v1, v0, s[2:3] offset:16 glc
+; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_wbinvl1_vol
-; GFX9-NEXT: v_mov_b32_e32 v0, s0
-; GFX9-NEXT: v_mov_b32_e32 v1, s1
-; GFX9-NEXT: flat_store_dword v[0:1], v2
+; GFX9-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: flat_atomic_dec_ret_i32_offset_system:
; GFX10: ; %bb.0:
-; GFX10-NEXT: s_add_u32 s12, s12, s17
-; GFX10-NEXT: s_addc_u32 s13, s13, 0
-; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
-; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
-; GFX10-NEXT: v_mov_b32_e32 v2, 42
+; GFX10-NEXT: v_mov_b32_e32 v0, 42
+; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: s_add_u32 s2, s2, 16
-; GFX10-NEXT: s_addc_u32 s3, s3, 0
-; GFX10-NEXT: v_mov_b32_e32 v0, s2
-; GFX10-NEXT: v_mov_b32_e32 v1, s3
-; GFX10-NEXT: flat_atomic_dec v2, v[0:1], v2 glc
-; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10-NEXT: global_atomic_dec v0, v1, v0, s[2:3] offset:16 glc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: buffer_gl1_inv
; GFX10-NEXT: buffer_gl0_inv
-; GFX10-NEXT: v_mov_b32_e32 v0, s0
-; GFX10-NEXT: v_mov_b32_e32 v1, s1
-; GFX10-NEXT: flat_store_dword v[0:1], v2
+; GFX10-NEXT: global_store_dword v1, v0, s[0:1]
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: flat_atomic_dec_ret_i32_offset_system:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
-; GFX11-NEXT: v_mov_b32_e32 v2, 42
+; GFX11-NEXT: v_dual_mov_b32 v0, 42 :: v_dual_mov_b32 v1, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
-; GFX11-NEXT: flat_atomic_dec_u32 v2, v[0:1], v2 offset:16 glc
-; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-NEXT: global_atomic_dec_u32 v0, v1, v0, s[2:3] offset:16 glc
+; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
-; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
-; GFX11-NEXT: flat_store_b32 v[0:1], v2
+; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX11-NEXT: s_endpgm
%gep = getelementptr i32, ptr %ptr, i32 4
%result = atomicrmw udec_wrap ptr %gep, i32 42 seq_cst, align 4
@@ -1236,7 +1190,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32(ptr %ptr) #1 {
; CI-NEXT: v_mov_b32_e32 v0, s0
; CI-NEXT: v_mov_b32_e32 v1, s1
; CI-NEXT: flat_atomic_dec v[0:1], v2
-; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_wbinvl1_vol
; CI-NEXT: s_endpgm
;
@@ -1251,37 +1205,28 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32(ptr %ptr) #1 {
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: flat_atomic_dec v[0:1], v2
-; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_wbinvl1_vol
; VI-NEXT: s_endpgm
;
; GFX9-LABEL: flat_atomic_dec_noret_i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
-; GFX9-NEXT: s_add_u32 flat_scratch_lo, s12, s17
-; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
-; GFX9-NEXT: v_mov_b32_e32 v2, 42
+; GFX9-NEXT: v_mov_b32_e32 v0, 42
+; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v0, s0
-; GFX9-NEXT: v_mov_b32_e32 v1, s1
-; GFX9-NEXT: flat_atomic_dec v[0:1], v2
-; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_atomic_dec v1, v0, s[0:1]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_wbinvl1_vol
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: flat_atomic_dec_noret_i32:
; GFX10: ; %bb.0:
-; GFX10-NEXT: s_add_u32 s12, s12, s17
-; GFX10-NEXT: s_addc_u32 s13, s13, 0
-; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
-; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
-; GFX10-NEXT: v_mov_b32_e32 v2, 42
-; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: v_mov_b32_e32 v0, s0
-; GFX10-NEXT: v_mov_b32_e32 v1, s1
-; GFX10-NEXT: flat_atomic_dec v[0:1], v2
+; GFX10-NEXT: v_mov_b32_e32 v0, 42
+; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: global_atomic_dec v1, v0, s[0:1]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: buffer_gl1_inv
; GFX10-NEXT: buffer_gl0_inv
@@ -1290,11 +1235,9 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32(ptr %ptr) #1 {
; GFX11-LABEL: flat_atomic_dec_noret_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX11-NEXT: v_mov_b32_e32 v2, 42
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
-; GFX11-NEXT: flat_atomic_dec_u32 v[0:1], v2
+; GFX11-NEXT: v_dual_mov_b32 v0, 42 :: v_dual_mov_b32 v1, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: global_atomic_dec_u32 v1, v0, s[0:1]
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
@@ -1317,7 +1260,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset(ptr %ptr) #1 {
; CI-NEXT: v_mov_b32_e32 v0, s0
; CI-NEXT: v_mov_b32_e32 v1, s1
; CI-NEXT: flat_atomic_dec v[0:1], v2
-; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_wbinvl1_vol
; CI-NEXT: s_endpgm
;
@@ -1334,39 +1277,28 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset(ptr %ptr) #1 {
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: flat_atomic_dec v[0:1], v2
-; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_wbinvl1_vol
; VI-NEXT: s_endpgm
;
; GFX9-LABEL: flat_atomic_dec_noret_i32_offset:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
-; GFX9-NEXT: s_add_u32 flat_scratch_lo, s12, s17
-; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
-; GFX9-NEXT: v_mov_b32_e32 v2, 42
+; GFX9-NEXT: v_mov_b32_e32 v0, 42
+; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v0, s0
-; GFX9-NEXT: v_mov_b32_e32 v1, s1
-; GFX9-NEXT: flat_atomic_dec v[0:1], v2 offset:16
-; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_atomic_dec v1, v0, s[0:1] offset:16
+; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_wbinvl1_vol
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: flat_atomic_dec_noret_i32_offset:
; GFX10: ; %bb.0:
-; GFX10-NEXT: s_add_u32 s12, s12, s17
-; GFX10-NEXT: s_addc_u32 s13, s13, 0
-; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
-; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
-; GFX10-NEXT: v_mov_b32_e32 v2, 42
-; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: s_add_u32 s0, s0, 16
-; GFX10-NEXT: s_addc_u32 s1, s1, 0
-; GFX10-NEXT: v_mov_b32_e32 v0, s0
-; GFX10-NEXT: v_mov_b32_e32 v1, s1
-; GFX10-NEXT: flat_atomic_dec v[0:1], v2
+; GFX10-NEXT: v_mov_b32_e32 v0, 42
+; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: global_atomic_dec v1, v0, s[0:1] offset:16
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: buffer_gl1_inv
; GFX10-NEXT: buffer_gl0_inv
@@ -1375,11 +1307,9 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset(ptr %ptr) #1 {
; GFX11-LABEL: flat_atomic_dec_noret_i32_offset:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX11-NEXT: v_mov_b32_e32 v2, 42
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
-; GFX11-NEXT: flat_atomic_dec_u32 v[0:1], v2 offset:16
+; GFX11-NEXT: v_dual_mov_b32 v0, 42 :: v_dual_mov_b32 v1, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: global_atomic_dec_u32 v1, v0, s[0:1] offset:16
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
@@ -1403,7 +1333,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset_system(ptr %ptr) #1
; CI-NEXT: v_mov_b32_e32 v0, s0
; CI-NEXT: v_mov_b32_e32 v1, s1
; CI-NEXT: flat_atomic_dec v[0:1], v2
-; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CI-NEXT:...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we delete the AMDGPUPromoteKernelArguments pass after this (but really clang should be fixed, we shouldn't have to do this)
Make preparation for #137488.
Yes, that's the plan.
And this is also in the TODO list. |
Make preparation for #137488.
97eaa7b
to
bfda0fe
Compare
[AMDGPU] Make `getAssumedAddrSpace` return AS1 for pointer kernel arguments
8e7a023
to
cb7d93b
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've been thinking we should probably remove InferAddressSpaces from the codegen pass pipeline, it would avoid most of the test diffs here
45cf447
to
e37af8c
Compare
…137534) Make preparation for llvm#137488.
…137534) Make preparation for llvm#137488.
…137534) Make preparation for llvm#137488.
e37af8c
to
86b400a
Compare
…137534) Make preparation for llvm#137488.
86b400a
to
62942e8
Compare
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll
Outdated
Show resolved
Hide resolved
62942e8
to
c24a60e
Compare
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/144/builds/26513. Here is the relevant piece of the build log for reference:
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/133/builds/17048. Here is the relevant piece of the build log for reference:
|
The test failure has been fixed in 0a75d8e.
No description provided.