Skip to content

[CGP] Eliminate noop bitcasts #146961

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions llvm/lib/CodeGen/CodeGenPrepare.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8720,6 +8720,13 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
if (isa<Constant>(CI->getOperand(0)))
return AnyChange;

// Remove no-op bitcasts (source and destination types are identical).
if (isa<BitCastInst>(I) && I->getType() == I->getOperand(0)->getType()) {
replaceAllUsesWith(I, I->getOperand(0), FreshBBs, IsHugeFunc);
I->eraseFromParent();
return true;
}

if (OptimizeNoopCopyExpression(CI, *TLI, *DL))
return true;

Expand Down
20 changes: 10 additions & 10 deletions llvm/test/CodeGen/AArch64/memcpy-scoped-aa.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
; MIR-DAG: ![[SET1:[0-9]+]] = !{![[SCOPE1]]}

; MIR-LABEL: name: test_memcpy
; MIR: %2:fpr128 = LDRQui %0, 1 :: (load (s128) from %ir.p1, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
; MIR-NEXT: STRQui killed %2, %0, 0 :: (store (s128) into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
; MIR: %2:fpr128 = LDRQui %0, 1 :: (load (s128) from %ir.add.ptr, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
; MIR-NEXT: STRQui killed %2, %0, 0 :: (store (s128) into %ir.p, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
define i32 @test_memcpy(ptr nocapture %p, ptr nocapture readonly %q) {
; CHECK-LABEL: test_memcpy:
; CHECK: // %bb.0:
Expand All @@ -32,8 +32,8 @@ define i32 @test_memcpy(ptr nocapture %p, ptr nocapture readonly %q) {
}

; MIR-LABEL: name: test_memcpy_inline
; MIR: %2:fpr128 = LDRQui %0, 1 :: (load (s128) from %ir.p1, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
; MIR-NEXT: STRQui killed %2, %0, 0 :: (store (s128) into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
; MIR: %2:fpr128 = LDRQui %0, 1 :: (load (s128) from %ir.add.ptr, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
; MIR-NEXT: STRQui killed %2, %0, 0 :: (store (s128) into %ir.p, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
define i32 @test_memcpy_inline(ptr nocapture %p, ptr nocapture readonly %q) {
; CHECK-LABEL: test_memcpy_inline:
; CHECK: // %bb.0:
Expand All @@ -55,8 +55,8 @@ define i32 @test_memcpy_inline(ptr nocapture %p, ptr nocapture readonly %q) {
}

; MIR-LABEL: name: test_memmove
; MIR: %2:fpr128 = LDRQui %0, 1 :: (load (s128) from %ir.p1, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
; MIR-NEXT: STRQui killed %2, %0, 0 :: (store (s128) into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
; MIR: %2:fpr128 = LDRQui %0, 1 :: (load (s128) from %ir.add.ptr, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
; MIR-NEXT: STRQui killed %2, %0, 0 :: (store (s128) into %ir.p, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
define i32 @test_memmove(ptr nocapture %p, ptr nocapture readonly %q) {
; CHECK-LABEL: test_memmove:
; CHECK: // %bb.0:
Expand All @@ -79,8 +79,8 @@ define i32 @test_memmove(ptr nocapture %p, ptr nocapture readonly %q) {

; MIR-LABEL: name: test_memset
; MIR: %2:gpr64 = MOVi64imm -6148914691236517206
; MIR-NEXT: STRXui %2, %0, 1 :: (store (s64) into %ir.p0 + 8, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
; MIR-NEXT: STRXui %2, %0, 0 :: (store (s64) into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
; MIR-NEXT: STRXui %2, %0, 1 :: (store (s64) into %ir.p + 8, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
; MIR-NEXT: STRXui %2, %0, 0 :: (store (s64) into %ir.p, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
define i32 @test_memset(ptr nocapture %p, ptr nocapture readonly %q) {
; CHECK-LABEL: test_memset:
; CHECK: // %bb.0:
Expand All @@ -100,8 +100,8 @@ define i32 @test_memset(ptr nocapture %p, ptr nocapture readonly %q) {
}

; MIR-LABEL: name: test_mempcpy
; MIR: %2:fpr128 = LDRQui %0, 1 :: (load (s128) from %ir.p1, align 1, !alias.scope ![[SET0]], !noalias ![[SET1]])
; MIR-NEXT: STRQui killed %2, %0, 0 :: (store (s128) into %ir.p0, align 1, !alias.scope ![[SET0]], !noalias ![[SET1]])
; MIR: %2:fpr128 = LDRQui %0, 1 :: (load (s128) from %ir.add.ptr, align 1, !alias.scope ![[SET0]], !noalias ![[SET1]])
; MIR-NEXT: STRQui killed %2, %0, 0 :: (store (s128) into %ir.p, align 1, !alias.scope ![[SET0]], !noalias ![[SET1]])
define i32 @test_mempcpy(ptr nocapture %p, ptr nocapture readonly %q) {
; CHECK-LABEL: test_mempcpy:
; CHECK: // %bb.0:
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AArch64/merge-scoped-aa-store.ll
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ define void @blam0(ptr %g0, ptr %g1) {
; MIR-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x1
; MIR-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x0
; MIR-NEXT: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[COPY1]], 0 :: (load (s64) from %ir.g0, align 4, !alias.scope !0, !noalias !3)
; MIR-NEXT: STRDui killed [[LDRDui]], [[COPY]], 0 :: (store (s64) into %ir.tmp41, align 4, !alias.scope !3, !noalias !0)
; MIR-NEXT: STRDui killed [[LDRDui]], [[COPY]], 0 :: (store (s64) into %ir.g1, align 4, !alias.scope !3, !noalias !0)
; MIR-NEXT: RET_ReallyLR
%tmp4 = getelementptr inbounds <3 x float>, ptr %g1, i64 0, i64 0
%tmp5 = load <3 x float>, ptr %g0, align 4, !alias.scope !0, !noalias !1
Expand All @@ -43,7 +43,7 @@ define void @blam1(ptr %g0, ptr %g1) {
; MIR-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x1
; MIR-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x0
; MIR-NEXT: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[COPY1]], 0 :: (load (s64) from %ir.g0, align 4, !alias.scope !0, !noalias !3)
; MIR-NEXT: STRDui killed [[LDRDui]], [[COPY]], 0 :: (store (s64) into %ir.tmp41, align 4, !alias.scope !9, !noalias !10)
; MIR-NEXT: STRDui killed [[LDRDui]], [[COPY]], 0 :: (store (s64) into %ir.g1, align 4, !alias.scope !9, !noalias !10)
; MIR-NEXT: RET_ReallyLR
%tmp4 = getelementptr inbounds <3 x float>, ptr %g1, i64 0, i64 0
%tmp5 = load <3 x float>, ptr %g0, align 4, !alias.scope !0, !noalias !1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1)
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; GCN-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p1) from %ir.out.kernarg.offset1, align 16, addrspace 4)
; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p1) from %ir.test_call_external_i32_func_i32_imm.kernarg.segment, align 16, addrspace 4)
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i32_func_i32
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
Expand Down Expand Up @@ -2957,7 +2957,7 @@ define amdgpu_kernel void @test_call_external_v33i32_func_v33i32_i32(ptr addrspa
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p1) from %ir.p.kernarg.offset1, align 16, addrspace 4)
; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p1) from %ir.test_call_external_v33i32_func_v33i32_i32.kernarg.segment, align 16, addrspace 4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; GCN-NEXT: %18:_(p4) = nuw nusw G_PTR_ADD [[INT]], [[C]](s64)
; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD %18(p4) :: (dereferenceable invariant load (s32) from %ir.idx.kernarg.offset, align 8, addrspace 4)
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1350,7 +1350,7 @@ define amdgpu_kernel void @test_call_external_void_func_p0_imm(ptr %arg) #0 {
; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p0) from %ir.arg.kernarg.offset1, align 16, addrspace 4)
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p0) from %ir.test_call_external_void_func_p0_imm.kernarg.segment, align 16, addrspace 4)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_p0
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
Expand Down Expand Up @@ -4654,7 +4654,7 @@ define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val
; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (<32 x s32>) from %ir.val.kernarg.offset1, align 16, addrspace 4)
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (<32 x s32>) from %ir.stack_passed_arg_alignment_v32i32_f64.kernarg.segment, align 16, addrspace 4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128
; CHECK-NEXT: %18:_(p4) = nuw nusw G_PTR_ADD [[INT]], [[C]](s64)
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD %18(p4) :: (dereferenceable invariant load (s64) from %ir.tmp.kernarg.offset, align 16, addrspace 4)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr(ptr %fptr) {
; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p0) from %ir.fptr.kernarg.offset1, align 16, addrspace 4)
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p0) from %ir.test_indirect_call_sgpr_ptr.kernarg.segment, align 16, addrspace 4)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ define amdgpu_kernel void @kernel_call_i32_fastcc_i32_i32_unused_result(i32 %a,
; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; GCN-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; GCN-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (<2 x s32>) from %ir.a.kernarg.offset1, align 16, addrspace 4)
; GCN-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (<2 x s32>) from %ir.kernel_call_i32_fastcc_i32_i32_unused_result.kernarg.segment, align 16, addrspace 4)
; GCN-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<2 x s32>), [[C]](s32)
; GCN-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<2 x s32>), [[C1]](s32)
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $sgpr17 = S_LOAD_DWORD_IMM renamable $sgpr8_sgpr9, 24, 0 :: (dereferenceable invariant load (s32) from %ir.arg4.kernarg.offset.align.down, align 8, addrspace 4)
; GFX90A-NEXT: early-clobber renamable $sgpr20_sgpr21_sgpr22_sgpr23 = S_LOAD_DWORDX4_IMM_ec renamable $sgpr8_sgpr9, 24, 0 :: (dereferenceable invariant load (s128) from %ir.arg6.kernarg.offset.align.down, align 8, addrspace 4)
; GFX90A-NEXT: renamable $sgpr33 = S_LOAD_DWORD_IMM renamable $sgpr8_sgpr9, 40, 0 :: (dereferenceable invariant load (s32) from %ir.arg6.kernarg.offset.align.down + 16, align 8, addrspace 4)
; GFX90A-NEXT: renamable $sgpr24_sgpr25_sgpr26_sgpr27 = S_LOAD_DWORDX4_IMM renamable $sgpr8_sgpr9, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4)
; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_LOAD_DWORDX2_IMM renamable $sgpr8_sgpr9, 16, 0 :: (dereferenceable invariant load (s64) from %ir.arg.kernarg.offset1 + 16, align 16, addrspace 4)
; GFX90A-NEXT: renamable $sgpr24_sgpr25_sgpr26_sgpr27 = S_LOAD_DWORDX4_IMM renamable $sgpr8_sgpr9, 0, 0 :: (dereferenceable invariant load (s128) from %ir.f1.kernarg.segment, addrspace 4)
; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_LOAD_DWORDX2_IMM renamable $sgpr8_sgpr9, 16, 0 :: (dereferenceable invariant load (s64) from %ir.f1.kernarg.segment + 16, align 16, addrspace 4)
; GFX90A-NEXT: S_BITCMP1_B32 renamable $sgpr17, 0, implicit-def $scc
; GFX90A-NEXT: renamable $sgpr12_sgpr13 = S_CSELECT_B64 -1, 0, implicit killed $scc
; GFX90A-NEXT: renamable $sgpr34_sgpr35 = S_MOV_B64 -1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6094858 /* regdef:VReg_128 */, def %7
; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3538954 /* regdef:VReg_64 */, def %8
; REGALLOC-GFX908-NEXT: GLOBAL_STORE_DWORDX4 undef %15:vreg_64, %7, 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) poison`, addrspace 1)
; REGALLOC-GFX908-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4)
; REGALLOC-GFX908-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.partial_copy.kernarg.segment, addrspace 4)
; REGALLOC-GFX908-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3
; REGALLOC-GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
; REGALLOC-GFX908-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
Expand All @@ -40,7 +40,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
; PEI-GFX908-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit killed $vgpr0_vgpr1
; PEI-GFX908-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = COPY killed renamable $agpr0_agpr1_agpr2_agpr3, implicit $exec
; PEI-GFX908-NEXT: GLOBAL_STORE_DWORDX4 undef renamable $vgpr0_vgpr1, killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) poison`, addrspace 1)
; PEI-GFX908-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4)
; PEI-GFX908-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.partial_copy.kernarg.segment, addrspace 4)
; PEI-GFX908-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec
; PEI-GFX908-NEXT: renamable $vgpr0 = V_MOV_B32_e32 1, implicit $exec
; PEI-GFX908-NEXT: renamable $vgpr1 = V_MOV_B32_e32 2, implicit $exec
Expand All @@ -60,7 +60,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6422538 /* regdef:VReg_128_Align2 */, def %7
; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3866634 /* regdef:VReg_64_Align2 */, def %8
; REGALLOC-GFX90A-NEXT: GLOBAL_STORE_DWORDX4 undef %15:vreg_64_align2, %7, 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) poison`, addrspace 1)
; REGALLOC-GFX90A-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4)
; REGALLOC-GFX90A-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.partial_copy.kernarg.segment, addrspace 4)
; REGALLOC-GFX90A-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3
; REGALLOC-GFX90A-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
; REGALLOC-GFX90A-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
Expand All @@ -83,7 +83,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
; PEI-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5)
; PEI-GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit killed $vgpr0_vgpr1
; PEI-GFX90A-NEXT: GLOBAL_STORE_DWORDX4 undef renamable $vgpr0_vgpr1, killed renamable $agpr0_agpr1_agpr2_agpr3, 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) poison`, addrspace 1)
; PEI-GFX90A-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4)
; PEI-GFX90A-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.partial_copy.kernarg.segment, addrspace 4)
; PEI-GFX90A-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec
; PEI-GFX90A-NEXT: renamable $vgpr0 = V_MOV_B32_e32 1, implicit $exec
; PEI-GFX90A-NEXT: renamable $vgpr1 = V_MOV_B32_e32 2, implicit $exec
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ define amdgpu_kernel void @test_spill_av_class(<4 x i32> %arg) #0 {
; GCN: bb.0 (%ir-block.0):
; GCN-NEXT: liveins: $sgpr8_sgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr8_sgpr9, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4)
; GCN-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr8_sgpr9, 0, 0 :: (dereferenceable invariant load (s128) from %ir.test_spill_av_class.kernarg.segment, addrspace 4)
; GCN-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/SystemZ/isel-debug.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
;
; Check that some debug output is printed without problems.
; CHECK: SystemZAddressingMode
; CHECK: Base t5: i64,ch = load<(load (s64) from %ir.0)>
; CHECK: Base t5: i64,ch = load<(load (s64) from %ir.ptr)>
; CHECK: Index
; CHECK: Disp

Expand Down
5 changes: 2 additions & 3 deletions llvm/test/CodeGen/Thumb2/active_lane_mask.ll
Original file line number Diff line number Diff line change
Expand Up @@ -299,15 +299,14 @@ define void @test_width2(ptr nocapture readnone %x, ptr nocapture %y, i8 zeroext
; CHECK-NEXT: movs r0, #0
; CHECK-NEXT: rsbs r3, r3, #0
; CHECK-NEXT: bfi r0, r12, #0, #1
; CHECK-NEXT: sub.w r12, r1, #8
; CHECK-NEXT: bfi r0, r3, #1, #1
; CHECK-NEXT: lsls r3, r0, #31
; CHECK-NEXT: itt ne
; CHECK-NEXT: ldrne.w r3, [r12]
; CHECK-NEXT: ldrne r3, [r1, #-8]
; CHECK-NEXT: vmovne.32 q0[0], r3
; CHECK-NEXT: lsls r0, r0, #30
; CHECK-NEXT: itt mi
; CHECK-NEXT: ldrmi.w r0, [r12, #4]
; CHECK-NEXT: ldrmi r0, [r1, #-4]
; CHECK-NEXT: vmovmi.32 q0[2], r0
; CHECK-NEXT: vmrs r3, p0
; CHECK-NEXT: and r0, r3, #1
Expand Down
Loading
Loading