AMDGPU: Migrate some tests away from undef #131277
Conversation
@llvm/pr-subscribers-llvm-globalisel
@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes

Patch is 104.92 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/131277.diff

29 Files Affected:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
index a02e0b37479a0..ebb345f561687 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
@@ -769,7 +769,7 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) {
bb:
%padding = alloca [64 x i32], align 4, addrspace(5)
%i = alloca [32 x float], align 4, addrspace(5)
- %pad_gep = getelementptr inbounds [64 x i32], ptr addrspace(5) %padding, i32 0, i32 undef
+ %pad_gep = getelementptr inbounds [64 x i32], ptr addrspace(5) %padding, i32 0, i32 0
%pad_load = load volatile i32, ptr addrspace(5) %pad_gep, align 4
%i7 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %idx
store volatile i32 15, ptr addrspace(5) %i7, align 4
@@ -956,7 +956,7 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel() {
bb:
%padding = alloca [64 x i32], align 4, addrspace(5)
%i = alloca [32 x float], align 4, addrspace(5)
- %pad_gep = getelementptr inbounds [64 x i32], ptr addrspace(5) %padding, i32 0, i32 undef
+ %pad_gep = getelementptr inbounds [64 x i32], ptr addrspace(5) %padding, i32 0, i32 0
%pad_load = load volatile i32, ptr addrspace(5) %pad_gep, align 4
%i2 = tail call i32 @llvm.amdgcn.workitem.id.x()
%i3 = zext i32 %i2 to i64
@@ -1153,7 +1153,7 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
bb:
%padding = alloca [64 x i32], align 4, addrspace(5)
%i = alloca [32 x float], align 4, addrspace(5)
- %pad_gep = getelementptr inbounds [64 x i32], ptr addrspace(5) %padding, i32 0, i32 undef
+ %pad_gep = getelementptr inbounds [64 x i32], ptr addrspace(5) %padding, i32 0, i32 0
%pad_load = load volatile i32, ptr addrspace(5) %pad_gep, align 4
%i7 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %idx
store volatile i32 15, ptr addrspace(5) %i7, align 4
@@ -1358,7 +1358,7 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) {
bb:
%padding = alloca [4096 x i32], align 4, addrspace(5)
%i = alloca [32 x float], align 4, addrspace(5)
- %pad_gep = getelementptr inbounds [4096 x i32], ptr addrspace(5) %padding, i32 0, i32 undef
+ %pad_gep = getelementptr inbounds [4096 x i32], ptr addrspace(5) %padding, i32 0, i32 0
%pad_load = load volatile i32, ptr addrspace(5) %pad_gep, align 4
%i7 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %idx
store volatile i32 15, ptr addrspace(5) %i7, align 4
@@ -1549,7 +1549,7 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel() {
bb:
%padding = alloca [4096 x i32], align 4, addrspace(5)
%i = alloca [32 x float], align 4, addrspace(5)
- %pad_gep = getelementptr inbounds [4096 x i32], ptr addrspace(5) %padding, i32 0, i32 undef
+ %pad_gep = getelementptr inbounds [4096 x i32], ptr addrspace(5) %padding, i32 0, i32 0
%pad_load = load volatile i32, ptr addrspace(5) %pad_gep, align 4
%i2 = tail call i32 @llvm.amdgcn.workitem.id.x()
%i3 = zext i32 %i2 to i64
@@ -1750,7 +1750,7 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
bb:
%padding = alloca [4096 x i32], align 4, addrspace(5)
%i = alloca [32 x float], align 4, addrspace(5)
- %pad_gep = getelementptr inbounds [4096 x i32], ptr addrspace(5) %padding, i32 0, i32 undef
+ %pad_gep = getelementptr inbounds [4096 x i32], ptr addrspace(5) %padding, i32 0, i32 0
%pad_load = load volatile i32, ptr addrspace(5) %pad_gep, align 4
%i7 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %idx
store volatile i32 15, ptr addrspace(5) %i7, align 4
@@ -1900,7 +1900,7 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
; UNALIGNED_GFX12-NEXT: s_endpgm
bb:
%i = alloca [4096 x i32], align 4, addrspace(5)
- %i1 = getelementptr inbounds [4096 x i32], ptr addrspace(5) %i, i32 0, i32 undef
+ %i1 = getelementptr inbounds [4096 x i32], ptr addrspace(5) %i, i32 0, i32 0
store volatile i32 13, ptr addrspace(5) %i1, align 4
%i7 = getelementptr inbounds [4096 x i32], ptr addrspace(5) %i, i32 0, i32 4000
store volatile i32 15, ptr addrspace(5) %i7, align 4
@@ -2055,7 +2055,7 @@ define void @store_load_large_imm_offset_foo() {
; UNALIGNED_GFX12-NEXT: s_setpc_b64 s[30:31]
bb:
%i = alloca [4096 x i32], align 4, addrspace(5)
- %i1 = getelementptr inbounds [4096 x i32], ptr addrspace(5) %i, i32 0, i32 undef
+ %i1 = getelementptr inbounds [4096 x i32], ptr addrspace(5) %i, i32 0, i32 0
store volatile i32 13, ptr addrspace(5) %i1, align 4
%i7 = getelementptr inbounds [4096 x i32], ptr addrspace(5) %i, i32 0, i32 4000
store volatile i32 15, ptr addrspace(5) %i7, align 4
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll
index ec893feb8d9cb..ce195593627db 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll
@@ -1681,7 +1681,7 @@ define amdgpu_kernel void @test_div_scale_f64_val_undef_val(ptr addrspace(1) %ou
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX11-NEXT: s_endpgm
- %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double 8.0, double undef, i1 false)
+ %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double 8.0, double poison, i1 false)
%result0 = extractvalue { double, i1 } %result, 0
store double %result0, ptr addrspace(1) %out, align 8
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.rsq.clamp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.rsq.clamp.ll
index c70a2e6ee6758..24fe2d1c41ffa 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.rsq.clamp.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.rsq.clamp.ll
@@ -215,7 +215,7 @@ define double @v_rsq_clamp_undef_f64() #0 {
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[2:3]
; GFX12-NEXT: s_setpc_b64 s[30:31]
- %rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double undef)
+ %rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double poison)
ret double %rsq_clamp
}
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal_scratch_mergedshader.ll b/llvm/test/CodeGen/AMDGPU/amdpal_scratch_mergedshader.ll
index 2ddb2fea5ddc6..67382d9cb47f5 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal_scratch_mergedshader.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal_scratch_mergedshader.ll
@@ -12,7 +12,7 @@
define amdgpu_hs void @_amdgpu_hs_main(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, i32 inreg %arg3, i32 inreg %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 inreg %arg7, <6 x i32> inreg %arg8) {
.entry:
%__llpc_global_proxy_7.i = alloca [3 x <4 x float>], align 16, addrspace(5)
- %tmp = icmp ult i32 undef, undef
+ %tmp = icmp ult i32 %arg, 0
br i1 %tmp, label %.beginls, label %.endls
.beginls: ; preds = %.entry
diff --git a/llvm/test/CodeGen/AMDGPU/bug-v4f64-subvector.ll b/llvm/test/CodeGen/AMDGPU/bug-v4f64-subvector.ll
index d3808abc9432f..162b88d573624 100644
--- a/llvm/test/CodeGen/AMDGPU/bug-v4f64-subvector.ll
+++ b/llvm/test/CodeGen/AMDGPU/bug-v4f64-subvector.ll
@@ -7,12 +7,12 @@
; CHECK: GLOBAL_LOAD_DWORDX4
; CHECK: GLOBAL_LOAD_DWORDX4
; CHECK: GLOBAL_STORE_DWORDX4
-define protected amdgpu_kernel void @test1() local_unnamed_addr !kernel_arg_addr_space !0 !kernel_arg_access_qual !1 !kernel_arg_type !2 !kernel_arg_base_type !2 !kernel_arg_type_qual !3 !kernel_arg_name !4 {
+define protected amdgpu_kernel void @test1(ptr addrspace(4) %ptr) local_unnamed_addr !kernel_arg_addr_space !0 !kernel_arg_access_qual !1 !kernel_arg_type !2 !kernel_arg_base_type !2 !kernel_arg_type_qual !3 !kernel_arg_name !4 {
entry:
- %tmp = load <3 x i64>, ptr addrspace(4) poison, align 16, !invariant.load !5
+ %tmp = load <3 x i64>, ptr addrspace(4) %ptr, align 16, !invariant.load !5
%srcA.load2 = extractelement <3 x i64> %tmp, i32 0
%tmp1 = inttoptr i64 %srcA.load2 to ptr addrspace(1)
- %tmp2 = getelementptr inbounds double, ptr addrspace(1) %tmp1, i64 undef
+ %tmp2 = getelementptr inbounds double, ptr addrspace(1) %tmp1, i64 0
%tmp4 = load <3 x double>, ptr addrspace(1) %tmp2, align 8, !tbaa !6
%tmp5 = extractelement <3 x double> %tmp4, i32 1
%tmp6 = insertelement <3 x double> poison, double %tmp5, i32 1
@@ -34,12 +34,12 @@ entry:
; CHECK: GLOBAL_LOAD_DWORDX2
; CHECK: GLOBAL_LOAD_DWORDX2
; CHECK: GLOBAL_STORE_DWORDX2
-define protected amdgpu_kernel void @test2() local_unnamed_addr !kernel_arg_addr_space !0 !kernel_arg_access_qual !1 !kernel_arg_type !2 !kernel_arg_base_type !2 !kernel_arg_type_qual !3 !kernel_arg_name !4 {
+define protected amdgpu_kernel void @test2(ptr addrspace(4) %ptr) local_unnamed_addr !kernel_arg_addr_space !0 !kernel_arg_access_qual !1 !kernel_arg_type !2 !kernel_arg_base_type !2 !kernel_arg_type_qual !3 !kernel_arg_name !4 {
entry:
- %tmp = load <3 x i64>, ptr addrspace(4) poison, align 16, !invariant.load !5
+ %tmp = load <3 x i64>, ptr addrspace(4) %ptr, align 16, !invariant.load !5
%srcA.load2 = extractelement <3 x i64> %tmp, i32 0
%tmp1 = inttoptr i64 %srcA.load2 to ptr addrspace(1)
- %tmp2 = getelementptr inbounds double, ptr addrspace(1) %tmp1, i64 undef
+ %tmp2 = getelementptr inbounds double, ptr addrspace(1) %tmp1, i64 0
%tmp4 = load <3 x double>, ptr addrspace(1) %tmp2, align 8, !tbaa !6
%tmp5 = extractelement <3 x double> %tmp4, i32 1
%tmp6 = insertelement <3 x double> poison, double %tmp5, i32 1
diff --git a/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll b/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll
index fa4e82da1d18e..7ce69fe2f4989 100644
--- a/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll
+++ b/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll
@@ -359,7 +359,7 @@ for.body:
%add = fadd float %vecload, 1.0
store float %add, ptr addrspace(3) %arrayidx, align 8
%inc = add i32 %indvar, 1
- br i1 undef, label %for.body, label %for.exit
+ br i1 poison, label %for.body, label %for.exit
}
define amdgpu_kernel void @loop_arg_0(ptr addrspace(3) %ptr, i32 %n) nounwind {
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-bitfield-extract.ll b/llvm/test/CodeGen/AMDGPU/cgp-bitfield-extract.ll
index 587f172c84edf..f712421083e6b 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-bitfield-extract.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-bitfield-extract.ll
@@ -99,16 +99,17 @@ ret:
; OPT-LABEL: @sink_ubfe_i16(
; OPT: entry:
+; OPT-NEXT: icmp
; OPT-NEXT: br i1
; OPT: bb0:
-; OPT: %0 = lshr i16 %arg1, 4
-; OPT-NEXT: %val0 = and i16 %0, 255
+; OPT: [[LSHR0:%[0-9]+]] = lshr i16 %arg1, 4
+; OPT-NEXT: %val0 = and i16 [[LSHR0]], 255
; OPT: br label
; OPT: bb1:
-; OPT: %1 = lshr i16 %arg1, 4
-; OPT-NEXT: %val1 = and i16 %1, 127
+; OPT: [[LSHR1:%[0-9]+]] = lshr i16 %arg1, 4
+; OPT-NEXT: %val1 = and i16 [[LSHR1]], 127
; OPT: br label
; OPT: ret:
@@ -123,19 +124,21 @@ ret:
; VI: s_bfe_u32 [[BFE:s[0-9]+]], [[ARG]], 0xc0004
; GCN: s_cbranch_scc{{[0-1]}}
+; GCN: ; %bb.1:
; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70004
; VI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x7f
-; GCN: .LBB2_3:
+; GCN: .LBB2_2:
; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80004
; VI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0xff
; GCN: buffer_store_short
; GCN: s_endpgm
-define amdgpu_kernel void @sink_ubfe_i16(ptr addrspace(1) %out, i16 %arg1) #0 {
+define amdgpu_kernel void @sink_ubfe_i16(ptr addrspace(1) %out, i16 %arg1, [8 x i32], i32 %arg2) #0 {
entry:
%shr = lshr i16 %arg1, 4
- br i1 undef, label %bb0, label %bb1
+ %cond = icmp eq i32 %arg2, 0
+ br i1 %cond, label %bb0, label %bb1
bb0:
%val0 = and i16 %shr, 255
diff --git a/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll b/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll
index 77dfc859cd1b1..434fc764e1fa6 100644
--- a/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll
+++ b/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll
@@ -63,7 +63,7 @@ define i32 @combine_add_zext_xor() {
.a: ; preds = %bb9, %.entry
%.2 = phi i32 [ 0, %.entry ], [ %i11, %bb9 ]
- br i1 undef, label %bb9, label %bb
+ br i1 poison, label %bb9, label %bb
bb: ; preds = %.a
%.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) poison, i32 %.2, i32 64, i32 1)
@@ -411,28 +411,33 @@ bb9: ; preds = %bb, %.a
; Test that unused lanes in the s_and result are masked out with v_cndmask.
-define i32 @combine_sub_zext_and() {
+define i32 @combine_sub_zext_and(i32 inreg %cond) {
; GFX1010-LABEL: combine_sub_zext_and:
; GFX1010: ; %bb.0: ; %.entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: s_cmp_lg_u32 s16, 0
; GFX1010-NEXT: v_mov_b32_e32 v1, 0
+; GFX1010-NEXT: s_cselect_b32 s4, -1, 0
+; GFX1010-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
+; GFX1010-NEXT: v_cmp_ne_u32_e64 s4, 1, v0
; GFX1010-NEXT: s_branch .LBB5_2
; GFX1010-NEXT: .LBB5_1: ; %bb9
; GFX1010-NEXT: ; in Loop: Header=BB5_2 Depth=1
; GFX1010-NEXT: v_cmp_lt_i32_e32 vcc_lo, 0xfffffbe6, v1
-; GFX1010-NEXT: s_and_b32 s4, s4, vcc_lo
-; GFX1010-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
+; GFX1010-NEXT: s_and_b32 s5, s5, vcc_lo
+; GFX1010-NEXT: v_cndmask_b32_e64 v0, 0, 1, s5
; GFX1010-NEXT: v_sub_nc_u32_e32 v1, v1, v0
; GFX1010-NEXT: s_cbranch_vccz .LBB5_4
; GFX1010-NEXT: .LBB5_2: ; %.a
; GFX1010-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1010-NEXT: ; implicit-def: $sgpr4
-; GFX1010-NEXT: s_cbranch_scc1 .LBB5_1
+; GFX1010-NEXT: s_and_b32 vcc_lo, exec_lo, s4
+; GFX1010-NEXT: ; implicit-def: $sgpr5
+; GFX1010-NEXT: s_cbranch_vccnz .LBB5_1
; GFX1010-NEXT: ; %bb.3: ; %bb
; GFX1010-NEXT: ; in Loop: Header=BB5_2 Depth=1
; GFX1010-NEXT: buffer_load_dword v0, v1, s[4:7], 64 offen glc
; GFX1010-NEXT: s_waitcnt vmcnt(0)
-; GFX1010-NEXT: v_cmp_eq_u32_e64 s4, 0, v0
+; GFX1010-NEXT: v_cmp_eq_u32_e64 s5, 0, v0
; GFX1010-NEXT: s_branch .LBB5_1
; GFX1010-NEXT: .LBB5_4: ; %.exit
; GFX1010-NEXT: s_setpc_b64 s[30:31]
@@ -440,26 +445,32 @@ define i32 @combine_sub_zext_and() {
; GFX1100-LABEL: combine_sub_zext_and:
; GFX1100: ; %bb.0: ; %.entry
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: s_cmp_lg_u32 s0, 0
; GFX1100-NEXT: v_mov_b32_e32 v1, 0
+; GFX1100-NEXT: s_cselect_b32 s0, -1, 0
+; GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX1100-NEXT: v_cmp_ne_u32_e64 s0, 1, v0
; GFX1100-NEXT: s_branch .LBB5_2
; GFX1100-NEXT: .LBB5_1: ; %bb9
; GFX1100-NEXT: ; in Loop: Header=BB5_2 Depth=1
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1100-NEXT: v_cmp_lt_i32_e32 vcc_lo, 0xfffffbe6, v1
-; GFX1100-NEXT: s_and_b32 s0, s0, vcc_lo
-; GFX1100-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX1100-NEXT: s_and_b32 s1, s1, vcc_lo
+; GFX1100-NEXT: v_cndmask_b32_e64 v0, 0, 1, s1
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-NEXT: v_sub_nc_u32_e32 v1, v1, v0
; GFX1100-NEXT: s_cbranch_vccz .LBB5_4
; GFX1100-NEXT: .LBB5_2: ; %.a
; GFX1100-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1100-NEXT: ; implicit-def: $sgpr0
-; GFX1100-NEXT: s_cbranch_scc1 .LBB5_1
+; GFX1100-NEXT: s_and_b32 vcc_lo, exec_lo, s0
+; GFX1100-NEXT: ; implicit-def: $sgpr1
+; GFX1100-NEXT: s_cbranch_vccnz .LBB5_1
; GFX1100-NEXT: ; %bb.3: ; %bb
; GFX1100-NEXT: ; in Loop: Header=BB5_2 Depth=1
; GFX1100-NEXT: buffer_load_b32 v0, v1, s[0:3], 64 offen glc
; GFX1100-NEXT: s_waitcnt vmcnt(0)
-; GFX1100-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX1100-NEXT: v_cmp_eq_u32_e64 s1, 0, v0
; GFX1100-NEXT: s_branch .LBB5_1
; GFX1100-NEXT: .LBB5_4: ; %.exit
; GFX1100-NEXT: s_setpc_b64 s[30:31]
@@ -468,7 +479,8 @@ define i32 @combine_sub_zext_and() {
.a: ; preds = %bb9, %.entry
%.2 = phi i32 [ 0, %.entry ], [ %i11, %bb9 ]
- br i1 undef, label %bb9, label %bb
+ %cmp = icmp eq i32 %cond, 0
+ br i1 %cmp, label %bb9, label %bb
bb: ; preds = %.a
%.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) poison, i32 %.2, i32 64, i32 1)
diff --git a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
index 9a5dcfc0e39b3..2ec6f7ab7602b 100644
--- a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
+++ b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
@@ -2993,7 +2993,7 @@ entry:
for.body.i: ; preds = %for.body.i, %entry
%retval.sroa.0.0.copyload = load ptr, ptr addrspace(1) poison, align 8
- %add.ptr = getelementptr inbounds %Vec, ptr %retval.sroa.0.0.copyload, i64 undef
+ %add.ptr = getelementptr inbounds %Vec, ptr %retval.sroa.0.0.copyload, i64 0
%retval.sroa.0.0..sroa_cast_adr = addrspacecast ptr %add.ptr to ptr addrspace(1)
%retval.sroa.0.0.copyload.i = load i32, ptr addrspace(1) %retval.sroa.0.0..sroa_cast_adr, align 1
%p1.sroa.6.0.extract.shift = lshr i32 %retval.sroa.0.0.copyload.i, 24
diff --git a/llvm/test/CodeGen/AMDGPU/debug-value.ll b/llvm/test/CodeGen/AMDGPU/debug-value.ll
index f13bd665cc7f0..60ffc28cef577 100644
--- a/llvm/test/CodeGen/AMDGPU/debug-value.ll
+++ b/llvm/test/CodeGen/AMDGPU/debug-value.ll
@@ -13,8 +13,8 @@ bb:
%tmp10 = load i32, ptr addrspace(1) %tmp9, align 4
%tmp11 = sext i32 %tmp10 to i64
%tmp12 = getelementptr inbounds <2 x float>, ptr addrspace(1) %arg, i64 %tmp11
- %tmp14 = getelementptr inbounds i8, ptr addrspace(1) %arg, i64 undef
- %tmp16 = getelementptr inbounds <4 x float>, ptr addrspace(1) %tmp14, i64 undef
+ %tmp14 = getelementptr inbounds i8, ptr addrspace(1) %arg, i64 0
+ %tmp16 = getelementptr inbounds <4 x float>, ptr addrspace(1) %tmp14, i64 0
%tmp17 = load <4 x float>, ptr addrspace(1) %tmp16, align 16
%tmp18 = fsub <4 x float> %tmp17, %tmp17
%ext = extractelement <4 x float> %tmp18, i32 1
@@ -35,7 +35,7 @@ bb25: ; preds = %bb
bb28: ; preds = %bb25, %bb21
%tmp29 = phi <4 x float> [ %tmp27, %bb25 ], [ %tmp24, %bb21 ]
- store <4 x float> %tmp29, ptr addrspace(5) poison, align 16
+ store <4 x float> %tmp29, ptr addrspace(5) null, align 16
%tmp30 = getelementptr inbounds %struct.wombat, ptr addrspace(1) %arg, i64 %tmp2, i32 2, i64 2
%tmp31 = load i32, ptr addrspace(1) %tmp30, align 4
%tmp32 = sext i32 %tmp31 to i64
@@ -49,16 +49,16 @@ bb28: ; preds = %bb25, %bb21
%tmp41 = fsub <4 x float> zeroinitializer, %tmp40
%tmp42 = fsub <4 x float> %tmp39, %tmp40
%tmp43 = extractelement <4 x float> %tmp40, i32 1
- %tmp44 = fsub float %tmp43, undef
- %tmp45 = fadd float undef, undef
+ %tmp44 = fsub float %tmp43, 0.0
+ %tmp45 = fadd float 0.0, 0.0
%tmp46 = fdiv float %tmp44, %tmp45
%tmp47 = insertelement <4 x float> poison, float %tmp46, i32 0
%tmp48 = shufflevector <4 x float> %tmp47, <4 x float> poison, <4 x i32> zeroinitializer
%tmp49 = fsub <4 x float> %tmp48, %tmp40
%tmp50 = extractelement <4 x float> %tmp41, i32 1
%tmp51 = extractelement <4 x float> %tmp42, i32 2
- %tmp52 = fmul float undef, undef
- %tmp53 = fadd float %tmp52, undef
+ %tmp52 = fmul float 0.0, 0.0
+ %tmp53 = fadd float %tmp52, 0.0
%tmp54 = fadd float %tmp51, %tmp53
%tmp55 = extractelement <4 x float> %tmp49, i32 1
%tmp56 = fmul float %tmp55, %tmp50
@@ -72,7 +72,7 @@ bb28: ; preds = %bb25, %bb21
%tmp59 = bitcast i64 %tmp35 to <2 x float>
%tmp60 = insertelement <2 x float> poison, float %tmp58, i32 0
%tmp61 = shufflevector <2 x float> %tmp60, <2 x float> poison, <2 x i32> zeroinitializer
- %tmp62 = fmul <2 x float> %tmp61, undef
+ %tmp62 = fmul <2 x float> %tmp61, zeroinitializer
%tmp63 = fsub <2 x float> %tmp62, %tmp59
%tmp64 = extractelement <2 x float> %tmp63, i64 0
call void @eggs(float %tmp64) #2
diff --git a/llvm/test/CodeGen/AMDGPU/diver...
[truncated]
You can test this locally with the following command:

git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)' 737a0aeb6b4ec5bee87af6b5b1cb987427aef5f8 728bd6fe6d0e3cf4744fb7d39411f0a80b428d8d llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.rsq.clamp.ll llvm/test/CodeGen/AMDGPU/amdpal_scratch_mergedshader.ll llvm/test/CodeGen/AMDGPU/bug-v4f64-subvector.ll llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll llvm/test/CodeGen/AMDGPU/cgp-bitfield-extract.ll llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll llvm/test/CodeGen/AMDGPU/debug-value.ll llvm/test/CodeGen/AMDGPU/diverge-interp-mov-lower.ll llvm/test/CodeGen/AMDGPU/early-if-convert.ll llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll llvm/test/CodeGen/AMDGPU/flat-scratch.ll llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll llvm/test/CodeGen/AMDGPU/infinite-loop.ll llvm/test/CodeGen/AMDGPU/ipra-return-address-save-restore.ll llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll llvm/test/CodeGen/AMDGPU/si-spill-cf.ll llvm/test/CodeGen/AMDGPU/smrd.ll llvm/test/CodeGen/AMDGPU/split-smrd.ll llvm/test/CodeGen/AMDGPU/swdev373493.ll llvm/test/CodeGen/AMDGPU/trunc-combine.ll llvm/test/CodeGen/AMDGPU/udiv.ll llvm/test/CodeGen/AMDGPU/unhandled-loop-condition-assertion.ll llvm/test/CodeGen/AMDGPU/wqm.ll

The following files introduce new uses of undef:
Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields undef.

In tests, avoid using undef and having tests that trigger undefined behavior.

For example, this is considered a bad practice:

define void @fn() {
  ...
  br i1 undef, ...
}

Please use the following instead:

define void @fn(i1 %cond) {
  ...
  br i1 %cond, ...
}

Please refer to the Undefined Behavior Manual for more information.
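The changes in this patch reduce to three such replacements: an undef getelementptr index becomes the constant 0, an undef intrinsic operand becomes poison, and an undef branch condition becomes a fresh function argument. A minimal combined sketch of all three, assuming a hypothetical function @example (its name, parameters, and block labels are invented for illustration; the intrinsic is one actually touched by the diff):

declare double @llvm.amdgcn.rsq.clamp.f64(double)

define void @example(ptr addrspace(5) %p, i1 %cond) {
  ; was: getelementptr inbounds [64 x i32], ptr addrspace(5) %p, i32 0, i32 undef
  %gep = getelementptr inbounds [64 x i32], ptr addrspace(5) %p, i32 0, i32 0
  ; was: call double @llvm.amdgcn.rsq.clamp.f64(double undef)
  %r = call double @llvm.amdgcn.rsq.clamp.f64(double poison)
  ; was: br i1 undef, label %a, label %b
  br i1 %cond, label %a, label %b
a:
  ret void
b:
  ret void
}

Threading a real %cond argument (or an icmp on one, as in sink_ubfe_i16 above) keeps the branch well defined while preserving the control flow the test depends on.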