Skip to content

Commit 3f62718

Browse files
authored
AMDGPU: Migrate some tests away from undef (#131277)
1 parent 5eb5577 commit 3f62718

29 files changed, with 505 additions and 431 deletions.

llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll

+8 -8
Original file line number | Diff line number | Diff line change
@@ -769,7 +769,7 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) {
769769
bb:
770770
%padding = alloca [64 x i32], align 4, addrspace(5)
771771
%i = alloca [32 x float], align 4, addrspace(5)
772-
%pad_gep = getelementptr inbounds [64 x i32], ptr addrspace(5) %padding, i32 0, i32 undef
772+
%pad_gep = getelementptr inbounds [64 x i32], ptr addrspace(5) %padding, i32 0, i32 0
773773
%pad_load = load volatile i32, ptr addrspace(5) %pad_gep, align 4
774774
%i7 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %idx
775775
store volatile i32 15, ptr addrspace(5) %i7, align 4
@@ -956,7 +956,7 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel() {
956956
bb:
957957
%padding = alloca [64 x i32], align 4, addrspace(5)
958958
%i = alloca [32 x float], align 4, addrspace(5)
959-
%pad_gep = getelementptr inbounds [64 x i32], ptr addrspace(5) %padding, i32 0, i32 undef
959+
%pad_gep = getelementptr inbounds [64 x i32], ptr addrspace(5) %padding, i32 0, i32 0
960960
%pad_load = load volatile i32, ptr addrspace(5) %pad_gep, align 4
961961
%i2 = tail call i32 @llvm.amdgcn.workitem.id.x()
962962
%i3 = zext i32 %i2 to i64
@@ -1153,7 +1153,7 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
11531153
bb:
11541154
%padding = alloca [64 x i32], align 4, addrspace(5)
11551155
%i = alloca [32 x float], align 4, addrspace(5)
1156-
%pad_gep = getelementptr inbounds [64 x i32], ptr addrspace(5) %padding, i32 0, i32 undef
1156+
%pad_gep = getelementptr inbounds [64 x i32], ptr addrspace(5) %padding, i32 0, i32 0
11571157
%pad_load = load volatile i32, ptr addrspace(5) %pad_gep, align 4
11581158
%i7 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %idx
11591159
store volatile i32 15, ptr addrspace(5) %i7, align 4
@@ -1358,7 +1358,7 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) {
13581358
bb:
13591359
%padding = alloca [4096 x i32], align 4, addrspace(5)
13601360
%i = alloca [32 x float], align 4, addrspace(5)
1361-
%pad_gep = getelementptr inbounds [4096 x i32], ptr addrspace(5) %padding, i32 0, i32 undef
1361+
%pad_gep = getelementptr inbounds [4096 x i32], ptr addrspace(5) %padding, i32 0, i32 0
13621362
%pad_load = load volatile i32, ptr addrspace(5) %pad_gep, align 4
13631363
%i7 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %idx
13641364
store volatile i32 15, ptr addrspace(5) %i7, align 4
@@ -1549,7 +1549,7 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel() {
15491549
bb:
15501550
%padding = alloca [4096 x i32], align 4, addrspace(5)
15511551
%i = alloca [32 x float], align 4, addrspace(5)
1552-
%pad_gep = getelementptr inbounds [4096 x i32], ptr addrspace(5) %padding, i32 0, i32 undef
1552+
%pad_gep = getelementptr inbounds [4096 x i32], ptr addrspace(5) %padding, i32 0, i32 0
15531553
%pad_load = load volatile i32, ptr addrspace(5) %pad_gep, align 4
15541554
%i2 = tail call i32 @llvm.amdgcn.workitem.id.x()
15551555
%i3 = zext i32 %i2 to i64
@@ -1750,7 +1750,7 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
17501750
bb:
17511751
%padding = alloca [4096 x i32], align 4, addrspace(5)
17521752
%i = alloca [32 x float], align 4, addrspace(5)
1753-
%pad_gep = getelementptr inbounds [4096 x i32], ptr addrspace(5) %padding, i32 0, i32 undef
1753+
%pad_gep = getelementptr inbounds [4096 x i32], ptr addrspace(5) %padding, i32 0, i32 0
17541754
%pad_load = load volatile i32, ptr addrspace(5) %pad_gep, align 4
17551755
%i7 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %idx
17561756
store volatile i32 15, ptr addrspace(5) %i7, align 4
@@ -1900,7 +1900,7 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
19001900
; UNALIGNED_GFX12-NEXT: s_endpgm
19011901
bb:
19021902
%i = alloca [4096 x i32], align 4, addrspace(5)
1903-
%i1 = getelementptr inbounds [4096 x i32], ptr addrspace(5) %i, i32 0, i32 undef
1903+
%i1 = getelementptr inbounds [4096 x i32], ptr addrspace(5) %i, i32 0, i32 0
19041904
store volatile i32 13, ptr addrspace(5) %i1, align 4
19051905
%i7 = getelementptr inbounds [4096 x i32], ptr addrspace(5) %i, i32 0, i32 4000
19061906
store volatile i32 15, ptr addrspace(5) %i7, align 4
@@ -2055,7 +2055,7 @@ define void @store_load_large_imm_offset_foo() {
20552055
; UNALIGNED_GFX12-NEXT: s_setpc_b64 s[30:31]
20562056
bb:
20572057
%i = alloca [4096 x i32], align 4, addrspace(5)
2058-
%i1 = getelementptr inbounds [4096 x i32], ptr addrspace(5) %i, i32 0, i32 undef
2058+
%i1 = getelementptr inbounds [4096 x i32], ptr addrspace(5) %i, i32 0, i32 0
20592059
store volatile i32 13, ptr addrspace(5) %i1, align 4
20602060
%i7 = getelementptr inbounds [4096 x i32], ptr addrspace(5) %i, i32 0, i32 4000
20612061
store volatile i32 15, ptr addrspace(5) %i7, align 4

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll

+1 -1
Original file line number | Diff line number | Diff line change
@@ -1681,7 +1681,7 @@ define amdgpu_kernel void @test_div_scale_f64_val_undef_val(ptr addrspace(1) %ou
16811681
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
16821682
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
16831683
; GFX11-NEXT: s_endpgm
1684-
%result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double 8.0, double undef, i1 false)
1684+
%result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double 8.0, double poison, i1 false)
16851685
%result0 = extractvalue { double, i1 } %result, 0
16861686
store double %result0, ptr addrspace(1) %out, align 8
16871687
ret void

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.rsq.clamp.ll

+1 -1
Original file line number | Diff line number | Diff line change
@@ -215,7 +215,7 @@ define double @v_rsq_clamp_undef_f64() #0 {
215215
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
216216
; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[2:3]
217217
; GFX12-NEXT: s_setpc_b64 s[30:31]
218-
%rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double undef)
218+
%rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double poison)
219219
ret double %rsq_clamp
220220
}
221221

llvm/test/CodeGen/AMDGPU/amdpal_scratch_mergedshader.ll

+1 -1
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@
1212
define amdgpu_hs void @_amdgpu_hs_main(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, i32 inreg %arg3, i32 inreg %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 inreg %arg7, <6 x i32> inreg %arg8) {
1313
.entry:
1414
%__llpc_global_proxy_7.i = alloca [3 x <4 x float>], align 16, addrspace(5)
15-
%tmp = icmp ult i32 undef, undef
15+
%tmp = icmp ult i32 %arg, 0
1616
br i1 %tmp, label %.beginls, label %.endls
1717

1818
.beginls: ; preds = %.entry

llvm/test/CodeGen/AMDGPU/bug-v4f64-subvector.ll

+6 -6
Original file line number | Diff line number | Diff line change
@@ -7,12 +7,12 @@
77
; CHECK: GLOBAL_LOAD_DWORDX4
88
; CHECK: GLOBAL_LOAD_DWORDX4
99
; CHECK: GLOBAL_STORE_DWORDX4
10-
define protected amdgpu_kernel void @test1() local_unnamed_addr !kernel_arg_addr_space !0 !kernel_arg_access_qual !1 !kernel_arg_type !2 !kernel_arg_base_type !2 !kernel_arg_type_qual !3 !kernel_arg_name !4 {
10+
define protected amdgpu_kernel void @test1(ptr addrspace(4) %ptr) local_unnamed_addr !kernel_arg_addr_space !0 !kernel_arg_access_qual !1 !kernel_arg_type !2 !kernel_arg_base_type !2 !kernel_arg_type_qual !3 !kernel_arg_name !4 {
1111
entry:
12-
%tmp = load <3 x i64>, ptr addrspace(4) poison, align 16, !invariant.load !5
12+
%tmp = load <3 x i64>, ptr addrspace(4) %ptr, align 16, !invariant.load !5
1313
%srcA.load2 = extractelement <3 x i64> %tmp, i32 0
1414
%tmp1 = inttoptr i64 %srcA.load2 to ptr addrspace(1)
15-
%tmp2 = getelementptr inbounds double, ptr addrspace(1) %tmp1, i64 undef
15+
%tmp2 = getelementptr inbounds double, ptr addrspace(1) %tmp1, i64 0
1616
%tmp4 = load <3 x double>, ptr addrspace(1) %tmp2, align 8, !tbaa !6
1717
%tmp5 = extractelement <3 x double> %tmp4, i32 1
1818
%tmp6 = insertelement <3 x double> poison, double %tmp5, i32 1
@@ -34,12 +34,12 @@ entry:
3434
; CHECK: GLOBAL_LOAD_DWORDX2
3535
; CHECK: GLOBAL_LOAD_DWORDX2
3636
; CHECK: GLOBAL_STORE_DWORDX2
37-
define protected amdgpu_kernel void @test2() local_unnamed_addr !kernel_arg_addr_space !0 !kernel_arg_access_qual !1 !kernel_arg_type !2 !kernel_arg_base_type !2 !kernel_arg_type_qual !3 !kernel_arg_name !4 {
37+
define protected amdgpu_kernel void @test2(ptr addrspace(4) %ptr) local_unnamed_addr !kernel_arg_addr_space !0 !kernel_arg_access_qual !1 !kernel_arg_type !2 !kernel_arg_base_type !2 !kernel_arg_type_qual !3 !kernel_arg_name !4 {
3838
entry:
39-
%tmp = load <3 x i64>, ptr addrspace(4) poison, align 16, !invariant.load !5
39+
%tmp = load <3 x i64>, ptr addrspace(4) %ptr, align 16, !invariant.load !5
4040
%srcA.load2 = extractelement <3 x i64> %tmp, i32 0
4141
%tmp1 = inttoptr i64 %srcA.load2 to ptr addrspace(1)
42-
%tmp2 = getelementptr inbounds double, ptr addrspace(1) %tmp1, i64 undef
42+
%tmp2 = getelementptr inbounds double, ptr addrspace(1) %tmp1, i64 0
4343
%tmp4 = load <3 x double>, ptr addrspace(1) %tmp2, align 8, !tbaa !6
4444
%tmp5 = extractelement <3 x double> %tmp4, i32 1
4545
%tmp6 = insertelement <3 x double> poison, double %tmp5, i32 1

llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll

+1 -1
Original file line number | Diff line number | Diff line change
@@ -359,7 +359,7 @@ for.body:
359359
%add = fadd float %vecload, 1.0
360360
store float %add, ptr addrspace(3) %arrayidx, align 8
361361
%inc = add i32 %indvar, 1
362-
br i1 undef, label %for.body, label %for.exit
362+
br i1 poison, label %for.body, label %for.exit
363363
}
364364

365365
define amdgpu_kernel void @loop_arg_0(ptr addrspace(3) %ptr, i32 %n) nounwind {

llvm/test/CodeGen/AMDGPU/cgp-bitfield-extract.ll

+10 -7
Original file line number | Diff line number | Diff line change
@@ -99,16 +99,17 @@ ret:
9999

100100
; OPT-LABEL: @sink_ubfe_i16(
101101
; OPT: entry:
102+
; OPT-NEXT: icmp
102103
; OPT-NEXT: br i1
103104

104105
; OPT: bb0:
105-
; OPT: %0 = lshr i16 %arg1, 4
106-
; OPT-NEXT: %val0 = and i16 %0, 255
106+
; OPT: [[LSHR0:%[0-9]+]] = lshr i16 %arg1, 4
107+
; OPT-NEXT: %val0 = and i16 [[LSHR0]], 255
107108
; OPT: br label
108109

109110
; OPT: bb1:
110-
; OPT: %1 = lshr i16 %arg1, 4
111-
; OPT-NEXT: %val1 = and i16 %1, 127
111+
; OPT: [[LSHR1:%[0-9]+]] = lshr i16 %arg1, 4
112+
; OPT-NEXT: %val1 = and i16 [[LSHR1]], 127
112113
; OPT: br label
113114

114115
; OPT: ret:
@@ -123,19 +124,21 @@ ret:
123124
; VI: s_bfe_u32 [[BFE:s[0-9]+]], [[ARG]], 0xc0004
124125
; GCN: s_cbranch_scc{{[0-1]}}
125126

127+
; GCN: ; %bb.1:
126128
; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70004
127129
; VI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x7f
128130

129-
; GCN: .LBB2_3:
131+
; GCN: .LBB2_2:
130132
; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80004
131133
; VI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0xff
132134

133135
; GCN: buffer_store_short
134136
; GCN: s_endpgm
135-
define amdgpu_kernel void @sink_ubfe_i16(ptr addrspace(1) %out, i16 %arg1) #0 {
137+
define amdgpu_kernel void @sink_ubfe_i16(ptr addrspace(1) %out, i16 %arg1, [8 x i32], i32 %arg2) #0 {
136138
entry:
137139
%shr = lshr i16 %arg1, 4
138-
br i1 undef, label %bb0, label %bb1
140+
%cond = icmp eq i32 %arg2, 0
141+
br i1 %cond, label %bb0, label %bb1
139142

140143
bb0:
141144
%val0 = and i16 %shr, 255

llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll

+25 -13
Original file line number | Diff line number | Diff line change
@@ -63,7 +63,7 @@ define i32 @combine_add_zext_xor() {
6363

6464
.a: ; preds = %bb9, %.entry
6565
%.2 = phi i32 [ 0, %.entry ], [ %i11, %bb9 ]
66-
br i1 undef, label %bb9, label %bb
66+
br i1 poison, label %bb9, label %bb
6767

6868
bb: ; preds = %.a
6969
%.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) poison, i32 %.2, i32 64, i32 1)
@@ -411,55 +411,66 @@ bb9: ; preds = %bb, %.a
411411

412412
; Test that unused lanes in the s_and result are masked out with v_cndmask.
413413

414-
define i32 @combine_sub_zext_and() {
414+
define i32 @combine_sub_zext_and(i32 inreg %cond) {
415415
; GFX1010-LABEL: combine_sub_zext_and:
416416
; GFX1010: ; %bb.0: ; %.entry
417417
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
418+
; GFX1010-NEXT: s_cmp_lg_u32 s16, 0
418419
; GFX1010-NEXT: v_mov_b32_e32 v1, 0
420+
; GFX1010-NEXT: s_cselect_b32 s4, -1, 0
421+
; GFX1010-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
422+
; GFX1010-NEXT: v_cmp_ne_u32_e64 s4, 1, v0
419423
; GFX1010-NEXT: s_branch .LBB5_2
420424
; GFX1010-NEXT: .LBB5_1: ; %bb9
421425
; GFX1010-NEXT: ; in Loop: Header=BB5_2 Depth=1
422426
; GFX1010-NEXT: v_cmp_lt_i32_e32 vcc_lo, 0xfffffbe6, v1
423-
; GFX1010-NEXT: s_and_b32 s4, s4, vcc_lo
424-
; GFX1010-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
427+
; GFX1010-NEXT: s_and_b32 s5, s5, vcc_lo
428+
; GFX1010-NEXT: v_cndmask_b32_e64 v0, 0, 1, s5
425429
; GFX1010-NEXT: v_sub_nc_u32_e32 v1, v1, v0
426430
; GFX1010-NEXT: s_cbranch_vccz .LBB5_4
427431
; GFX1010-NEXT: .LBB5_2: ; %.a
428432
; GFX1010-NEXT: ; =>This Inner Loop Header: Depth=1
429-
; GFX1010-NEXT: ; implicit-def: $sgpr4
430-
; GFX1010-NEXT: s_cbranch_scc1 .LBB5_1
433+
; GFX1010-NEXT: s_and_b32 vcc_lo, exec_lo, s4
434+
; GFX1010-NEXT: ; implicit-def: $sgpr5
435+
; GFX1010-NEXT: s_cbranch_vccnz .LBB5_1
431436
; GFX1010-NEXT: ; %bb.3: ; %bb
432437
; GFX1010-NEXT: ; in Loop: Header=BB5_2 Depth=1
433438
; GFX1010-NEXT: buffer_load_dword v0, v1, s[4:7], 64 offen glc
434439
; GFX1010-NEXT: s_waitcnt vmcnt(0)
435-
; GFX1010-NEXT: v_cmp_eq_u32_e64 s4, 0, v0
440+
; GFX1010-NEXT: v_cmp_eq_u32_e64 s5, 0, v0
436441
; GFX1010-NEXT: s_branch .LBB5_1
437442
; GFX1010-NEXT: .LBB5_4: ; %.exit
438443
; GFX1010-NEXT: s_setpc_b64 s[30:31]
439444
;
440445
; GFX1100-LABEL: combine_sub_zext_and:
441446
; GFX1100: ; %bb.0: ; %.entry
442447
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
448+
; GFX1100-NEXT: s_cmp_lg_u32 s0, 0
443449
; GFX1100-NEXT: v_mov_b32_e32 v1, 0
450+
; GFX1100-NEXT: s_cselect_b32 s0, -1, 0
451+
; GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
452+
; GFX1100-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
453+
; GFX1100-NEXT: v_cmp_ne_u32_e64 s0, 1, v0
444454
; GFX1100-NEXT: s_branch .LBB5_2
445455
; GFX1100-NEXT: .LBB5_1: ; %bb9
446456
; GFX1100-NEXT: ; in Loop: Header=BB5_2 Depth=1
447457
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
448458
; GFX1100-NEXT: v_cmp_lt_i32_e32 vcc_lo, 0xfffffbe6, v1
449-
; GFX1100-NEXT: s_and_b32 s0, s0, vcc_lo
450-
; GFX1100-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
459+
; GFX1100-NEXT: s_and_b32 s1, s1, vcc_lo
460+
; GFX1100-NEXT: v_cndmask_b32_e64 v0, 0, 1, s1
451461
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
452462
; GFX1100-NEXT: v_sub_nc_u32_e32 v1, v1, v0
453463
; GFX1100-NEXT: s_cbranch_vccz .LBB5_4
454464
; GFX1100-NEXT: .LBB5_2: ; %.a
455465
; GFX1100-NEXT: ; =>This Inner Loop Header: Depth=1
456-
; GFX1100-NEXT: ; implicit-def: $sgpr0
457-
; GFX1100-NEXT: s_cbranch_scc1 .LBB5_1
466+
; GFX1100-NEXT: s_and_b32 vcc_lo, exec_lo, s0
467+
; GFX1100-NEXT: ; implicit-def: $sgpr1
468+
; GFX1100-NEXT: s_cbranch_vccnz .LBB5_1
458469
; GFX1100-NEXT: ; %bb.3: ; %bb
459470
; GFX1100-NEXT: ; in Loop: Header=BB5_2 Depth=1
460471
; GFX1100-NEXT: buffer_load_b32 v0, v1, s[0:3], 64 offen glc
461472
; GFX1100-NEXT: s_waitcnt vmcnt(0)
462-
; GFX1100-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
473+
; GFX1100-NEXT: v_cmp_eq_u32_e64 s1, 0, v0
463474
; GFX1100-NEXT: s_branch .LBB5_1
464475
; GFX1100-NEXT: .LBB5_4: ; %.exit
465476
; GFX1100-NEXT: s_setpc_b64 s[30:31]
@@ -468,7 +479,8 @@ define i32 @combine_sub_zext_and() {
468479

469480
.a: ; preds = %bb9, %.entry
470481
%.2 = phi i32 [ 0, %.entry ], [ %i11, %bb9 ]
471-
br i1 undef, label %bb9, label %bb
482+
%cmp = icmp eq i32 %cond, 0
483+
br i1 %cmp, label %bb9, label %bb
472484

473485
bb: ; preds = %.a
474486
%.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) poison, i32 %.2, i32 64, i32 1)

llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll

+1 -1
Original file line number | Diff line number | Diff line change
@@ -2993,7 +2993,7 @@ entry:
29932993

29942994
for.body.i: ; preds = %for.body.i, %entry
29952995
%retval.sroa.0.0.copyload = load ptr, ptr addrspace(1) poison, align 8
2996-
%add.ptr = getelementptr inbounds %Vec, ptr %retval.sroa.0.0.copyload, i64 undef
2996+
%add.ptr = getelementptr inbounds %Vec, ptr %retval.sroa.0.0.copyload, i64 0
29972997
%retval.sroa.0.0..sroa_cast_adr = addrspacecast ptr %add.ptr to ptr addrspace(1)
29982998
%retval.sroa.0.0.copyload.i = load i32, ptr addrspace(1) %retval.sroa.0.0..sroa_cast_adr, align 1
29992999
%p1.sroa.6.0.extract.shift = lshr i32 %retval.sroa.0.0.copyload.i, 24

llvm/test/CodeGen/AMDGPU/debug-value.ll

+8 -8
Original file line number | Diff line number | Diff line change
@@ -13,8 +13,8 @@ bb:
1313
%tmp10 = load i32, ptr addrspace(1) %tmp9, align 4
1414
%tmp11 = sext i32 %tmp10 to i64
1515
%tmp12 = getelementptr inbounds <2 x float>, ptr addrspace(1) %arg, i64 %tmp11
16-
%tmp14 = getelementptr inbounds i8, ptr addrspace(1) %arg, i64 undef
17-
%tmp16 = getelementptr inbounds <4 x float>, ptr addrspace(1) %tmp14, i64 undef
16+
%tmp14 = getelementptr inbounds i8, ptr addrspace(1) %arg, i64 0
17+
%tmp16 = getelementptr inbounds <4 x float>, ptr addrspace(1) %tmp14, i64 0
1818
%tmp17 = load <4 x float>, ptr addrspace(1) %tmp16, align 16
1919
%tmp18 = fsub <4 x float> %tmp17, %tmp17
2020
%ext = extractelement <4 x float> %tmp18, i32 1
@@ -35,7 +35,7 @@ bb25: ; preds = %bb
3535

3636
bb28: ; preds = %bb25, %bb21
3737
%tmp29 = phi <4 x float> [ %tmp27, %bb25 ], [ %tmp24, %bb21 ]
38-
store <4 x float> %tmp29, ptr addrspace(5) poison, align 16
38+
store <4 x float> %tmp29, ptr addrspace(5) null, align 16
3939
%tmp30 = getelementptr inbounds %struct.wombat, ptr addrspace(1) %arg, i64 %tmp2, i32 2, i64 2
4040
%tmp31 = load i32, ptr addrspace(1) %tmp30, align 4
4141
%tmp32 = sext i32 %tmp31 to i64
@@ -49,16 +49,16 @@ bb28: ; preds = %bb25, %bb21
4949
%tmp41 = fsub <4 x float> zeroinitializer, %tmp40
5050
%tmp42 = fsub <4 x float> %tmp39, %tmp40
5151
%tmp43 = extractelement <4 x float> %tmp40, i32 1
52-
%tmp44 = fsub float %tmp43, undef
53-
%tmp45 = fadd float undef, undef
52+
%tmp44 = fsub float %tmp43, 0.0
53+
%tmp45 = fadd float 0.0, 0.0
5454
%tmp46 = fdiv float %tmp44, %tmp45
5555
%tmp47 = insertelement <4 x float> poison, float %tmp46, i32 0
5656
%tmp48 = shufflevector <4 x float> %tmp47, <4 x float> poison, <4 x i32> zeroinitializer
5757
%tmp49 = fsub <4 x float> %tmp48, %tmp40
5858
%tmp50 = extractelement <4 x float> %tmp41, i32 1
5959
%tmp51 = extractelement <4 x float> %tmp42, i32 2
60-
%tmp52 = fmul float undef, undef
61-
%tmp53 = fadd float %tmp52, undef
60+
%tmp52 = fmul float 0.0, 0.0
61+
%tmp53 = fadd float %tmp52, 0.0
6262
%tmp54 = fadd float %tmp51, %tmp53
6363
%tmp55 = extractelement <4 x float> %tmp49, i32 1
6464
%tmp56 = fmul float %tmp55, %tmp50
@@ -72,7 +72,7 @@ bb28: ; preds = %bb25, %bb21
7272
%tmp59 = bitcast i64 %tmp35 to <2 x float>
7373
%tmp60 = insertelement <2 x float> poison, float %tmp58, i32 0
7474
%tmp61 = shufflevector <2 x float> %tmp60, <2 x float> poison, <2 x i32> zeroinitializer
75-
%tmp62 = fmul <2 x float> %tmp61, undef
75+
%tmp62 = fmul <2 x float> %tmp61, zeroinitializer
7676
%tmp63 = fsub <2 x float> %tmp62, %tmp59
7777
%tmp64 = extractelement <2 x float> %tmp63, i64 0
7878
call void @eggs(float %tmp64) #2

llvm/test/CodeGen/AMDGPU/diverge-interp-mov-lower.ll

+2 -2
Original file line number | Diff line number | Diff line change
@@ -9,13 +9,13 @@
99
; GCN-LABEL: {{^}}_amdgpu_ps_main:
1010
; GCN-NOT: v_readfirstlane
1111
; PRE-GFX9: flat_load_dword
12-
; GFX9: global_load
12+
; GFX9: global_load
1313
define dllexport amdgpu_ps void @_amdgpu_ps_main(i32 inreg %arg) local_unnamed_addr #0 {
1414
.entry:
1515
%tmp = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %arg) #1
1616
%tmp1 = bitcast float %tmp to i32
1717
%tmp2 = srem i32 %tmp1, 4
18-
%tmp3 = select i1 false, i32 undef, i32 %tmp2
18+
%tmp3 = select i1 false, i32 poison, i32 %tmp2
1919
%tmp4 = sext i32 %tmp3 to i64
2020
%tmp5 = getelementptr [4 x <4 x float>], ptr addrspace(4) @0, i64 0, i64 %tmp4
2121
%tmp6 = load <4 x float>, ptr addrspace(4) %tmp5, align 16

llvm/test/CodeGen/AMDGPU/early-if-convert.ll

+1 -1
Original file line number | Diff line number | Diff line change
@@ -386,7 +386,7 @@ done:
386386
; GCN-NEXT: s_cselect_b32 s{{[0-9]+}}, 0, 1{{$}}
387387
define amdgpu_kernel void @ifcvt_undef_scc(i32 %cond, ptr addrspace(1) %out) {
388388
entry:
389-
br i1 undef, label %else, label %if
389+
br i1 poison, label %else, label %if
390390

391391
if:
392392
br label %done

0 commit comments

Comments
 (0)