Skip to content

AMDGPU: Replace some float undef test uses with poison #131090

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/i1-copy.ll
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ main_body:
%vcc = icmp eq i32 %val, 2
%a = select i1 %vcc, float %a0, float %a1
%b = select i1 %vcc, float %b0, float %b1
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %a, float %b, float undef, float undef, i1 true, i1 true)
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %a, float %b, float poison, float poison, i1 true, i1 true)
ret void
}

Expand Down Expand Up @@ -52,7 +52,7 @@ main_body:
%uniform_i1 = icmp eq i32 %val, 2
%a = select i1 %uniform_i1, float %a0, float %a1
%b = select i1 %uniform_i1, float %b0, float %b1
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %a, float %b, float undef, float undef, i1 true, i1 true)
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %a, float %b, float poison, float poison, i1 true, i1 true)
ret void
}

Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ define amdgpu_vs void @test_f32_inreg(float inreg %arg0) {
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[COPY]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), 0, 0
; CHECK-NEXT: S_ENDPGM 0
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) #0
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float poison, float poison, float poison, i1 false, i1 false) #0
ret void
}

Expand All @@ -23,7 +23,7 @@ define amdgpu_vs void @test_f32(float %arg0) {
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[COPY]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), 0, 0
; CHECK-NEXT: S_ENDPGM 0
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) #0
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float poison, float poison, float poison, i1 false, i1 false) #0
ret void
}

Expand Down Expand Up @@ -55,7 +55,7 @@ define amdgpu_vs void @test_sgpr_alignment0(float inreg %arg0, ptr addrspace(4)
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[COPY]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), 0, 0
; CHECK-NEXT: S_ENDPGM 0
%tmp0 = load volatile i32, ptr addrspace(4) %arg1
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) #0
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float poison, float poison, float poison, i1 false, i1 false) #0
ret void
}

Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1543,7 +1543,7 @@ define amdgpu_kernel void @test_div_scale_f32_val_undef_val(ptr addrspace(1) %ou
; GFX11-NEXT: v_div_scale_f32 v0, null, s0, s0, 0x41000000
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX11-NEXT: s_endpgm
%result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float 8.0, float undef, i1 false)
%result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float 8.0, float poison, i1 false)
%result0 = extractvalue { float, i1 } %result, 0
store float %result0, ptr addrspace(1) %out, align 4
ret void
Expand Down Expand Up @@ -1589,7 +1589,7 @@ define amdgpu_kernel void @test_div_scale_f32_undef_val_val(ptr addrspace(1) %ou
; GFX11-NEXT: v_div_scale_f32 v0, null, 0x41000000, 0x41000000, s0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX11-NEXT: s_endpgm
%result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float undef, float 8.0, i1 false)
%result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float poison, float 8.0, i1 false)
%result0 = extractvalue { float, i1 } %result, 0
store float %result0, ptr addrspace(1) %out, align 4
ret void
Expand Down Expand Up @@ -1633,7 +1633,7 @@ define amdgpu_kernel void @test_div_scale_f32_undef_undef_val(ptr addrspace(1) %
; GFX11-NEXT: v_div_scale_f32 v0, null, s0, s0, s0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX11-NEXT: s_endpgm
%result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float undef, float undef, i1 false)
%result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float poison, float poison, i1 false)
%result0 = extractvalue { float, i1 } %result, 0
store float %result0, ptr addrspace(1) %out, align 4
ret void
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ define float @v_mul_legacy_undef0_f32(float %a) {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, s0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = call float @llvm.amdgcn.fmul.legacy(float undef, float %a)
%result = call float @llvm.amdgcn.fmul.legacy(float poison, float %a)
ret float %result
}

Expand Down Expand Up @@ -122,7 +122,7 @@ define float @v_mul_legacy_undef1_f32(float %a) {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, s0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = call float @llvm.amdgcn.fmul.legacy(float %a, float undef)
%result = call float @llvm.amdgcn.fmul.legacy(float %a, float poison)
ret float %result
}

Expand Down Expand Up @@ -162,7 +162,7 @@ define float @v_mul_legacy_undef_f32() {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mul_dx9_zero_f32_e64 v0, s0, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = call float @llvm.amdgcn.fmul.legacy(float undef, float undef)
%result = call float @llvm.amdgcn.fmul.legacy(float poison, float poison)
ret float %result
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ define float @v_rsq_clamp_undef_f32() #0 {
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(VALU_DEP_1)
; GFX12-NEXT: v_minmax_num_f32 v0, s0, 0x7f7fffff, v0
; GFX12-NEXT: s_setpc_b64 s[30:31]
%rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float undef)
%rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float poison)
ret float %rsq_clamp
}

Expand Down
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
; GCN: buffer_store_dword v0
define amdgpu_ps void @adjust_writemask_crash_0_nochain() #0 {
main_body:
%tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float undef, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
%tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float poison, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
%tmp1 = bitcast <2 x float> %tmp to <2 x i32>
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
%tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
Expand All @@ -23,7 +23,7 @@ main_body:
; GCN: buffer_store_dword v0
define amdgpu_ps void @adjust_writemask_crash_1_nochain() #0 {
main_body:
%tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float undef, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
%tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float poison, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
%tmp1 = bitcast <2 x float> %tmp to <2 x i32>
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 poison, i32 poison>
%tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
Expand All @@ -39,7 +39,7 @@ main_body:
; GCN: buffer_store_dword v0
define amdgpu_ps void @adjust_writemask_crash_0_chain() #0 {
main_body:
%tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float undef, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
%tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float poison, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
%tmp1 = bitcast <2 x float> %tmp to <2 x i32>
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
%tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
Expand All @@ -55,7 +55,7 @@ main_body:
; GCN: buffer_store_dword v0
define amdgpu_ps void @adjust_writemask_crash_1_chain() #0 {
main_body:
%tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float undef, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
%tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float poison, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
%tmp1 = bitcast <2 x float> %tmp to <2 x i32>
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 poison, i32 poison>
%tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
Expand All @@ -66,7 +66,7 @@ main_body:

define amdgpu_ps void @adjust_writemask_crash_0_v4() #0 {
main_body:
%tmp = call <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32 5, float undef, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
%tmp = call <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32 5, float poison, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
%tmp1 = bitcast <4 x float> %tmp to <4 x i32>
%tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
%tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
Expand Down
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4009,7 +4009,7 @@ define <4 x float> @fdiv_constant_f32_vector(ptr addrspace(1) %out, <2 x float>
; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP13]], 0
; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP13]], 1
; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP14]])
; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float undef)
; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison)
; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = extractvalue { float, i32 } [[TMP17]], 0
; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP17]], 1
; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = fmul float [[TMP18]], [[TMP16]]
Expand Down Expand Up @@ -4049,9 +4049,9 @@ define <4 x float> @fdiv_constant_f32_vector(ptr addrspace(1) %out, <2 x float>
; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP13]], 0
; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float 3.200000e+01)
; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP14]])
; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float undef)
; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison)
; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = extractvalue { float, i32 } [[TMP17]], 0
; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float undef)
; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float poison)
; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = fmul float [[TMP18]], [[TMP16]]
; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = sub i32 [[TMP19]], [[TMP15]]
; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP20]], i32 [[TMP21]])
Expand Down Expand Up @@ -4079,7 +4079,7 @@ define <4 x float> @fdiv_constant_f32_vector(ptr addrspace(1) %out, <2 x float>
; DAZ-NEXT: [[TMP4:%.*]] = extractvalue { float, i32 } [[TMP3]], 0
; DAZ-NEXT: [[TMP5:%.*]] = extractvalue { float, i32 } [[TMP3]], 1
; DAZ-NEXT: [[TMP6:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP4]])
; DAZ-NEXT: [[TMP7:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float undef)
; DAZ-NEXT: [[TMP7:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison)
; DAZ-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP7]], 0
; DAZ-NEXT: [[TMP9:%.*]] = extractvalue { float, i32 } [[TMP7]], 1
; DAZ-NEXT: [[TMP10:%.*]] = fmul float [[TMP8]], [[TMP6]]
Expand All @@ -4101,7 +4101,7 @@ define <4 x float> @fdiv_constant_f32_vector(ptr addrspace(1) %out, <2 x float>
; DAZ-NEXT: [[CONST_PARTIAL_RCP:%.*]] = insertelement <4 x float> [[TMP25]], float [[TMP22]], i64 3
; DAZ-NEXT: ret <4 x float> [[CONST_PARTIAL_RCP]]
;
%const.partial.rcp = fdiv <4 x float> <float 1.0, float -1.0, float undef, float 2.0>, <float 0.5, float 2.0, float 32.0, float 10.0>, !fpmath !2
%const.partial.rcp = fdiv <4 x float> <float 1.0, float -1.0, float poison, float 2.0>, <float 0.5, float 2.0, float 32.0, float 10.0>, !fpmath !2
ret <4 x float> %const.partial.rcp
}

Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/amdgpu-reloc-const.ll
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
define amdgpu_ps void @ps_main(i32 %arg, i32 inreg %arg1, i32 inreg %arg2) local_unnamed_addr #0 {
%rc = call i32 @llvm.amdgcn.reloc.constant(metadata !1)
%rcf = bitcast i32 %rc to float
call void @llvm.amdgcn.exp.f32(i32 40, i32 15, float %rcf, float undef, float undef, float undef, i1 false, i1 false) #0
call void @llvm.amdgcn.exp.f32(i32 40, i32 15, float %rcf, float poison, float poison, float poison, i1 false, i1 false) #0
ret void
}

Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ bb2:
define amdgpu_kernel void @preserve_condition_undef_flag(float %arg, i32 %arg1, float %arg2) {
bb0:
%tmp = icmp sgt i32 %arg1, 4
%undef = call i1 @llvm.amdgcn.class.f32(float undef, i32 undef)
%undef = call i1 @llvm.amdgcn.class.f32(float poison, i32 undef)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

second undef is needed here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is only touching float typed uses

%tmp4 = select i1 %undef, float %arg, float 1.000000e+00
%tmp5 = fcmp ogt float %arg2, 0.000000e+00
%tmp6 = fcmp olt float %arg2, 1.000000e+00
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/commute-shifts.ll
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ bb:
%tmp5 = and i32 %tmp2, %tmp4
%tmp6 = icmp eq i32 %tmp5, 0
%tmp7 = select i1 %tmp6, float 0.000000e+00, float %arg1
%tmp8 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float %tmp7)
%tmp8 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float poison, float %tmp7)
%tmp9 = bitcast <2 x half> %tmp8 to float
ret float %tmp9
}
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/cse-phi-incoming-val.ll
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ bb9: ; preds = %bb5

bb10: ; preds = %bb9, %bb5, %bb3, %bb
%tmp11 = phi float [ 1.000000e+00, %bb3 ], [ 0.000000e+00, %bb9 ], [ 1.000000e+00, %bb ], [ poison, %bb5 ]
call void @llvm.amdgcn.exp.f32(i32 40, i32 15, float %tmp11, float undef, float undef, float undef, i1 false, i1 false) #0
call void @llvm.amdgcn.exp.f32(i32 40, i32 15, float %tmp11, float poison, float poison, float poison, i1 false, i1 false) #0
ret void
}

Expand Down
20 changes: 10 additions & 10 deletions llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll
Original file line number Diff line number Diff line change
Expand Up @@ -143,21 +143,21 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 {
; GFX11-NEXT: v_max_f32_e32 v0, 0, v1
; GFX11-NEXT: ; return to shader part epilog
.entry:
%0 = call <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 7, float undef, float undef, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
%0 = call <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 7, float poison, float poison, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
%.i2243 = extractelement <3 x float> %0, i32 2
%1 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> poison, i32 0, i32 0)
%2 = shufflevector <3 x i32> %1, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
%3 = bitcast <4 x i32> %2 to <4 x float>
%.i2248 = extractelement <4 x float> %3, i32 2
%.i2249 = fmul reassoc nnan nsz arcp contract afn float %.i2243, %.i2248
%4 = call reassoc nnan nsz arcp contract afn float @llvm.amdgcn.fmed3.f32(float undef, float 0.000000e+00, float 1.000000e+00)
%5 = call <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 7, float undef, float undef, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
%4 = call reassoc nnan nsz arcp contract afn float @llvm.amdgcn.fmed3.f32(float poison, float 0.000000e+00, float 1.000000e+00)
%5 = call <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 7, float poison, float poison, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
%.i2333 = extractelement <3 x float> %5, i32 2
%6 = call reassoc nnan nsz arcp contract afn float @llvm.amdgcn.fmed3.f32(float undef, float 0.000000e+00, float 1.000000e+00)
%7 = call <2 x float> @llvm.amdgcn.image.sample.2d.v2f32.f32(i32 3, float undef, float undef, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
%6 = call reassoc nnan nsz arcp contract afn float @llvm.amdgcn.fmed3.f32(float poison, float 0.000000e+00, float 1.000000e+00)
%7 = call <2 x float> @llvm.amdgcn.image.sample.2d.v2f32.f32(i32 3, float poison, float poison, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
%.i1408 = extractelement <2 x float> %7, i32 1
%.i0364 = extractelement <2 x float> %7, i32 0
%8 = call float @llvm.amdgcn.image.sample.2d.f32.f32(i32 1, float undef, float undef, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
%8 = call float @llvm.amdgcn.image.sample.2d.f32.f32(i32 1, float poison, float poison, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
%9 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> poison, i32 112, i32 0)
%10 = shufflevector <3 x i32> %9, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
%11 = bitcast <4 x i32> %10 to <4 x float>
Expand All @@ -175,7 +175,7 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 {
%.i2376 = fsub reassoc nnan nsz arcp contract afn float %.i2373, %.i2370
%.i2383 = fmul reassoc nnan nsz arcp contract afn float %.i2376, %6
%.i2386 = fadd reassoc nnan nsz arcp contract afn float %.i2370, %.i2383
%18 = call reassoc nnan nsz arcp contract afn float @llvm.amdgcn.fmed3.f32(float undef, float 0.000000e+00, float 1.000000e+00)
%18 = call reassoc nnan nsz arcp contract afn float @llvm.amdgcn.fmed3.f32(float poison, float 0.000000e+00, float 1.000000e+00)
%19 = fmul reassoc nnan nsz arcp contract afn float %18, %.i2363
%.i2394 = fsub reassoc nnan nsz arcp contract afn float %.i2386, %19
%.i2397 = fmul reassoc nnan nsz arcp contract afn float %.i2363, %18
Expand Down Expand Up @@ -206,8 +206,8 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 {
%.i2415 = fmul reassoc nnan nsz arcp contract afn float %.i2407, %41
%42 = call <3 x float> @llvm.amdgcn.image.load.mip.2d.v3f32.i32(i32 7, i32 undef, i32 undef, i32 0, <8 x i32> poison, i32 0, i32 0)
%.i2521 = extractelement <3 x float> %42, i32 2
%43 = call reassoc nnan nsz arcp contract afn float @llvm.amdgcn.fmed3.f32(float undef, float 0.000000e+00, float 1.000000e+00)
%44 = call <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 7, float undef, float undef, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
%43 = call reassoc nnan nsz arcp contract afn float @llvm.amdgcn.fmed3.f32(float poison, float 0.000000e+00, float 1.000000e+00)
%44 = call <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 7, float poison, float poison, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
%.i2465 = extractelement <3 x float> %44, i32 2
%.i2466 = fmul reassoc nnan nsz arcp contract afn float %.i2465, %43
%.i2469 = fmul reassoc nnan nsz arcp contract afn float %.i2415, %.i2466
Expand All @@ -224,7 +224,7 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 {
%.i2488 = fmul reassoc nnan nsz arcp contract afn float %.i2249, %18
%.i2491 = fmul reassoc nnan nsz arcp contract afn float %.i2485, %4
%.i2494 = fadd reassoc nnan nsz arcp contract afn float %.i2479, %.i2491
%51 = call <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 7, float undef, float undef, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
%51 = call <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 7, float poison, float poison, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
%.i2515 = extractelement <3 x float> %51, i32 2
%.i2516 = fadd reassoc nnan nsz arcp contract afn float %.i2515, %.i2494
%.i2522 = fadd reassoc nnan nsz arcp contract afn float %.i2521, %.i2516
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/diverge-extra-formal-args.ll
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ define dllexport amdgpu_vs void @_amdgpu_vs_main(i32 inreg %arg, i32 inreg %arg1
%tmp11 = load <2 x float>, ptr addrspace(4) %tmp10, align 8
%tmp12 = fadd nnan arcp contract <2 x float> zeroinitializer, %tmp11
%tmp13 = extractelement <2 x float> %tmp12, i32 1
call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float undef, float %tmp13, float 0.000000e+00, float 1.000000e+00, i1 true, i1 false) #1
call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float poison, float %tmp13, float 0.000000e+00, float 1.000000e+00, i1 true, i1 false) #1
ret void
}

Expand Down
Loading
Loading