AMDGPU: Add baseline tests for fract combine with other fmin types #141986

Merged

Conversation

arsenm
Contributor

@arsenm arsenm commented May 29, 2025

No description provided.

Contributor Author

arsenm commented May 29, 2025

@llvmbot
Member

llvmbot commented May 29, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes

Patch is 38.50 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/141986.diff

1 file affected:

  • (modified) llvm/test/CodeGen/AMDGPU/fract-match.ll (+872)
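For context, every new test follows the same "safe math fract" shape; a distilled sketch (not part of the patch, with an illustrative function name) using llvm.minimum.f32 is shown here. The other added tests substitute llvm.minimumnum.f32 and operand-swapped select forms, as visible in the diff below. The constant 0x3FEFFFFFE0000000 is the largest f32 value below 1.0 (0x3f7fffff in the generated assembly); since these are baseline tests, the checks simply record the current codegen for each variant.

define float @fract_minimum_sketch(float %x, ptr addrspace(1) %ip) {
entry:
  ; fract core: x - floor(x), clamped to just below 1.0
  %floor = call float @llvm.floor.f32(float %x)
  %sub = fsub float %x, %floor
  %min = call float @llvm.minimum.f32(float %sub, float 0x3FEFFFFFE0000000)
  ; pass NaN inputs through unchanged
  %uno = fcmp uno float %x, 0.000000e+00
  %cond = select i1 %uno, float %x, float %min
  ; map +/-infinity to 0.0
  %fabs = call float @llvm.fabs.f32(float %x)
  %cmpinf = fcmp oeq float %fabs, 0x7FF0000000000000
  %res = select i1 %cmpinf, float 0.000000e+00, float %cond
  ; the floor result is also stored out, as in the tests
  store float %floor, ptr addrspace(1) %ip, align 4
  ret float %res
}

declare float @llvm.floor.f32(float)
declare float @llvm.minimum.f32(float, float)
declare float @llvm.fabs.f32(float)
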
diff --git a/llvm/test/CodeGen/AMDGPU/fract-match.ll b/llvm/test/CodeGen/AMDGPU/fract-match.ll
index 5766bc0ae2898..9d98a8dab0501 100644
--- a/llvm/test/CodeGen/AMDGPU/fract-match.ll
+++ b/llvm/test/CodeGen/AMDGPU/fract-match.ll
@@ -2995,6 +2995,878 @@ entry:
   ret <2 x double> %cond6
 }
 
+define float @safe_math_fract_f32_minimum(float %x, ptr addrspace(1) writeonly captures(none) %ip) {
+; IR-LABEL: define float @safe_math_fract_f32_minimum(
+; IR-SAME: float [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] {
+; IR-NEXT:  [[ENTRY:.*:]]
+; IR-NEXT:    [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
+; IR-NEXT:    [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
+; IR-NEXT:    [[MIN:%.*]] = tail call float @llvm.minimum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
+; IR-NEXT:    [[UNO:%.*]] = fcmp uno float [[X]], 0.000000e+00
+; IR-NEXT:    [[COND:%.*]] = select i1 [[UNO]], float [[X]], float [[MIN]]
+; IR-NEXT:    [[FABS:%.*]] = tail call float @llvm.fabs.f32(float [[X]])
+; IR-NEXT:    [[CMPINF:%.*]] = fcmp oeq float [[FABS]], 0x7FF0000000000000
+; IR-NEXT:    [[COND6:%.*]] = select i1 [[CMPINF]], float 0.000000e+00, float [[COND]]
+; IR-NEXT:    store float [[FLOOR]], ptr addrspace(1) [[IP]], align 4
+; IR-NEXT:    ret float [[COND6]]
+;
+; GFX6-LABEL: safe_math_fract_f32_minimum:
+; GFX6:       ; %bb.0: ; %entry
+; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT:    v_floor_f32_e32 v3, v0
+; GFX6-NEXT:    v_sub_f32_e32 v4, v0, v3
+; GFX6-NEXT:    v_min_f32_e32 v5, 0x3f7fffff, v4
+; GFX6-NEXT:    v_mov_b32_e32 v6, 0x7fc00000
+; GFX6-NEXT:    v_cmp_o_f32_e32 vcc, v4, v4
+; GFX6-NEXT:    v_cndmask_b32_e32 v4, v6, v5, vcc
+; GFX6-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX6-NEXT:    s_mov_b32 s8, 0x7f800000
+; GFX6-NEXT:    s_mov_b32 s6, 0
+; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX6-NEXT:    v_cmp_neq_f32_e64 vcc, |v0|, s8
+; GFX6-NEXT:    s_mov_b32 s7, 0xf000
+; GFX6-NEXT:    s_mov_b32 s4, s6
+; GFX6-NEXT:    s_mov_b32 s5, s6
+; GFX6-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX6-NEXT:    buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
+; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX7-LABEL: safe_math_fract_f32_minimum:
+; GFX7:       ; %bb.0: ; %entry
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_floor_f32_e32 v3, v0
+; GFX7-NEXT:    v_sub_f32_e32 v4, v0, v3
+; GFX7-NEXT:    v_min_f32_e32 v5, 0x3f7fffff, v4
+; GFX7-NEXT:    v_mov_b32_e32 v6, 0x7fc00000
+; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v4, v4
+; GFX7-NEXT:    v_cndmask_b32_e32 v4, v6, v5, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    s_mov_b32 s8, 0x7f800000
+; GFX7-NEXT:    s_mov_b32 s6, 0
+; GFX7-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX7-NEXT:    v_cmp_neq_f32_e64 vcc, |v0|, s8
+; GFX7-NEXT:    s_mov_b32 s7, 0xf000
+; GFX7-NEXT:    s_mov_b32 s4, s6
+; GFX7-NEXT:    s_mov_b32 s5, s6
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX7-NEXT:    buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
+; GFX7-NEXT:    s_waitcnt vmcnt(0)
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: safe_math_fract_f32_minimum:
+; GFX8:       ; %bb.0: ; %entry
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_floor_f32_e32 v3, v0
+; GFX8-NEXT:    v_sub_f32_e32 v4, v0, v3
+; GFX8-NEXT:    v_min_f32_e32 v5, 0x3f7fffff, v4
+; GFX8-NEXT:    v_mov_b32_e32 v6, 0x7fc00000
+; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v4, v4
+; GFX8-NEXT:    v_cndmask_b32_e32 v4, v6, v5, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    s_mov_b32 s4, 0x7f800000
+; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX8-NEXT:    v_cmp_neq_f32_e64 vcc, |v0|, s4
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX8-NEXT:    global_store_dword v[1:2], v3, off
+; GFX8-NEXT:    s_waitcnt vmcnt(0)
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: safe_math_fract_f32_minimum:
+; GFX11:       ; %bb.0: ; %entry
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_floor_f32_e32 v3, v0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_sub_f32_e32 v4, v0, v3
+; GFX11-NEXT:    global_store_b32 v[1:2], v3, off
+; GFX11-NEXT:    v_min_f32_e32 v5, 0x3f7fffff, v4
+; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v4, v4
+; GFX11-NEXT:    v_cndmask_b32_e32 v4, 0x7fc00000, v5, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc_lo
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: safe_math_fract_f32_minimum:
+; GFX12:       ; %bb.0: ; %entry
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    v_floor_f32_e32 v3, v0
+; GFX12-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GFX12-NEXT:    v_sub_f32_e32 v4, v0, v3
+; GFX12-NEXT:    global_store_b32 v[1:2], v3, off
+; GFX12-NEXT:    v_minimum_f32 v4, 0x3f7fffff, v4
+; GFX12-NEXT:    s_wait_alu 0xfffd
+; GFX12-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc_lo
+; GFX12-NEXT:    v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX12-NEXT:    s_wait_alu 0xfffd
+; GFX12-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc_lo
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %floor = tail call float @llvm.floor.f32(float %x)
+  %sub = fsub float %x, %floor
+  %min = tail call float @llvm.minimum.f32(float %sub, float 0x3FEFFFFFE0000000)
+  %uno = fcmp uno float %x, 0.000000e+00
+  %cond = select i1 %uno, float %x, float %min
+  %fabs = tail call float @llvm.fabs.f32(float %x)
+  %cmpinf = fcmp oeq float %fabs, 0x7FF0000000000000
+  %cond6 = select i1 %cmpinf, float 0.000000e+00, float %cond
+  store float %floor, ptr addrspace(1) %ip, align 4
+  ret float %cond6
+}
+
+define float @safe_math_fract_f32_minimum_swap(float %x, ptr addrspace(1) writeonly captures(none) %ip) {
+; IR-LABEL: define float @safe_math_fract_f32_minimum_swap(
+; IR-SAME: float [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] {
+; IR-NEXT:  [[ENTRY:.*:]]
+; IR-NEXT:    [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
+; IR-NEXT:    [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
+; IR-NEXT:    [[MIN:%.*]] = tail call float @llvm.minimum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
+; IR-NEXT:    [[UNO:%.*]] = fcmp ord float [[X]], 0.000000e+00
+; IR-NEXT:    [[COND:%.*]] = select i1 [[UNO]], float [[MIN]], float [[X]]
+; IR-NEXT:    [[FABS:%.*]] = tail call float @llvm.fabs.f32(float [[X]])
+; IR-NEXT:    [[CMPINF:%.*]] = fcmp oeq float [[FABS]], 0x7FF0000000000000
+; IR-NEXT:    [[COND6:%.*]] = select i1 [[CMPINF]], float 0.000000e+00, float [[COND]]
+; IR-NEXT:    store float [[FLOOR]], ptr addrspace(1) [[IP]], align 4
+; IR-NEXT:    ret float [[COND6]]
+;
+; GFX6-LABEL: safe_math_fract_f32_minimum_swap:
+; GFX6:       ; %bb.0: ; %entry
+; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT:    v_floor_f32_e32 v3, v0
+; GFX6-NEXT:    v_sub_f32_e32 v4, v0, v3
+; GFX6-NEXT:    v_min_f32_e32 v5, 0x3f7fffff, v4
+; GFX6-NEXT:    v_mov_b32_e32 v6, 0x7fc00000
+; GFX6-NEXT:    v_cmp_o_f32_e32 vcc, v4, v4
+; GFX6-NEXT:    v_cndmask_b32_e32 v4, v6, v5, vcc
+; GFX6-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; GFX6-NEXT:    s_mov_b32 s8, 0x7f800000
+; GFX6-NEXT:    s_mov_b32 s6, 0
+; GFX6-NEXT:    v_cndmask_b32_e32 v4, v0, v4, vcc
+; GFX6-NEXT:    v_cmp_neq_f32_e64 vcc, |v0|, s8
+; GFX6-NEXT:    s_mov_b32 s7, 0xf000
+; GFX6-NEXT:    s_mov_b32 s4, s6
+; GFX6-NEXT:    s_mov_b32 s5, s6
+; GFX6-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX6-NEXT:    buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
+; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX7-LABEL: safe_math_fract_f32_minimum_swap:
+; GFX7:       ; %bb.0: ; %entry
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_floor_f32_e32 v3, v0
+; GFX7-NEXT:    v_sub_f32_e32 v4, v0, v3
+; GFX7-NEXT:    v_min_f32_e32 v5, 0x3f7fffff, v4
+; GFX7-NEXT:    v_mov_b32_e32 v6, 0x7fc00000
+; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v4, v4
+; GFX7-NEXT:    v_cndmask_b32_e32 v4, v6, v5, vcc
+; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    s_mov_b32 s8, 0x7f800000
+; GFX7-NEXT:    s_mov_b32 s6, 0
+; GFX7-NEXT:    v_cndmask_b32_e32 v4, v0, v4, vcc
+; GFX7-NEXT:    v_cmp_neq_f32_e64 vcc, |v0|, s8
+; GFX7-NEXT:    s_mov_b32 s7, 0xf000
+; GFX7-NEXT:    s_mov_b32 s4, s6
+; GFX7-NEXT:    s_mov_b32 s5, s6
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX7-NEXT:    buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
+; GFX7-NEXT:    s_waitcnt vmcnt(0)
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: safe_math_fract_f32_minimum_swap:
+; GFX8:       ; %bb.0: ; %entry
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_floor_f32_e32 v3, v0
+; GFX8-NEXT:    v_sub_f32_e32 v4, v0, v3
+; GFX8-NEXT:    v_min_f32_e32 v5, 0x3f7fffff, v4
+; GFX8-NEXT:    v_mov_b32_e32 v6, 0x7fc00000
+; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v4, v4
+; GFX8-NEXT:    v_cndmask_b32_e32 v4, v6, v5, vcc
+; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    s_mov_b32 s4, 0x7f800000
+; GFX8-NEXT:    v_cndmask_b32_e32 v4, v0, v4, vcc
+; GFX8-NEXT:    v_cmp_neq_f32_e64 vcc, |v0|, s4
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX8-NEXT:    global_store_dword v[1:2], v3, off
+; GFX8-NEXT:    s_waitcnt vmcnt(0)
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: safe_math_fract_f32_minimum_swap:
+; GFX11:       ; %bb.0: ; %entry
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_floor_f32_e32 v3, v0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_sub_f32_e32 v4, v0, v3
+; GFX11-NEXT:    global_store_b32 v[1:2], v3, off
+; GFX11-NEXT:    v_min_f32_e32 v5, 0x3f7fffff, v4
+; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v4, v4
+; GFX11-NEXT:    v_cndmask_b32_e32 v4, 0x7fc00000, v5, vcc_lo
+; GFX11-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cndmask_b32_e32 v4, v0, v4, vcc_lo
+; GFX11-NEXT:    v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc_lo
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: safe_math_fract_f32_minimum_swap:
+; GFX12:       ; %bb.0: ; %entry
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    v_floor_f32_e32 v3, v0
+; GFX12-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v0
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GFX12-NEXT:    v_sub_f32_e32 v4, v0, v3
+; GFX12-NEXT:    global_store_b32 v[1:2], v3, off
+; GFX12-NEXT:    v_minimum_f32 v4, 0x3f7fffff, v4
+; GFX12-NEXT:    s_wait_alu 0xfffd
+; GFX12-NEXT:    v_cndmask_b32_e32 v4, v0, v4, vcc_lo
+; GFX12-NEXT:    v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX12-NEXT:    s_wait_alu 0xfffd
+; GFX12-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc_lo
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %floor = tail call float @llvm.floor.f32(float %x)
+  %sub = fsub float %x, %floor
+  %min = tail call float @llvm.minimum.f32(float %sub, float 0x3FEFFFFFE0000000)
+  %uno = fcmp ord float %x, 0.000000e+00
+  %cond = select i1 %uno, float %min, float %x
+  %fabs = tail call float @llvm.fabs.f32(float %x)
+  %cmpinf = fcmp oeq float %fabs, 0x7FF0000000000000
+  %cond6 = select i1 %cmpinf, float 0.000000e+00, float %cond
+  store float %floor, ptr addrspace(1) %ip, align 4
+  ret float %cond6
+}
+
+define float @safe_math_fract_f32_minimumnum(float %x, ptr addrspace(1) writeonly captures(none) %ip) {
+; IR-LABEL: define float @safe_math_fract_f32_minimumnum(
+; IR-SAME: float [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] {
+; IR-NEXT:  [[ENTRY:.*:]]
+; IR-NEXT:    [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
+; IR-NEXT:    [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
+; IR-NEXT:    [[MIN:%.*]] = tail call float @llvm.minimumnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
+; IR-NEXT:    [[UNO:%.*]] = fcmp uno float [[X]], 0.000000e+00
+; IR-NEXT:    [[COND:%.*]] = select i1 [[UNO]], float [[X]], float [[MIN]]
+; IR-NEXT:    [[FABS:%.*]] = tail call float @llvm.fabs.f32(float [[X]])
+; IR-NEXT:    [[CMPINF:%.*]] = fcmp oeq float [[FABS]], 0x7FF0000000000000
+; IR-NEXT:    [[COND6:%.*]] = select i1 [[CMPINF]], float 0.000000e+00, float [[COND]]
+; IR-NEXT:    store float [[FLOOR]], ptr addrspace(1) [[IP]], align 4
+; IR-NEXT:    ret float [[COND6]]
+;
+; GFX6-LABEL: safe_math_fract_f32_minimumnum:
+; GFX6:       ; %bb.0: ; %entry
+; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT:    v_floor_f32_e32 v3, v0
+; GFX6-NEXT:    v_sub_f32_e32 v4, v0, v3
+; GFX6-NEXT:    v_min_f32_e32 v4, 0x3f7fffff, v4
+; GFX6-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX6-NEXT:    s_mov_b32 s8, 0x7f800000
+; GFX6-NEXT:    s_mov_b32 s6, 0
+; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX6-NEXT:    v_cmp_neq_f32_e64 vcc, |v0|, s8
+; GFX6-NEXT:    s_mov_b32 s7, 0xf000
+; GFX6-NEXT:    s_mov_b32 s4, s6
+; GFX6-NEXT:    s_mov_b32 s5, s6
+; GFX6-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX6-NEXT:    buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
+; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX7-LABEL: safe_math_fract_f32_minimumnum:
+; GFX7:       ; %bb.0: ; %entry
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_floor_f32_e32 v3, v0
+; GFX7-NEXT:    v_sub_f32_e32 v4, v0, v3
+; GFX7-NEXT:    v_min_f32_e32 v4, 0x3f7fffff, v4
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    s_mov_b32 s8, 0x7f800000
+; GFX7-NEXT:    s_mov_b32 s6, 0
+; GFX7-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX7-NEXT:    v_cmp_neq_f32_e64 vcc, |v0|, s8
+; GFX7-NEXT:    s_mov_b32 s7, 0xf000
+; GFX7-NEXT:    s_mov_b32 s4, s6
+; GFX7-NEXT:    s_mov_b32 s5, s6
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX7-NEXT:    buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
+; GFX7-NEXT:    s_waitcnt vmcnt(0)
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: safe_math_fract_f32_minimumnum:
+; GFX8:       ; %bb.0: ; %entry
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_floor_f32_e32 v3, v0
+; GFX8-NEXT:    v_sub_f32_e32 v4, v0, v3
+; GFX8-NEXT:    v_min_f32_e32 v4, 0x3f7fffff, v4
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    s_mov_b32 s4, 0x7f800000
+; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GFX8-NEXT:    v_cmp_neq_f32_e64 vcc, |v0|, s4
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX8-NEXT:    global_store_dword v[1:2], v3, off
+; GFX8-NEXT:    s_waitcnt vmcnt(0)
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: safe_math_fract_f32_minimumnum:
+; GFX11:       ; %bb.0: ; %entry
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_floor_f32_e32 v3, v0
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_sub_f32_e32 v4, v0, v3
+; GFX11-NEXT:    global_store_b32 v[1:2], v3, off
+; GFX11-NEXT:    v_min_f32_e32 v4, 0x3f7fffff, v4
+; GFX11-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc_lo
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: safe_math_fract_f32_minimumnum:
+; GFX12:       ; %bb.0: ; %entry
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    v_floor_f32_e32 v3, v0
+; GFX12-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GFX12-NEXT:    v_sub_f32_e32 v4, v0, v3
+; GFX12-NEXT:    global_store_b32 v[1:2], v3, off
+; GFX12-NEXT:    v_min_num_f32_e32 v4, 0x3f7fffff, v4
+; GFX12-NEXT:    s_wait_alu 0xfffd
+; GFX12-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc_lo
+; GFX12-NEXT:    v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX12-NEXT:    s_wait_alu 0xfffd
+; GFX12-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc_lo
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %floor = tail call float @llvm.floor.f32(float %x)
+  %sub = fsub float %x, %floor
+  %min = tail call float @llvm.minimumnum.f32(float %sub, float 0x3FEFFFFFE0000000)
+  %uno = fcmp uno float %x, 0.000000e+00
+  %cond = select i1 %uno, float %x, float %min
+  %fabs = tail call float @llvm.fabs.f32(float %x)
+  %cmpinf = fcmp oeq float %fabs, 0x7FF0000000000000
+  %cond6 = select i1 %cmpinf, float 0.000000e+00, float %cond
+  store float %floor, ptr addrspace(1) %ip, align 4
+  ret float %cond6
+}
+
+define float @safe_math_fract_f32_minimumnum_swap(float %x, ptr addrspace(1) writeonly captures(none) %ip) {
+; IR-LABEL: define float @safe_math_fract_f32_minimumnum_swap(
+; IR-SAME: float [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] {
+; IR-NEXT:  [[ENTRY:.*:]]
+; IR-NEXT:    [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
+; IR-NEXT:    [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
+; IR-NEXT:    [[MIN:%.*]] = tail call float @llvm.minimumnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
+; IR-NEXT:    [[UNO:%.*]] = fcmp ord float [[X]], 0.000000e+00
+; IR-NEXT:    [[COND:%.*]] = select i1 [[UNO]], float [[MIN]], float [[X]]
+; IR-NEXT:    [[FABS:%.*]] = tail call float @llvm.fabs.f32(float [[X]])
+; IR-NEXT:    [[CMPINF:%.*]] = fcmp oeq float [[FABS]], 0x7FF0000000000000
+; IR-NEXT:    [[COND6:%.*]] = select i1 [[CMPINF]], float 0.000000e+00, float [[COND]]
+; IR-NEXT:    store float [[FLOOR]], ptr addrspace(1) [[IP]], align 4
+; IR-NEXT:    ret float [[COND6]]
+;
+; GFX6-LABEL: safe_math_fract_f32_minimumnum_swap:
+; GFX6:       ; %bb.0: ; %entry
+; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT:    v_floor_f32_e32 v3, v0
+; GFX6-NEXT:    v_sub_f32_e32 v4, v0, v3
+; GFX6-NEXT:    v_min_f32_e32 v4, 0x3f7fffff, v4
+; GFX6-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; GFX6-NEXT:    s_mov_b32 s8, 0x7f800000
+; GFX6-NEXT:    s_mov_b32 s6, 0
+; GFX6-NEXT:    v_cndmask_b32_e32 v4, v0, v4, vcc
+; GFX6-NEXT:    v_cmp_neq_f32_e64 vcc, |v0|, s8
+; GFX6-NEXT:    s_mov_b32 s7, 0xf000
+; GFX6-NEXT:    s_mov_b32 s4, s6
+; GFX6-NEXT:    s_mov_b32 s5, s6
+; GFX6-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX6-NEXT:    buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
+; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX7-LABEL: safe_math_fract_f32_minimumnum_swap:
+; GFX7:       ; %bb.0: ; %entry
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_floor_f32_e32 v3, v0
+; GFX7-NEXT:    v_sub_f32_e32 v4, v0, v3
+; GFX7-NEXT:    v_min_f32_e32 v4, 0x3f7fffff, v4
+; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; GFX7-NEXT:    s_mov_b32 s8, 0x7f800000
+; GFX7-NEXT:    s_mov_b32 s6, 0
+; GFX7-NEXT:    v_cndmask_b32_e32 v4, v0, v4, vcc
+; GFX7-NEXT:    v_cmp_neq_f32_e64 vcc, |v0|, s8
+; GFX7-NEXT:    s_mov_b32 s7, 0xf000
+; GFX7-NEXT:    s_mov_b32 s4, s6
+; GFX7-NEXT:    s_mov_b32 s5, s6
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX7-NEXT:    buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
+; GFX7-NEXT:    s_waitcnt vmcnt(0)
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: safe_math_fract_f32_minimumnum_swap:
+; GFX8:       ; %bb.0: ; %entry
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_floor_f32_e32 v3, v0
+; GFX8-NEXT:    v_sub_f32_e32 v4, v0, v3
+; GFX8-NEXT:    v_min_f32_e32 v4, 0x3f7fffff, v4
+; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; GFX8-NEXT:    s_mov_b32 s4, 0x7f800000
+...
[truncated]

@arsenm arsenm marked this pull request as ready for review May 29, 2025 17:05
Contributor Author

arsenm commented May 29, 2025

Merge activity

  • May 29, 7:50 PM UTC: A user started a stack merge that includes this pull request via Graphite.
  • May 29, 7:58 PM UTC: Graphite rebased this pull request as part of a merge.
  • May 29, 8:02 PM UTC: Graphite rebased this pull request as part of a merge.
  • May 29, 8:05 PM UTC: @arsenm merged this pull request with Graphite.

@arsenm arsenm force-pushed the users/arsenm/amdgpu/add-missing-fract-test branch from 2b6e81e to 5a35e4c on May 29, 2025 19:55
Base automatically changed from users/arsenm/amdgpu/add-missing-fract-test to main May 29, 2025 19:58
@arsenm arsenm force-pushed the users/arsenm/amdgpu/handle-other-fmin-fract-combine-baseline-test branch from 825f96c to 966fa32 on May 29, 2025 19:58
@arsenm arsenm force-pushed the users/arsenm/amdgpu/handle-other-fmin-fract-combine-baseline-test branch from 966fa32 to c057908 on May 29, 2025 20:01
@arsenm arsenm merged commit c569248 into main May 29, 2025
6 of 9 checks passed
@arsenm arsenm deleted the users/arsenm/amdgpu/handle-other-fmin-fract-combine-baseline-test branch May 29, 2025 20:05
google-yfyang pushed a commit to google-yfyang/llvm-project that referenced this pull request May 29, 2025
sivan-shani pushed a commit to sivan-shani/llvm-project that referenced this pull request Jun 3, 2025