Skip to content

[SelectionDAG] Avoid one comparison when legalizing fmaximum #142732

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into llvm:main from maximum-avoid-one-cmp
Jun 4, 2025

Conversation

nikic
Copy link
Contributor

@nikic nikic commented Jun 4, 2025

When ordering signed zero, only check the sign of one of the values. We already know at this point that both values must be +/-0.0, so it is sufficient to check one of them to correctly order them.

For example, for fmaximum, if we know LHS is +0.0 then we can always select LHS; the value of RHS does not matter. If LHS is -0.0 we can always select RHS; the value of RHS doesn't matter.

When ordering signed zero, only check the sign of one of the values.
We already know at this point that both values must be +/-0.0, so
it is sufficient to check one of them to correctly order them.
@llvmbot
Copy link
Member

llvmbot commented Jun 4, 2025

@llvm/pr-subscribers-backend-powerpc
@llvm/pr-subscribers-backend-aarch64
@llvm/pr-subscribers-backend-x86

@llvm/pr-subscribers-backend-nvptx

Author: Nikita Popov (nikic)

Changes

When ordering signed zero, only check the sign of one of the values. We already know at this point that both values must be +/-0.0, so it is sufficient to check one of them to correctly order them.

For example, for fmaximum, if we know LHS is +0.0 then we can always select LHS; the value of RHS does not matter. If LHS is -0.0 we can always select RHS; the value of RHS doesn't matter.


Patch is 99.26 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/142732.diff

9 Files Affected:

  • (modified) llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp (+8-11)
  • (modified) llvm/test/CodeGen/AArch64/fmaximum-legalization.ll (+27-32)
  • (modified) llvm/test/CodeGen/ARM/fp-maximum-legalization.ll (+7-13)
  • (modified) llvm/test/CodeGen/NVPTX/bf16-instructions.ll (+32-38)
  • (modified) llvm/test/CodeGen/NVPTX/math-intrins.ll (+222-254)
  • (modified) llvm/test/CodeGen/PowerPC/fminimum-fmaximum-f128.ll (+43-49)
  • (modified) llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll (+175-234)
  • (modified) llvm/test/CodeGen/X86/fminimum-fmaximum-i686.ll (+176-227)
  • (modified) llvm/test/CodeGen/X86/fminimum-fmaximum.ll (+83-100)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index c8fe8971e593c..f34bf0ca7ede0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8610,19 +8610,16 @@ SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
   // fminimum/fmaximum requires -0.0 less than +0.0
   if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() &&
       !DAG.isKnownNeverZeroFloat(RHS) && !DAG.isKnownNeverZeroFloat(LHS)) {
-    auto IsSpecificZero = [&](SDValue F) {
-      FloatSignAsInt State;
-      DAG.getSignAsIntValue(State, DL, F);
-      return DAG.getSetCC(DL, CCVT, State.IntValue,
-                          DAG.getConstant(0, DL, State.IntValue.getValueType()),
-                          IsMax ? ISD::SETEQ : ISD::SETNE);
-    };
     SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
                                   DAG.getConstantFP(0.0, DL, VT), ISD::SETOEQ);
-    SDValue LCmp =
-        DAG.getSelect(DL, VT, IsSpecificZero(LHS), LHS, MinMax, Flags);
-    SDValue RCmp = DAG.getSelect(DL, VT, IsSpecificZero(RHS), RHS, LCmp, Flags);
-    MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
+    FloatSignAsInt State;
+    DAG.getSignAsIntValue(State, DL, LHS);
+    SDValue IsSpecificZero =
+        DAG.getSetCC(DL, CCVT, State.IntValue,
+                     DAG.getConstant(0, DL, State.IntValue.getValueType()),
+                     IsMax ? ISD::SETEQ : ISD::SETNE);
+    SDValue Sel = DAG.getSelect(DL, VT, IsSpecificZero, LHS, RHS, Flags);
+    MinMax = DAG.getSelect(DL, VT, IsZero, Sel, MinMax, Flags);
   }
 
   return MinMax;
diff --git a/llvm/test/CodeGen/AArch64/fmaximum-legalization.ll b/llvm/test/CodeGen/AArch64/fmaximum-legalization.ll
index 9f542abcb80f7..b47470fb78234 100644
--- a/llvm/test/CodeGen/AArch64/fmaximum-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/fmaximum-legalization.ll
@@ -46,51 +46,46 @@ define fp128 @maximum_fp128(fp128 %x, fp128 %y) nounwind {
 ; CHECK-LABEL: maximum_fp128:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #96
+; CHECK-NEXT:    str q0, [sp, #64]
+; CHECK-NEXT:    mov v2.16b, v1.16b
+; CHECK-NEXT:    ldrb w8, [sp, #79]
 ; CHECK-NEXT:    str x30, [sp, #80] // 8-byte Folded Spill
-; CHECK-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
-; CHECK-NEXT:    stp q1, q0, [sp, #48]
-; CHECK-NEXT:    bl __gttf2
-; CHECK-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
-; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    b.le .LBB1_2
+; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    b.ne .LBB1_2
 ; CHECK-NEXT:  // %bb.1:
-; CHECK-NEXT:    mov v1.16b, v0.16b
+; CHECK-NEXT:    mov v2.16b, v0.16b
 ; CHECK-NEXT:  .LBB1_2:
 ; CHECK-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    bl __unordtf2
-; CHECK-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    stp q2, q0, [sp] // 32-byte Folded Spill
+; CHECK-NEXT:    bl __gttf2
+; CHECK-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
 ; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    b.eq .LBB1_4
+; CHECK-NEXT:    mov v2.16b, v1.16b
+; CHECK-NEXT:    b.le .LBB1_4
 ; CHECK-NEXT:  // %bb.3:
-; CHECK-NEXT:    adrp x8, .LCPI1_0
-; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI1_0]
+; CHECK-NEXT:    mov v2.16b, v0.16b
 ; CHECK-NEXT:  .LBB1_4:
-; CHECK-NEXT:    ldrb w8, [sp, #79]
-; CHECK-NEXT:    mov v1.16b, v0.16b
-; CHECK-NEXT:    cmp w8, #0
-; CHECK-NEXT:    b.ne .LBB1_6
+; CHECK-NEXT:    str q2, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    bl __unordtf2
+; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    cmp w0, #0
+; CHECK-NEXT:    b.eq .LBB1_6
 ; CHECK-NEXT:  // %bb.5:
-; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    adrp x8, .LCPI1_0
+; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI1_0]
 ; CHECK-NEXT:  .LBB1_6:
-; CHECK-NEXT:    ldrb w8, [sp, #63]
-; CHECK-NEXT:    cmp w8, #0
-; CHECK-NEXT:    b.ne .LBB1_8
-; CHECK-NEXT:  // %bb.7:
-; CHECK-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:  .LBB1_8:
+; CHECK-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
 ; CHECK-NEXT:    adrp x8, .LCPI1_1
-; CHECK-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI1_1]
-; CHECK-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
 ; CHECK-NEXT:    bl __eqtf2
-; CHECK-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
 ; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    b.ne .LBB1_10
-; CHECK-NEXT:  // %bb.9:
-; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:  .LBB1_10:
+; CHECK-NEXT:    b.ne .LBB1_8
+; CHECK-NEXT:  // %bb.7:
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:  .LBB1_8:
 ; CHECK-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
 ; CHECK-NEXT:    add sp, sp, #96
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/ARM/fp-maximum-legalization.ll b/llvm/test/CodeGen/ARM/fp-maximum-legalization.ll
index a3ab144356e16..f3e3e17a22eaf 100644
--- a/llvm/test/CodeGen/ARM/fp-maximum-legalization.ll
+++ b/llvm/test/CodeGen/ARM/fp-maximum-legalization.ll
@@ -4,7 +4,7 @@
 define double @maximum_double(double %x, double %y) nounwind {
 ; CHECK-LABEL: maximum_double:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    sub sp, sp, #8
 ; CHECK-NEXT:    vmov d17, r2, r3
 ; CHECK-NEXT:    mov r2, #0
 ; CHECK-NEXT:    vmov d16, r0, r1
@@ -12,32 +12,26 @@ define double @maximum_double(double %x, double %y) nounwind {
 ; CHECK-NEXT:    vcmp.f64 d16, d17
 ; CHECK-NEXT:    mov r0, #0
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    vstr d16, [sp, #8]
-; CHECK-NEXT:    vstr d17, [sp]
-; CHECK-NEXT:    ldrb r1, [sp, #15]
+; CHECK-NEXT:    vldr d18, .LCPI0_0
 ; CHECK-NEXT:    vmov.f64 d19, d17
+; CHECK-NEXT:    vstr d16, [sp]
+; CHECK-NEXT:    ldrb r1, [sp, #7]
 ; CHECK-NEXT:    clz r1, r1
-; CHECK-NEXT:    vldr d18, .LCPI0_0
 ; CHECK-NEXT:    movwvs r2, #1
 ; CHECK-NEXT:    movwgt r3, #1
 ; CHECK-NEXT:    cmp r3, #0
 ; CHECK-NEXT:    vmovne.f64 d19, d16
 ; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    ldrb r2, [sp, #7]
 ; CHECK-NEXT:    vmovne.f64 d19, d18
 ; CHECK-NEXT:    lsrs r1, r1, #5
-; CHECK-NEXT:    clz r1, r2
 ; CHECK-NEXT:    vcmp.f64 d19, #0
-; CHECK-NEXT:    vmov.f64 d18, d19
-; CHECK-NEXT:    vmovne.f64 d18, d16
-; CHECK-NEXT:    lsrs r1, r1, #5
-; CHECK-NEXT:    vmovne.f64 d18, d17
+; CHECK-NEXT:    vmovne.f64 d17, d16
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT:    movweq r0, #1
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    vmovne.f64 d19, d18
+; CHECK-NEXT:    vmovne.f64 d19, d17
 ; CHECK-NEXT:    vmov r0, r1, d19
-; CHECK-NEXT:    add sp, sp, #16
+; CHECK-NEXT:    add sp, sp, #8
 ; CHECK-NEXT:    bx lr
 ; CHECK-NEXT:    .p2align 3
 ; CHECK-NEXT:  @ %bb.1:
diff --git a/llvm/test/CodeGen/NVPTX/bf16-instructions.ll b/llvm/test/CodeGen/NVPTX/bf16-instructions.ll
index 096649e5bde43..e16ddb778099c 100644
--- a/llvm/test/CodeGen/NVPTX/bf16-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/bf16-instructions.ll
@@ -1351,30 +1351,28 @@ define bfloat @test_roundeven(bfloat %a) {
 define bfloat @test_maximum(bfloat %a, bfloat %b) {
 ; SM70-LABEL: test_maximum(
 ; SM70:       {
-; SM70-NEXT:    .reg .pred %p<6>;
-; SM70-NEXT:    .reg .b16 %rs<8>;
+; SM70-NEXT:    .reg .pred %p<5>;
+; SM70-NEXT:    .reg .b16 %rs<7>;
 ; SM70-NEXT:    .reg .b32 %r<7>;
 ; SM70-EMPTY:
 ; SM70-NEXT:  // %bb.0:
 ; SM70-NEXT:    ld.param.b16 %rs1, [test_maximum_param_0];
+; SM70-NEXT:    setp.eq.s16 %p1, %rs1, 0;
 ; SM70-NEXT:    ld.param.b16 %rs2, [test_maximum_param_1];
+; SM70-NEXT:    selp.b16 %rs3, %rs1, %rs2, %p1;
 ; SM70-NEXT:    cvt.u32.u16 %r1, %rs2;
 ; SM70-NEXT:    shl.b32 %r2, %r1, 16;
 ; SM70-NEXT:    cvt.u32.u16 %r3, %rs1;
 ; SM70-NEXT:    shl.b32 %r4, %r3, 16;
-; SM70-NEXT:    setp.gt.f32 %p1, %r4, %r2;
-; SM70-NEXT:    selp.b16 %rs3, %rs1, %rs2, %p1;
-; SM70-NEXT:    setp.nan.f32 %p2, %r4, %r2;
-; SM70-NEXT:    selp.b16 %rs4, 0x7FC0, %rs3, %p2;
-; SM70-NEXT:    setp.eq.s16 %p3, %rs1, 0;
-; SM70-NEXT:    selp.b16 %rs5, %rs1, %rs4, %p3;
-; SM70-NEXT:    setp.eq.s16 %p4, %rs2, 0;
-; SM70-NEXT:    selp.b16 %rs6, %rs2, %rs5, %p4;
-; SM70-NEXT:    cvt.u32.u16 %r5, %rs4;
+; SM70-NEXT:    setp.gt.f32 %p2, %r4, %r2;
+; SM70-NEXT:    selp.b16 %rs4, %rs1, %rs2, %p2;
+; SM70-NEXT:    setp.nan.f32 %p3, %r4, %r2;
+; SM70-NEXT:    selp.b16 %rs5, 0x7FC0, %rs4, %p3;
+; SM70-NEXT:    cvt.u32.u16 %r5, %rs5;
 ; SM70-NEXT:    shl.b32 %r6, %r5, 16;
-; SM70-NEXT:    setp.eq.f32 %p5, %r6, 0f00000000;
-; SM70-NEXT:    selp.b16 %rs7, %rs6, %rs4, %p5;
-; SM70-NEXT:    st.param.b16 [func_retval0], %rs7;
+; SM70-NEXT:    setp.eq.f32 %p4, %r6, 0f00000000;
+; SM70-NEXT:    selp.b16 %rs6, %rs3, %rs5, %p4;
+; SM70-NEXT:    st.param.b16 [func_retval0], %rs6;
 ; SM70-NEXT:    ret;
 ;
 ; SM80-LABEL: test_maximum(
@@ -1475,48 +1473,44 @@ define bfloat @test_maxnum(bfloat %a, bfloat %b) {
 define <2 x bfloat> @test_maximum_v2(<2 x bfloat> %a, <2 x bfloat> %b) {
 ; SM70-LABEL: test_maximum_v2(
 ; SM70:       {
-; SM70-NEXT:    .reg .pred %p<11>;
-; SM70-NEXT:    .reg .b16 %rs<19>;
+; SM70-NEXT:    .reg .pred %p<9>;
+; SM70-NEXT:    .reg .b16 %rs<15>;
 ; SM70-NEXT:    .reg .b32 %r<16>;
 ; SM70-EMPTY:
 ; SM70-NEXT:  // %bb.0:
 ; SM70-NEXT:    ld.param.b32 %r1, [test_maximum_v2_param_0];
 ; SM70-NEXT:    ld.param.b32 %r2, [test_maximum_v2_param_1];
 ; SM70-NEXT:    mov.b32 {%rs1, %rs2}, %r2;
+; SM70-NEXT:    mov.b32 {%rs3, %rs4}, %r1;
+; SM70-NEXT:    setp.eq.s16 %p1, %rs4, 0;
+; SM70-NEXT:    selp.b16 %rs7, %rs4, %rs2, %p1;
 ; SM70-NEXT:    cvt.u32.u16 %r3, %rs2;
 ; SM70-NEXT:    shl.b32 %r4, %r3, 16;
-; SM70-NEXT:    mov.b32 {%rs3, %rs4}, %r1;
 ; SM70-NEXT:    cvt.u32.u16 %r5, %rs4;
 ; SM70-NEXT:    shl.b32 %r6, %r5, 16;
-; SM70-NEXT:    setp.gt.f32 %p1, %r6, %r4;
-; SM70-NEXT:    selp.b16 %rs5, %rs4, %rs2, %p1;
-; SM70-NEXT:    setp.nan.f32 %p2, %r6, %r4;
-; SM70-NEXT:    selp.b16 %rs6, 0x7FC0, %rs5, %p2;
-; SM70-NEXT:    setp.eq.s16 %p3, %rs4, 0;
-; SM70-NEXT:    selp.b16 %rs9, %rs4, %rs6, %p3;
-; SM70-NEXT:    setp.eq.s16 %p4, %rs2, 0;
-; SM70-NEXT:    selp.b16 %rs12, %rs2, %rs9, %p4;
-; SM70-NEXT:    cvt.u32.u16 %r7, %rs6;
+; SM70-NEXT:    setp.gt.f32 %p2, %r6, %r4;
+; SM70-NEXT:    selp.b16 %rs8, %rs4, %rs2, %p2;
+; SM70-NEXT:    setp.nan.f32 %p3, %r6, %r4;
+; SM70-NEXT:    selp.b16 %rs9, 0x7FC0, %rs8, %p3;
+; SM70-NEXT:    cvt.u32.u16 %r7, %rs9;
 ; SM70-NEXT:    shl.b32 %r8, %r7, 16;
-; SM70-NEXT:    setp.eq.f32 %p5, %r8, 0f00000000;
-; SM70-NEXT:    selp.b16 %rs13, %rs12, %rs6, %p5;
+; SM70-NEXT:    setp.eq.f32 %p4, %r8, 0f00000000;
+; SM70-NEXT:    selp.b16 %rs10, %rs7, %rs9, %p4;
+; SM70-NEXT:    setp.eq.s16 %p5, %rs3, 0;
+; SM70-NEXT:    selp.b16 %rs11, %rs3, %rs1, %p5;
 ; SM70-NEXT:    cvt.u32.u16 %r9, %rs1;
 ; SM70-NEXT:    shl.b32 %r10, %r9, 16;
 ; SM70-NEXT:    cvt.u32.u16 %r11, %rs3;
 ; SM70-NEXT:    shl.b32 %r12, %r11, 16;
 ; SM70-NEXT:    setp.gt.f32 %p6, %r12, %r10;
-; SM70-NEXT:    selp.b16 %rs14, %rs3, %rs1, %p6;
+; SM70-NEXT:    selp.b16 %rs12, %rs3, %rs1, %p6;
 ; SM70-NEXT:    setp.nan.f32 %p7, %r12, %r10;
-; SM70-NEXT:    selp.b16 %rs15, 0x7FC0, %rs14, %p7;
-; SM70-NEXT:    setp.eq.s16 %p8, %rs3, 0;
-; SM70-NEXT:    selp.b16 %rs16, %rs3, %rs15, %p8;
-; SM70-NEXT:    setp.eq.s16 %p9, %rs1, 0;
-; SM70-NEXT:    selp.b16 %rs17, %rs1, %rs16, %p9;
-; SM70-NEXT:    cvt.u32.u16 %r13, %rs15;
+; SM70-NEXT:    selp.b16 %rs13, 0x7FC0, %rs12, %p7;
+; SM70-NEXT:    cvt.u32.u16 %r13, %rs13;
 ; SM70-NEXT:    shl.b32 %r14, %r13, 16;
-; SM70-NEXT:    setp.eq.f32 %p10, %r14, 0f00000000;
-; SM70-NEXT:    selp.b16 %rs18, %rs17, %rs15, %p10;
-; SM70-NEXT:    mov.b32 %r15, {%rs18, %rs13};
+; SM70-NEXT:    setp.eq.f32 %p8, %r14, 0f00000000;
+; SM70-NEXT:    selp.b16 %rs14, %rs11, %rs13, %p8;
+; SM70-NEXT:    mov.b32 %r15, {%rs14, %rs10};
 ; SM70-NEXT:    st.param.b32 [func_retval0], %r15;
 ; SM70-NEXT:    ret;
 ;
diff --git a/llvm/test/CodeGen/NVPTX/math-intrins.ll b/llvm/test/CodeGen/NVPTX/math-intrins.ll
index 441fdec7ce5c0..d84d725bf72cf 100644
--- a/llvm/test/CodeGen/NVPTX/math-intrins.ll
+++ b/llvm/test/CodeGen/NVPTX/math-intrins.ll
@@ -612,27 +612,25 @@ define <2 x half> @minnum_v2half(<2 x half> %a, <2 x half> %b) {
 define half @minimum_half(half %a, half %b) {
 ; CHECK-NOF16-LABEL: minimum_half(
 ; CHECK-NOF16:       {
-; CHECK-NOF16-NEXT:    .reg .pred %p<6>;
-; CHECK-NOF16-NEXT:    .reg .b16 %rs<8>;
+; CHECK-NOF16-NEXT:    .reg .pred %p<5>;
+; CHECK-NOF16-NEXT:    .reg .b16 %rs<7>;
 ; CHECK-NOF16-NEXT:    .reg .b32 %r<4>;
 ; CHECK-NOF16-EMPTY:
 ; CHECK-NOF16-NEXT:  // %bb.0:
 ; CHECK-NOF16-NEXT:    ld.param.b16 %rs1, [minimum_half_param_0];
+; CHECK-NOF16-NEXT:    setp.ne.s16 %p1, %rs1, 0;
 ; CHECK-NOF16-NEXT:    ld.param.b16 %rs2, [minimum_half_param_1];
+; CHECK-NOF16-NEXT:    selp.b16 %rs3, %rs1, %rs2, %p1;
 ; CHECK-NOF16-NEXT:    cvt.f32.f16 %r1, %rs2;
 ; CHECK-NOF16-NEXT:    cvt.f32.f16 %r2, %rs1;
-; CHECK-NOF16-NEXT:    setp.lt.f32 %p1, %r2, %r1;
-; CHECK-NOF16-NEXT:    selp.b16 %rs3, %rs1, %rs2, %p1;
-; CHECK-NOF16-NEXT:    setp.nan.f32 %p2, %r2, %r1;
-; CHECK-NOF16-NEXT:    selp.b16 %rs4, 0x7E00, %rs3, %p2;
-; CHECK-NOF16-NEXT:    setp.ne.s16 %p3, %rs1, 0;
-; CHECK-NOF16-NEXT:    selp.b16 %rs5, %rs1, %rs4, %p3;
-; CHECK-NOF16-NEXT:    setp.ne.s16 %p4, %rs2, 0;
-; CHECK-NOF16-NEXT:    selp.b16 %rs6, %rs2, %rs5, %p4;
-; CHECK-NOF16-NEXT:    cvt.f32.f16 %r3, %rs4;
-; CHECK-NOF16-NEXT:    setp.eq.f32 %p5, %r3, 0f00000000;
-; CHECK-NOF16-NEXT:    selp.b16 %rs7, %rs6, %rs4, %p5;
-; CHECK-NOF16-NEXT:    st.param.b16 [func_retval0], %rs7;
+; CHECK-NOF16-NEXT:    setp.lt.f32 %p2, %r2, %r1;
+; CHECK-NOF16-NEXT:    selp.b16 %rs4, %rs1, %rs2, %p2;
+; CHECK-NOF16-NEXT:    setp.nan.f32 %p3, %r2, %r1;
+; CHECK-NOF16-NEXT:    selp.b16 %rs5, 0x7E00, %rs4, %p3;
+; CHECK-NOF16-NEXT:    cvt.f32.f16 %r3, %rs5;
+; CHECK-NOF16-NEXT:    setp.eq.f32 %p4, %r3, 0f00000000;
+; CHECK-NOF16-NEXT:    selp.b16 %rs6, %rs3, %rs5, %p4;
+; CHECK-NOF16-NEXT:    st.param.b16 [func_retval0], %rs6;
 ; CHECK-NOF16-NEXT:    ret;
 ;
 ; CHECK-F16-LABEL: minimum_half(
@@ -648,27 +646,25 @@ define half @minimum_half(half %a, half %b) {
 ;
 ; CHECK-SM80-NOF16-LABEL: minimum_half(
 ; CHECK-SM80-NOF16:       {
-; CHECK-SM80-NOF16-NEXT:    .reg .pred %p<6>;
-; CHECK-SM80-NOF16-NEXT:    .reg .b16 %rs<8>;
+; CHECK-SM80-NOF16-NEXT:    .reg .pred %p<5>;
+; CHECK-SM80-NOF16-NEXT:    .reg .b16 %rs<7>;
 ; CHECK-SM80-NOF16-NEXT:    .reg .b32 %r<4>;
 ; CHECK-SM80-NOF16-EMPTY:
 ; CHECK-SM80-NOF16-NEXT:  // %bb.0:
 ; CHECK-SM80-NOF16-NEXT:    ld.param.b16 %rs1, [minimum_half_param_0];
+; CHECK-SM80-NOF16-NEXT:    setp.ne.s16 %p1, %rs1, 0;
 ; CHECK-SM80-NOF16-NEXT:    ld.param.b16 %rs2, [minimum_half_param_1];
+; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs3, %rs1, %rs2, %p1;
 ; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r1, %rs2;
 ; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r2, %rs1;
-; CHECK-SM80-NOF16-NEXT:    setp.lt.f32 %p1, %r2, %r1;
-; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs3, %rs1, %rs2, %p1;
-; CHECK-SM80-NOF16-NEXT:    setp.nan.f32 %p2, %r2, %r1;
-; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs4, 0x7E00, %rs3, %p2;
-; CHECK-SM80-NOF16-NEXT:    setp.ne.s16 %p3, %rs1, 0;
-; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs5, %rs1, %rs4, %p3;
-; CHECK-SM80-NOF16-NEXT:    setp.ne.s16 %p4, %rs2, 0;
-; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs6, %rs2, %rs5, %p4;
-; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r3, %rs4;
-; CHECK-SM80-NOF16-NEXT:    setp.eq.f32 %p5, %r3, 0f00000000;
-; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs7, %rs6, %rs4, %p5;
-; CHECK-SM80-NOF16-NEXT:    st.param.b16 [func_retval0], %rs7;
+; CHECK-SM80-NOF16-NEXT:    setp.lt.f32 %p2, %r2, %r1;
+; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs4, %rs1, %rs2, %p2;
+; CHECK-SM80-NOF16-NEXT:    setp.nan.f32 %p3, %r2, %r1;
+; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs5, 0x7E00, %rs4, %p3;
+; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r3, %rs5;
+; CHECK-SM80-NOF16-NEXT:    setp.eq.f32 %p4, %r3, 0f00000000;
+; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs6, %rs3, %rs5, %p4;
+; CHECK-SM80-NOF16-NEXT:    st.param.b16 [func_retval0], %rs6;
 ; CHECK-SM80-NOF16-NEXT:    ret;
   %x = call half @llvm.minimum.f16(half %a, half %b)
   ret half %x
@@ -677,22 +673,20 @@ define half @minimum_half(half %a, half %b) {
 define float @minimum_float(float %a, float %b) {
 ; CHECK-NOF16-LABEL: minimum_float(
 ; CHECK-NOF16:       {
-; CHECK-NOF16-NEXT:    .reg .pred %p<5>;
-; CHECK-NOF16-NEXT:    .reg .b32 %r<8>;
+; CHECK-NOF16-NEXT:    .reg .pred %p<4>;
+; CHECK-NOF16-NEXT:    .reg .b32 %r<7>;
 ; CHECK-NOF16-EMPTY:
 ; CHECK-NOF16-NEXT:  // %bb.0:
 ; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [minimum_float_param_0];
+; CHECK-NOF16-NEXT:    setp.ne.s32 %p1, %r1, 0;
 ; CHECK-NOF16-NEXT:    ld.param.b32 %r2, [minimum_float_param_1];
-; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %r1, %r2;
-; CHECK-NOF16-NEXT:    min.f32 %r3, %r1, %r2;
-; CHECK-NOF16-NEXT:    selp.f32 %r4, 0f7FC00000, %r3, %p1;
-; CHECK-NOF16-NEXT:    setp.ne.s32 %p2, %r1, 0;
-; CHECK-NOF16-NEXT:    selp.f32 %r5, %r1, %r4, %p2;
-; CHECK-NOF16-NEXT:    setp.ne.s32 %p3, %r2, 0;
-; CHECK-NOF16-NEXT:    selp.f32 %r6, %r2, %r5, %p3;
-; CHECK-NOF16-NEXT:    setp.eq.f32 %p4, %r4, 0f00000000;
-; CHECK-NOF16-NEXT:    selp.f32 %r7, %r6, %r4, %p4;
-; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r7;
+; CHECK-NOF16-NEXT:    selp.f32 %r3, %r1, %r2, %p1;
+; CHECK-NOF16-NEXT:    setp.nan.f32 %p2, %r1, %r2;
+; CHECK-NOF16-NEXT:    min.f32 %r4, %r1, %r2;
+; CHECK-NOF16-NEXT:    selp.f32 %r5, 0f7FC00000, %r4, %p2;
+; CHECK-NOF16-NEXT:    setp.eq.f32 %p3, %r5, 0f00000000;
+; CHECK-NOF16-NEXT:    selp.f32 %r6, %r3, %r5, %p3;
+; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r6;
 ; CHECK-NOF16-NEXT:    ret;
 ;
 ; CHECK-F16-LABEL: minimum_float(
@@ -728,13 +722,13 @@ define float @minimum_imm1(float %a) {
 ; CHECK-NOF16-EMPTY:
 ; CHECK-NOF16-NEXT:  // %bb.0:
 ; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [minimum_imm1_param_0];
-; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %r1, %r1;
-; CHECK-NOF16-NEXT:    min.f32 %r2, %r1, 0f00000000;
-; CHECK-NOF16-NEXT:    selp.f32 %r3, 0f7FC00000, %r2, %p1;
-; CHECK-NOF16-NEXT:    setp.ne.s32 %p2, %r1, 0;
-; CHECK-NOF16-NEXT:    selp.f32 %r4, %r1, %r3, %p2;
-; CHECK-NOF16-NEXT:    setp.eq.f32 %p3, %r3, 0f00000000;
-; CHECK-NOF16-NEXT:    selp.f32 %r5, %r4, %r3, %p3;
+; CHECK-NOF16-NEXT:    setp.ne.s32 %p1, %r1, 0;
+; CHECK-NOF16-NEXT:    selp.f32 %r2, %r1, 0f00000000, %p1;
+; CHECK-NOF16-NEXT:    setp.nan.f32 %p2, %r1, %r1;
+; CHECK-NOF16-NEXT:    min.f32 %r3, %r1, 0f00000000;
+; CHECK-NOF16-NEXT:    selp.f32 %r4, 0f7FC00000, %r3, %p2;
+; CHECK-NOF16-NEXT:    setp.eq.f32 %p3, %r4, 0f00000000;
+; CHECK-NOF16-NEXT:    selp.f32 %r5, %r2, %r4, %p3;
 ; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r5;
 ; CHECK-NOF16-NEXT:    ret;
 ;
@@ -769,13 +763,13 @@ define float @minimum_imm2(float %a) {
 ; CHECK-NOF16-EMPTY:
 ; CHECK-NOF16-NEXT:  // %bb.0:
 ; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [minimum_imm2_param_0];
-; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %r1, %r1;
-; CHECK-NOF16-NEXT:    min.f32 %r2, %r1, 0f00000000;
-; CHECK-NOF16-NEXT:    selp.f32 %r3, 0f7FC00000, %r2, %p1;
-; CHECK-NOF16-NEXT:    setp.ne.s32 %p2, %r1, 0;
-; CHECK-NOF16-NEXT:    selp.f32 %r4, %r1, %r3, %p2;
-; CHECK-NOF16-NEXT:    setp.eq.f32 %p3, %r3, 0f00000000;
-; CHECK-NOF16-NEXT:    selp.f32 %r5, %r4, %r3, %p3;
+; CHECK-NOF16-NEXT:    setp.ne.s32 %p1, %r1, 0;
+; CHECK-NOF16-NEXT:    selp.f32 %r2, %r1, 0f00000000, %p1;
+; CHECK-NOF16-NEXT:    setp.nan.f32 %p2, %r1, %r1;
+; CHECK-NOF16-NEXT:    min.f32 %r3, %r1, 0f00000000;
+; CHECK-NOF16-NEXT:    selp.f32 %r4, 0f7FC00000, %r3, %p2;
+; CHECK-NOF16-NEXT:    setp.eq.f32 %p3, %r4, 0f00000000;
+; CHECK-NOF16-NEXT:    selp.f32 %r5, %r2, %r4, %p3;
 ; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r5;
 ; CHECK-NOF16-NEXT:    ret;
 ;
@@ -805,22 +799,20 @@ define float @minimum_imm2(float %a) {
 define float @minimum_float_ftz(float %a, float %b) #1 {
 ; CHECK-NOF16-LABEL: minimum_float_ftz(
 ; CHECK-NOF16:       {
-; CHECK-NOF16-NEXT:    .reg .pred %p<5>;
-; CHECK-NOF16-NEXT:    .reg .b32 %r<8>;
+; CHECK-NOF16-NEXT:    .reg .pred %p<4>;
+; CHECK-NOF16-NEXT:    .reg .b32 %r<7>;
 ; CHECK-NOF...
[truncated]

@arsenm arsenm added the floating-point Floating-point math label Jun 4, 2025
@nikic nikic merged commit 54da543 into llvm:main Jun 4, 2025
19 checks passed
@nikic nikic deleted the maximum-avoid-one-cmp branch June 4, 2025 08:41
@d0k
Copy link
Member

d0k commented Jun 4, 2025

> We already know at this point that both values must be +/-0.0

Isn't it just one of them that must be +/-0.0? We only know that the other value is smaller (max)/bigger (min) than 0.

> only check the sign of one of the values

If we were actually checking the sign this would work, but we compare equality with integer 0.

nikic added a commit that referenced this pull request Jun 4, 2025
Revert "[SelectionDAG] Avoid one comparison when legalizing fmaximum (#142732)"

This reverts commit 54da543.

I made a logic error here with the assumption that both values
are known to be +/-0.0.
@nikic
Copy link
Contributor Author

nikic commented Jun 4, 2025

> > We already know at this point that both values must be +/-0.0
>
> Isn't it just one of them that must be +/-0.0? We only know that the other value is smaller (max)/bigger (min) than 0.
>
> > only check the sign of one of the values
>
> If we were actually checking the sign this would work, but we compare equality with integer 0.

You are right. I've reverted this for now.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants