[NVPTX] Fix lowering of i1 SETCC #115035

AlexMaclean · 2024-11-05T17:50:41Z

Add DAG legalization support for expanding i1 SETCC nodes using appropriate logical operations to simulate integer comparisons. Use these expansions to handle i1 SETCC in NVPTX.

fixes #58428 and #57405

llvmbot · 2024-11-05T17:51:16Z

@llvm/pr-subscribers-llvm-selectiondag

Author: Alex MacLean (AlexMaclean)

Changes

fixes #58428

Full diff: https://github.com/llvm/llvm-project/pull/115035.diff

4 Files Affected:

(modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+3)
(modified) llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp (+43)
(modified) llvm/lib/Target/NVPTX/NVPTXISelLowering.h (+2)
(added) llvm/test/CodeGen/NVPTX/i1-icmp.ll (+193)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 42232bd195a651..a599328a0e5be2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -18723,6 +18723,9 @@ SDValue DAGCombiner::rebuildSetCC(SDValue N) {
     SDValue Op0 = N->getOperand(0);
     SDValue Op1 = N->getOperand(1);
 
+    if (!TLI.isOperationLegal(ISD::SETCC, Op0.getValueType()))
+      return SDValue();
+
     if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
       bool Equal = false;
       // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index d3bf0ecfe2cc92..a74e9dcccdd770 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -667,6 +667,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
     setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
     setTruncStoreAction(VT, MVT::i1, Expand);
   }
+  setOperationAction(ISD::SETCC, MVT::i1, Custom);
 
   // expand extload of vector of integers.
   setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::v2i16,
@@ -2666,6 +2667,46 @@ SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op,
   }
 }
 
+// Lowers SETCC nodes that aren't directly supported by our arch.
+SDValue NVPTXTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+  SDValue L = Op->getOperand(0);
+  SDValue R = Op->getOperand(1);
+
+  if (L.getValueType() != MVT::i1)
+    return SDValue();
+
+  SDLoc DL(Op);
+  SDValue Ret;
+  switch (cast<CondCodeSDNode>(Op->getOperand(2))->get()) {
+  default:
+    llvm_unreachable("Unknown integer setcc!");
+  case ISD::SETEQ: // X == Y  -> ~(X^Y)
+    Ret = DAG.getNOT(DL, DAG.getNode(ISD::XOR, DL, MVT::i1, L, R), MVT::i1);
+    break;
+  case ISD::SETNE: // X != Y   -->  (X^Y)
+    Ret = DAG.getNode(ISD::XOR, DL, MVT::i1, L, R);
+    break;
+  case ISD::SETGT:  // X >s Y   -->  X == 0 & Y == 1  -->  ~X & Y
+  case ISD::SETULT: // X <u Y   -->  X == 0 & Y == 1  -->  ~X & Y
+    Ret = DAG.getNode(ISD::AND, DL, MVT::i1, R, DAG.getNOT(DL, L, MVT::i1));
+    break;
+  case ISD::SETLT:  // X <s Y   --> X == 1 & Y == 0  -->  ~Y & X
+  case ISD::SETUGT: // X >u Y   --> X == 1 & Y == 0  -->  ~Y & X
+    Ret = DAG.getNode(ISD::AND, DL, MVT::i1, L, DAG.getNOT(DL, R, MVT::i1));
+    break;
+  case ISD::SETULE: // X <=u Y  --> X == 0 | Y == 1  -->  ~X | Y
+  case ISD::SETGE:  // X >=s Y  --> X == 0 | Y == 1  -->  ~X | Y
+    Ret = DAG.getNode(ISD::OR, DL, MVT::i1, R, DAG.getNOT(DL, L, MVT::i1));
+    break;
+  case ISD::SETUGE: // X >=u Y  --> X == 1 | Y == 0  -->  ~Y | X
+  case ISD::SETLE:  // X <=s Y  --> X == 1 | Y == 0  -->  ~Y | X
+    Ret = DAG.getNode(ISD::OR, DL, MVT::i1, L, DAG.getNOT(DL, R, MVT::i1));
+    break;
+  }
+
+  return DAG.getZExtOrTrunc(Ret, DL, Op.getValueType());
+}
+
 /// If the types match, convert the generic copysign to the NVPTXISD version,
 /// otherwise bail ensuring that mismatched cases are properly expaned.
 SDValue NVPTXTargetLowering::LowerFCOPYSIGN(SDValue Op,
@@ -2919,6 +2960,8 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
     return LowerSTORE(Op, DAG);
   case ISD::LOAD:
     return LowerLOAD(Op, DAG);
+  case ISD::SETCC:
+    return LowerSETCC(Op, DAG);
   case ISD::SHL_PARTS:
     return LowerShiftLeftParts(Op, DAG);
   case ISD::SRA_PARTS:
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index c8b589ae39413e..b1bb9090464ac4 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -628,6 +628,8 @@ class NVPTXTargetLowering : public TargetLowering {
   SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
 
+  SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+
   SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
 
   SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/NVPTX/i1-icmp.ll b/llvm/test/CodeGen/NVPTX/i1-icmp.ll
new file mode 100644
index 00000000000000..db9ae6541b87ae
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/i1-icmp.ll
@@ -0,0 +1,193 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
+
+target triple = "nvptx-nvidia-cuda"
+
+define i32 @icmp_i1_eq(i32 %a, i32 %b) {
+; CHECK-LABEL: icmp_i1_eq(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<4>;
+; CHECK-NEXT:    .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u32 %r1, [icmp_i1_eq_param_0];
+; CHECK-NEXT:    setp.gt.s32 %p1, %r1, 1;
+; CHECK-NEXT:    ld.param.u32 %r2, [icmp_i1_eq_param_1];
+; CHECK-NEXT:    setp.gt.s32 %p2, %r2, 1;
+; CHECK-NEXT:    xor.pred %p3, %p1, %p2;
+; CHECK-NEXT:    @%p3 bra $L__BB0_2;
+; CHECK-NEXT:  // %bb.1: // %bb1
+; CHECK-NEXT:    mov.b32 %r4, 1;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT:    ret;
+; CHECK-NEXT:  $L__BB0_2: // %bb2
+; CHECK-NEXT:    mov.b32 %r3, 127;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
+  %p1 = icmp sgt i32 %a, 1
+  %p2 = icmp sgt i32 %b, 1
+  %c = icmp eq i1 %p1, %p2
+  br i1 %c, label %bb1, label %bb2
+bb1:
+  ret i32 1
+bb2:
+  ret i32 127
+}
+
+define i32 @icmp_i1_ne(i32 %a, i32 %b) {
+; CHECK-LABEL: icmp_i1_ne(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<5>;
+; CHECK-NEXT:    .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u32 %r1, [icmp_i1_ne_param_0];
+; CHECK-NEXT:    setp.gt.s32 %p1, %r1, 1;
+; CHECK-NEXT:    ld.param.u32 %r2, [icmp_i1_ne_param_1];
+; CHECK-NEXT:    setp.gt.s32 %p2, %r2, 1;
+; CHECK-NEXT:    xor.pred %p3, %p1, %p2;
+; CHECK-NEXT:    not.pred %p4, %p3;
+; CHECK-NEXT:    @%p4 bra $L__BB1_2;
+; CHECK-NEXT:  // %bb.1: // %bb1
+; CHECK-NEXT:    mov.b32 %r4, 1;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT:    ret;
+; CHECK-NEXT:  $L__BB1_2: // %bb2
+; CHECK-NEXT:    mov.b32 %r3, 127;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
+  %p1 = icmp sgt i32 %a, 1
+  %p2 = icmp sgt i32 %b, 1
+  %c = icmp ne i1 %p1, %p2
+  br i1 %c, label %bb1, label %bb2
+bb1:
+  ret i32 1
+bb2:
+  ret i32 127
+}
+
+define i32 @icmp_i1_sgt(i32 %a, i32 %b) {
+; CHECK-LABEL: icmp_i1_sgt(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<4>;
+; CHECK-NEXT:    .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u32 %r1, [icmp_i1_sgt_param_0];
+; CHECK-NEXT:    setp.gt.s32 %p1, %r1, 1;
+; CHECK-NEXT:    ld.param.u32 %r2, [icmp_i1_sgt_param_1];
+; CHECK-NEXT:    setp.lt.s32 %p2, %r2, 2;
+; CHECK-NEXT:    or.pred %p3, %p1, %p2;
+; CHECK-NEXT:    @%p3 bra $L__BB2_2;
+; CHECK-NEXT:  // %bb.1: // %bb1
+; CHECK-NEXT:    mov.b32 %r4, 1;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT:    ret;
+; CHECK-NEXT:  $L__BB2_2: // %bb2
+; CHECK-NEXT:    mov.b32 %r3, 127;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
+  %p1 = icmp sgt i32 %a, 1
+  %p2 = icmp sgt i32 %b, 1
+  %c = icmp sgt i1 %p1, %p2
+  br i1 %c, label %bb1, label %bb2
+bb1:
+  ret i32 1
+bb2:
+  ret i32 127
+}
+
+define i32 @icmp_i1_slt(i32 %a, i32 %b) {
+; CHECK-LABEL: icmp_i1_slt(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<4>;
+; CHECK-NEXT:    .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u32 %r1, [icmp_i1_slt_param_0];
+; CHECK-NEXT:    setp.lt.s32 %p1, %r1, 2;
+; CHECK-NEXT:    ld.param.u32 %r2, [icmp_i1_slt_param_1];
+; CHECK-NEXT:    setp.gt.s32 %p2, %r2, 1;
+; CHECK-NEXT:    or.pred %p3, %p2, %p1;
+; CHECK-NEXT:    @%p3 bra $L__BB3_2;
+; CHECK-NEXT:  // %bb.1: // %bb1
+; CHECK-NEXT:    mov.b32 %r4, 1;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT:    ret;
+; CHECK-NEXT:  $L__BB3_2: // %bb2
+; CHECK-NEXT:    mov.b32 %r3, 127;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
+  %p1 = icmp sgt i32 %a, 1
+  %p2 = icmp sgt i32 %b, 1
+  %c = icmp slt i1 %p1, %p2
+  br i1 %c, label %bb1, label %bb2
+bb1:
+  ret i32 1
+bb2:
+  ret i32 127
+}
+
+define i32 @icmp_i1_sge(i32 %a, i32 %b) {
+; CHECK-LABEL: icmp_i1_sge(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<4>;
+; CHECK-NEXT:    .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u32 %r1, [icmp_i1_sge_param_0];
+; CHECK-NEXT:    setp.gt.s32 %p1, %r1, 1;
+; CHECK-NEXT:    ld.param.u32 %r2, [icmp_i1_sge_param_1];
+; CHECK-NEXT:    setp.lt.s32 %p2, %r2, 2;
+; CHECK-NEXT:    and.pred %p3, %p1, %p2;
+; CHECK-NEXT:    @%p3 bra $L__BB4_2;
+; CHECK-NEXT:  // %bb.1: // %bb1
+; CHECK-NEXT:    mov.b32 %r4, 1;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT:    ret;
+; CHECK-NEXT:  $L__BB4_2: // %bb2
+; CHECK-NEXT:    mov.b32 %r3, 127;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
+  %p1 = icmp sgt i32 %a, 1
+  %p2 = icmp sgt i32 %b, 1
+  %c = icmp sge i1 %p1, %p2
+  br i1 %c, label %bb1, label %bb2
+bb1:
+  ret i32 1
+bb2:
+  ret i32 127
+}
+
+define i32 @icmp_i1_sle(i32 %a, i32 %b) {
+; CHECK-LABEL: icmp_i1_sle(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<4>;
+; CHECK-NEXT:    .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u32 %r1, [icmp_i1_sle_param_0];
+; CHECK-NEXT:    setp.lt.s32 %p1, %r1, 2;
+; CHECK-NEXT:    ld.param.u32 %r2, [icmp_i1_sle_param_1];
+; CHECK-NEXT:    setp.gt.s32 %p2, %r2, 1;
+; CHECK-NEXT:    and.pred %p3, %p2, %p1;
+; CHECK-NEXT:    @%p3 bra $L__BB5_2;
+; CHECK-NEXT:  // %bb.1: // %bb1
+; CHECK-NEXT:    mov.b32 %r4, 1;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT:    ret;
+; CHECK-NEXT:  $L__BB5_2: // %bb2
+; CHECK-NEXT:    mov.b32 %r3, 127;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
+  %p1 = icmp sgt i32 %a, 1
+  %p2 = icmp sgt i32 %b, 1
+  %c = icmp sle i1 %p1, %p2
+  br i1 %c, label %bb1, label %bb2
+bb1:
+  ret i32 1
+bb2:
+  ret i32 127
+}
+

llvmbot · 2024-11-05T17:51:16Z

@llvm/pr-subscribers-backend-nvptx

Author: Alex MacLean (AlexMaclean)

Changes

fixes #58428

Full diff: https://github.com/llvm/llvm-project/pull/115035.diff

4 Files Affected:

(modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+3)
(modified) llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp (+43)
(modified) llvm/lib/Target/NVPTX/NVPTXISelLowering.h (+2)
(added) llvm/test/CodeGen/NVPTX/i1-icmp.ll (+193)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 42232bd195a651..a599328a0e5be2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -18723,6 +18723,9 @@ SDValue DAGCombiner::rebuildSetCC(SDValue N) {
     SDValue Op0 = N->getOperand(0);
     SDValue Op1 = N->getOperand(1);
 
+    if (!TLI.isOperationLegal(ISD::SETCC, Op0.getValueType()))
+      return SDValue();
+
     if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
       bool Equal = false;
       // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index d3bf0ecfe2cc92..a74e9dcccdd770 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -667,6 +667,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
     setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
     setTruncStoreAction(VT, MVT::i1, Expand);
   }
+  setOperationAction(ISD::SETCC, MVT::i1, Custom);
 
   // expand extload of vector of integers.
   setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::v2i16,
@@ -2666,6 +2667,46 @@ SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op,
   }
 }
 
+// Lowers SETCC nodes that aren't directly supported by our arch.
+SDValue NVPTXTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+  SDValue L = Op->getOperand(0);
+  SDValue R = Op->getOperand(1);
+
+  if (L.getValueType() != MVT::i1)
+    return SDValue();
+
+  SDLoc DL(Op);
+  SDValue Ret;
+  switch (cast<CondCodeSDNode>(Op->getOperand(2))->get()) {
+  default:
+    llvm_unreachable("Unknown integer setcc!");
+  case ISD::SETEQ: // X == Y  -> ~(X^Y)
+    Ret = DAG.getNOT(DL, DAG.getNode(ISD::XOR, DL, MVT::i1, L, R), MVT::i1);
+    break;
+  case ISD::SETNE: // X != Y   -->  (X^Y)
+    Ret = DAG.getNode(ISD::XOR, DL, MVT::i1, L, R);
+    break;
+  case ISD::SETGT:  // X >s Y   -->  X == 0 & Y == 1  -->  ~X & Y
+  case ISD::SETULT: // X <u Y   -->  X == 0 & Y == 1  -->  ~X & Y
+    Ret = DAG.getNode(ISD::AND, DL, MVT::i1, R, DAG.getNOT(DL, L, MVT::i1));
+    break;
+  case ISD::SETLT:  // X <s Y   --> X == 1 & Y == 0  -->  ~Y & X
+  case ISD::SETUGT: // X >u Y   --> X == 1 & Y == 0  -->  ~Y & X
+    Ret = DAG.getNode(ISD::AND, DL, MVT::i1, L, DAG.getNOT(DL, R, MVT::i1));
+    break;
+  case ISD::SETULE: // X <=u Y  --> X == 0 | Y == 1  -->  ~X | Y
+  case ISD::SETGE:  // X >=s Y  --> X == 0 | Y == 1  -->  ~X | Y
+    Ret = DAG.getNode(ISD::OR, DL, MVT::i1, R, DAG.getNOT(DL, L, MVT::i1));
+    break;
+  case ISD::SETUGE: // X >=u Y  --> X == 1 | Y == 0  -->  ~Y | X
+  case ISD::SETLE:  // X <=s Y  --> X == 1 | Y == 0  -->  ~Y | X
+    Ret = DAG.getNode(ISD::OR, DL, MVT::i1, L, DAG.getNOT(DL, R, MVT::i1));
+    break;
+  }
+
+  return DAG.getZExtOrTrunc(Ret, DL, Op.getValueType());
+}
+
 /// If the types match, convert the generic copysign to the NVPTXISD version,
 /// otherwise bail ensuring that mismatched cases are properly expaned.
 SDValue NVPTXTargetLowering::LowerFCOPYSIGN(SDValue Op,
@@ -2919,6 +2960,8 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
     return LowerSTORE(Op, DAG);
   case ISD::LOAD:
     return LowerLOAD(Op, DAG);
+  case ISD::SETCC:
+    return LowerSETCC(Op, DAG);
   case ISD::SHL_PARTS:
     return LowerShiftLeftParts(Op, DAG);
   case ISD::SRA_PARTS:
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index c8b589ae39413e..b1bb9090464ac4 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -628,6 +628,8 @@ class NVPTXTargetLowering : public TargetLowering {
   SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
 
+  SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+
   SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
 
   SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/NVPTX/i1-icmp.ll b/llvm/test/CodeGen/NVPTX/i1-icmp.ll
new file mode 100644
index 00000000000000..db9ae6541b87ae
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/i1-icmp.ll
@@ -0,0 +1,193 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
+
+target triple = "nvptx-nvidia-cuda"
+
+define i32 @icmp_i1_eq(i32 %a, i32 %b) {
+; CHECK-LABEL: icmp_i1_eq(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<4>;
+; CHECK-NEXT:    .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u32 %r1, [icmp_i1_eq_param_0];
+; CHECK-NEXT:    setp.gt.s32 %p1, %r1, 1;
+; CHECK-NEXT:    ld.param.u32 %r2, [icmp_i1_eq_param_1];
+; CHECK-NEXT:    setp.gt.s32 %p2, %r2, 1;
+; CHECK-NEXT:    xor.pred %p3, %p1, %p2;
+; CHECK-NEXT:    @%p3 bra $L__BB0_2;
+; CHECK-NEXT:  // %bb.1: // %bb1
+; CHECK-NEXT:    mov.b32 %r4, 1;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT:    ret;
+; CHECK-NEXT:  $L__BB0_2: // %bb2
+; CHECK-NEXT:    mov.b32 %r3, 127;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
+  %p1 = icmp sgt i32 %a, 1
+  %p2 = icmp sgt i32 %b, 1
+  %c = icmp eq i1 %p1, %p2
+  br i1 %c, label %bb1, label %bb2
+bb1:
+  ret i32 1
+bb2:
+  ret i32 127
+}
+
+define i32 @icmp_i1_ne(i32 %a, i32 %b) {
+; CHECK-LABEL: icmp_i1_ne(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<5>;
+; CHECK-NEXT:    .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u32 %r1, [icmp_i1_ne_param_0];
+; CHECK-NEXT:    setp.gt.s32 %p1, %r1, 1;
+; CHECK-NEXT:    ld.param.u32 %r2, [icmp_i1_ne_param_1];
+; CHECK-NEXT:    setp.gt.s32 %p2, %r2, 1;
+; CHECK-NEXT:    xor.pred %p3, %p1, %p2;
+; CHECK-NEXT:    not.pred %p4, %p3;
+; CHECK-NEXT:    @%p4 bra $L__BB1_2;
+; CHECK-NEXT:  // %bb.1: // %bb1
+; CHECK-NEXT:    mov.b32 %r4, 1;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT:    ret;
+; CHECK-NEXT:  $L__BB1_2: // %bb2
+; CHECK-NEXT:    mov.b32 %r3, 127;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
+  %p1 = icmp sgt i32 %a, 1
+  %p2 = icmp sgt i32 %b, 1
+  %c = icmp ne i1 %p1, %p2
+  br i1 %c, label %bb1, label %bb2
+bb1:
+  ret i32 1
+bb2:
+  ret i32 127
+}
+
+define i32 @icmp_i1_sgt(i32 %a, i32 %b) {
+; CHECK-LABEL: icmp_i1_sgt(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<4>;
+; CHECK-NEXT:    .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u32 %r1, [icmp_i1_sgt_param_0];
+; CHECK-NEXT:    setp.gt.s32 %p1, %r1, 1;
+; CHECK-NEXT:    ld.param.u32 %r2, [icmp_i1_sgt_param_1];
+; CHECK-NEXT:    setp.lt.s32 %p2, %r2, 2;
+; CHECK-NEXT:    or.pred %p3, %p1, %p2;
+; CHECK-NEXT:    @%p3 bra $L__BB2_2;
+; CHECK-NEXT:  // %bb.1: // %bb1
+; CHECK-NEXT:    mov.b32 %r4, 1;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT:    ret;
+; CHECK-NEXT:  $L__BB2_2: // %bb2
+; CHECK-NEXT:    mov.b32 %r3, 127;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
+  %p1 = icmp sgt i32 %a, 1
+  %p2 = icmp sgt i32 %b, 1
+  %c = icmp sgt i1 %p1, %p2
+  br i1 %c, label %bb1, label %bb2
+bb1:
+  ret i32 1
+bb2:
+  ret i32 127
+}
+
+define i32 @icmp_i1_slt(i32 %a, i32 %b) {
+; CHECK-LABEL: icmp_i1_slt(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<4>;
+; CHECK-NEXT:    .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u32 %r1, [icmp_i1_slt_param_0];
+; CHECK-NEXT:    setp.lt.s32 %p1, %r1, 2;
+; CHECK-NEXT:    ld.param.u32 %r2, [icmp_i1_slt_param_1];
+; CHECK-NEXT:    setp.gt.s32 %p2, %r2, 1;
+; CHECK-NEXT:    or.pred %p3, %p2, %p1;
+; CHECK-NEXT:    @%p3 bra $L__BB3_2;
+; CHECK-NEXT:  // %bb.1: // %bb1
+; CHECK-NEXT:    mov.b32 %r4, 1;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT:    ret;
+; CHECK-NEXT:  $L__BB3_2: // %bb2
+; CHECK-NEXT:    mov.b32 %r3, 127;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
+  %p1 = icmp sgt i32 %a, 1
+  %p2 = icmp sgt i32 %b, 1
+  %c = icmp slt i1 %p1, %p2
+  br i1 %c, label %bb1, label %bb2
+bb1:
+  ret i32 1
+bb2:
+  ret i32 127
+}
+
+define i32 @icmp_i1_sge(i32 %a, i32 %b) {
+; CHECK-LABEL: icmp_i1_sge(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<4>;
+; CHECK-NEXT:    .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u32 %r1, [icmp_i1_sge_param_0];
+; CHECK-NEXT:    setp.gt.s32 %p1, %r1, 1;
+; CHECK-NEXT:    ld.param.u32 %r2, [icmp_i1_sge_param_1];
+; CHECK-NEXT:    setp.lt.s32 %p2, %r2, 2;
+; CHECK-NEXT:    and.pred %p3, %p1, %p2;
+; CHECK-NEXT:    @%p3 bra $L__BB4_2;
+; CHECK-NEXT:  // %bb.1: // %bb1
+; CHECK-NEXT:    mov.b32 %r4, 1;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT:    ret;
+; CHECK-NEXT:  $L__BB4_2: // %bb2
+; CHECK-NEXT:    mov.b32 %r3, 127;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
+  %p1 = icmp sgt i32 %a, 1
+  %p2 = icmp sgt i32 %b, 1
+  %c = icmp sge i1 %p1, %p2
+  br i1 %c, label %bb1, label %bb2
+bb1:
+  ret i32 1
+bb2:
+  ret i32 127
+}
+
+define i32 @icmp_i1_sle(i32 %a, i32 %b) {
+; CHECK-LABEL: icmp_i1_sle(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<4>;
+; CHECK-NEXT:    .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u32 %r1, [icmp_i1_sle_param_0];
+; CHECK-NEXT:    setp.lt.s32 %p1, %r1, 2;
+; CHECK-NEXT:    ld.param.u32 %r2, [icmp_i1_sle_param_1];
+; CHECK-NEXT:    setp.gt.s32 %p2, %r2, 1;
+; CHECK-NEXT:    and.pred %p3, %p2, %p1;
+; CHECK-NEXT:    @%p3 bra $L__BB5_2;
+; CHECK-NEXT:  // %bb.1: // %bb1
+; CHECK-NEXT:    mov.b32 %r4, 1;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT:    ret;
+; CHECK-NEXT:  $L__BB5_2: // %bb2
+; CHECK-NEXT:    mov.b32 %r3, 127;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
+  %p1 = icmp sgt i32 %a, 1
+  %p2 = icmp sgt i32 %b, 1
+  %c = icmp sle i1 %p1, %p2
+  br i1 %c, label %bb1, label %bb2
+bb1:
+  ret i32 1
+bb2:
+  ret i32 127
+}
+

arsenm · 2024-11-05T17:55:52Z

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

@@ -2666,6 +2667,46 @@ SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op,
  }
 }

+// Lowers SETCC nodes that aren't directly supported by our arch.


Should use setCondCodeAction and not reinvent all of this

@arsenm Can you please explain a little further how I can use setCondCodeAction to avoid all of this?

It's an additional legality table because the single type based legality rules are insufficient for compares. It's the same as setting expand / custom / promote, except for the specific condition code and type

Hmm, this table seems to be used inconsistently, does the type represent the value which is produced by the SETCC node or the types of the values which are being compared?

https://github.com/llvm/llvm-project/blob/c1cec8c/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp#L8798
vs.
https://github.com/llvm/llvm-project/blob/c1cec8c/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp#L4994

Also what is the advantage of using this table over the current approach? What will I be able to avoid reinventing?

The types being compared.

You are reinventing condition code legalization here:

llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Line 11832 in c1cec8c

bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,

And generic code can no longer accurately query which conditions are legal, which is why you have the hack to check if SETCC is legal

Sorry, I'm still confused. I don't see anything like the boolean expansion I'm using for i1 in the LegalizeSetCCCondCode function.

And generic code can no longer accurately query which conditions are legal, which is why you have the hack to check if SETCC is legal

I think I'm going to need to add a check for legality to the dag combine no matter what, I suppose I can check this table instead.

Also, is existing code like this incorrect?

llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Line 4994 in c1cec8c

isCondCodeLegal(NewCC, VT.getSimpleVT())) &&

Okay, I've update the code to use the CondCode table. Not sure if this is what you had in mind though, I don't think it has made the implementation significantly simpler. I also did have to update a couple uses of the table which seemed incorrect.

@arsenm does this look better?

Artem-B · 2024-11-05T19:26:58Z

How much of a difference would it make on the SASS level if we were to just extend i1 arguments to i32 and then just run setp on them, instead of logical ops done directly on predicates?

E.g. setp.lt(i1 a, i1 b) -> setp.lt(i32 selp(a, 1, 0), i32 selp(b(1,0)))

AlexMaclean · 2024-11-05T22:40:20Z

How much of a difference would it make on the SASS level if we were to just extend i1 arguments to i32 and then just run setp on them, instead of logical ops done directly on predicates?

E.g. setp.lt(i1 a, i1 b) -> setp.lt(i32 selp(a, 1, 0), i32 selp(b(1,0)))

The sass will likely be much worse in most cases if we add the selp instructions in here.

Artem-B · 2024-11-05T22:56:42Z

You're right. At least, ptxas-11.x does not optimize those selects: https://godbolt.org/z/q99cvccfq

One observation is that under the hood, setp instruction translates into the full-blown comparison, combined with the predicate logical operation, and your patch folds both the setp.gt.s32 %p2, %r2, 1; and the predicate logical ops into a very nice SASS:

 ISETP.GT.AND P0, PT, R5, 0x1, PT 
 ISETP.GT.XOR P0, PT, R4, 0x1, P0 
 @!P0 IMAD.MOV.U32 R4, RZ, RZ, 0x1 
 @P0 IMAD.MOV.U32 R4, RZ, RZ, 0x7f

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

AlexMaclean · 2024-11-12T21:02:49Z

@arsenm ping for your thoughts on these discussions

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Ensure that all uses of CondCodeAction table are checking the compared types, not the produced type. This is a prerequisite to landing #115035

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

llvm/test/CodeGen/NVPTX/i1-icmp.ll

justinfargnoli · 2024-11-14T22:28:35Z

llvm/test/CodeGen/NVPTX/i1-icmp.ll

+define i32 @icmp_i1_eq(i32 %a, i32 %b) {
+; CHECK-LABEL: icmp_i1_eq(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<4>;
+; CHECK-NEXT:    .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u32 %r1, [icmp_i1_eq_param_0];
+; CHECK-NEXT:    setp.gt.s32 %p1, %r1, 1;
+; CHECK-NEXT:    ld.param.u32 %r2, [icmp_i1_eq_param_1];
+; CHECK-NEXT:    setp.gt.s32 %p2, %r2, 1;
+; CHECK-NEXT:    xor.pred %p3, %p1, %p2;
+; CHECK-NEXT:    @%p3 bra $L__BB0_2;
+; CHECK-NEXT:  // %bb.1: // %bb1
+; CHECK-NEXT:    mov.b32 %r4, 1;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT:    ret;
+; CHECK-NEXT:  $L__BB0_2: // %bb2
+; CHECK-NEXT:    mov.b32 %r3, 127;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
+  %p1 = icmp sgt i32 %a, 1
+  %p2 = icmp sgt i32 %b, 1
+  %c = icmp eq i1 %p1, %p2
+  br i1 %c, label %bb1, label %bb2
+bb1:
+  ret i32 1
+bb2:
+  ret i32 127
+}


Can we refactor this to:

define i1 @icmp_i1_eq(i1 %a, i1 %b) { %c = icmp eq i1 %a, %b ret i1 %c }

No, setcc used by a brcond is handled differently and that is part of the problem.

https://github.com/llvm/llvm-project/blob/6a58c531762a78a3336793891cedbb44b9ed9eb0/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L18715

And why not

define i32 @f(i1 %0, i1 %1) { BB: %C = icmp ugt i1 %1, %0 br i1 %C, label %BB1, label %BB2 BB1: ret i32 1 BB2: ret i32 127 }

like in the reproducer?

justinfargnoli

Overall, LGTM :)

justinfargnoli · 2024-11-15T01:06:16Z

llvm/test/CodeGen/NVPTX/i1-icmp.ll

+define i32 @icmp_i1_eq(i32 %a, i32 %b) {
+; CHECK-LABEL: icmp_i1_eq(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<4>;
+; CHECK-NEXT:    .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u32 %r1, [icmp_i1_eq_param_0];
+; CHECK-NEXT:    setp.gt.s32 %p1, %r1, 1;
+; CHECK-NEXT:    ld.param.u32 %r2, [icmp_i1_eq_param_1];
+; CHECK-NEXT:    setp.gt.s32 %p2, %r2, 1;
+; CHECK-NEXT:    xor.pred %p3, %p1, %p2;
+; CHECK-NEXT:    @%p3 bra $L__BB0_2;
+; CHECK-NEXT:  // %bb.1: // %bb1
+; CHECK-NEXT:    mov.b32 %r4, 1;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT:    ret;
+; CHECK-NEXT:  $L__BB0_2: // %bb2
+; CHECK-NEXT:    mov.b32 %r3, 127;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
+  %p1 = icmp sgt i32 %a, 1
+  %p2 = icmp sgt i32 %b, 1
+  %c = icmp eq i1 %p1, %p2
+  br i1 %c, label %bb1, label %bb2
+bb1:
+  ret i32 1
+bb2:
+  ret i32 127
+}


And why not

define i32 @f(i1 %0, i1 %1) { BB: %C = icmp ugt i1 %1, %0 br i1 %C, label %BB1, label %BB2 BB1: ret i32 1 BB2: ret i32 127 }

like in the reproducer?

justinfargnoli · 2024-11-15T23:07:14Z

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

+      default:
+        llvm_unreachable("Unknown integer setcc!");
+      case ISD::SETEQ: // X == Y  -->  ~(X ^ Y)
+        Ret = DAG.getNOT(dl, DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS),


Does this need to be guarded by TLI.isLegal(ISD::XOR)?

I'd prefer not to. NVPTX is the only target using these expansions and we don't have any way to expand if not. I don't see what the point of adding them would be.

My question boils down to: will xor be legalized if it's generated for a target that doesn't support it? In other words, can Expand generate illegal code?

My assumption was no, but based on your answer, I'm gathering that Expand can generate illegal code and rely on the legalizer to legalize it. Is that correct?

I think the issue here is that there's more than one possible interpretation of "lower SetCC(i1) differently". NVPTX can do it via logical ops on predicates, other targets may not. On other targets it may be something else. Whoever would use "setCondCodeAction(...,Expand)" would have no idea what it would expand into. This should at least be documented. If/when we would encounter a target which would have a different way of expanding setcc, we would probably need a more nuanced hint to tell LLVM which flavor we actually want.

I think at the moment only Hexagon uses setCondCodeAction(...,i1, Expand). It would be good to check what effect this change has on them.

AlexMaclean · 2024-11-19T01:25:02Z

@arsenm @Artem-B when you have a minute could I please get another round of review on this change.

Artem-B · 2024-11-19T19:59:01Z

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

-      return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1,
-                          Equal ? ISD::SETEQ : ISD::SETNE);
+      const ISD::CondCode CC = Equal ? ISD::SETEQ : ISD::SETNE;
+      if (!LegalOperations || TLI.isCondCodeLegal(CC, Op0.getSimpleValueType()))


This could definitely use a comment. AFAICT what we're doing here is "combine into setcc(NE/EQ) only if we know they are legal, which leaves xor(i1) untouched and allows us to use eventually use logic ops on predicates" .

Do we actually need to add this special case here?
It appears that we do manage to lower setcc x, y, eq as xor.pred already : https://godbolt.org/z/cMx9d8P33

What happens if we remove the change here?

Presumably it will infinite loop the combiner without this

OK. That should also be mentioned in the comments.

Yep, we get stuck in an infinite loop: legalization converts the setcc into an xor and then dag combiner converts it back to a setcc over and over again. I've added a comment explaining.

It appears that we do manage to lower setcc x, y, eq as xor.pred already : https://godbolt.org/z/cMx9d8P33

Good catch, this is the result of some old table-gen patterns which handled eq and ne. I've removed these as the legalization approach handles these cases and is more powerful since it allows DAGCombiner to manipulate the logical ops created.

Artem-B · 2024-11-19T20:16:37Z

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

+      default:
+        llvm_unreachable("Unknown integer setcc!");
+      case ISD::SETEQ: // X == Y  -->  ~(X ^ Y)
+        Ret = DAG.getNOT(dl, DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS),


I think the issue here is that there's more than one possible interpretation of "lower SetCC(i1) differently". NVPTX can do it via logical ops on predicates, other targets may not. On other targets it may be something else. Whoever would use "setCondCodeAction(...,Expand)" would have no idea what it would expand into. This should at least be documented. If/when we would encounter a target which would have a different way of expanding setcc, we would probably need a more nuanced hint to tell LLVM which flavor we actually want.

I think at the moment only Hexagon uses setCondCodeAction(...,i1, Expand). It would be good to check what effect this change has on them.

arsenm · 2024-11-19T20:20:08Z

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

-      return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1,
-                          Equal ? ISD::SETEQ : ISD::SETNE);
+      const ISD::CondCode CC = Equal ? ISD::SETEQ : ISD::SETNE;
+      if (!LegalOperations || TLI.isCondCodeLegal(CC, Op0.getSimpleValueType()))


Presumably it will infinite loop the combiner without this

arsenm · 2024-11-19T20:21:15Z

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

@@ -11874,6 +11874,47 @@ bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
      return true;
    }

+    // Special case: expand i1 comparisons using logical operations.
+    if (OpVT == MVT::i1) {


Why does this need the type special case? LegalizeSetCCCondCode should do as asked regardless of the type

Why does this need the type special case?

The logical simplifications we're using to expand setcc for i1 will not be correct for larger types.

LegalizeSetCCCondCode should do as asked regardless of the type

In theory yes, but this isn't the case today, and creating a valid expansion for all types x cond-codes would be a time consuming way to add a lot of in practice dead code.

Do you have a more specific example for "not correct for larger types"? It's not an arbitrary wide typed value. The result conforms to getTargetBooleanContents, which in turn should respect all of the logical expansions

Most of these expansions take advantage of the unique nature of the i1 type. For example consider signed greater than, this will only be true for i1s if and only if the first operand is 0 and the second is -1, hence it can be expressed as ~X & Y. This sort of bit manipulation is of course totally invalid for any larger type.

To see that this transformation is okay for i1, and not for a larger type here is an alive2 where the transform has been expressed on llvm IR: https://alive2.llvm.org/ce/z/6jS_rA

Artem-B

LGTM with a nit.

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

AlexMaclean requested review from jholewinski, Artem-B and justinfargnoli November 5, 2024 17:50

AlexMaclean self-assigned this Nov 5, 2024

llvmbot added backend:NVPTX llvm:SelectionDAG SelectionDAGISel as well labels Nov 5, 2024

arsenm reviewed Nov 5, 2024

View reviewed changes

AlexMaclean force-pushed the dev/amaclean/upstream-setcc-i1 branch from b167dbe to 9535ee8 Compare November 7, 2024 19:37

justinfargnoli reviewed Nov 7, 2024

View reviewed changes

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp Outdated Show resolved Hide resolved

Artem-B reviewed Nov 12, 2024

View reviewed changes

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Outdated Show resolved Hide resolved

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp Outdated Show resolved Hide resolved

AlexMaclean mentioned this pull request Nov 13, 2024

[SelectionDAG] Fixup type usage of CondCodeAction table #116082

Merged

AlexMaclean added a commit that referenced this pull request Nov 13, 2024

[SelectionDAG] Fixup type usage of CondCodeAction table (#116082)

7a8fe0f

Ensure that all uses of CondCodeAction table are checking the compared types, not the produced type. This is a prerequisite to landing #115035

AlexMaclean force-pushed the dev/amaclean/upstream-setcc-i1 branch from 9535ee8 to 593e953 Compare November 13, 2024 23:22

arsenm reviewed Nov 13, 2024

View reviewed changes

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp Outdated Show resolved Hide resolved

justinfargnoli reviewed Nov 14, 2024

View reviewed changes

justinfargnoli approved these changes Nov 15, 2024

View reviewed changes

justinfargnoli reviewed Nov 15, 2024

View reviewed changes

Artem-B reviewed Nov 19, 2024

View reviewed changes

arsenm reviewed Nov 19, 2024

View reviewed changes

AlexMaclean force-pushed the dev/amaclean/upstream-setcc-i1 branch from 5fda546 to 125a552 Compare November 19, 2024 23:29

Artem-B approved these changes Nov 19, 2024

View reviewed changes

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Outdated Show resolved Hide resolved

AlexMaclean linked an issue Nov 20, 2024 that may be closed by this pull request

[NVPTX] DAG->DAG Pattern Instruction Selection crashes on setcc #57405

Closed

arsenm approved these changes Dec 5, 2024

View reviewed changes

AlexMaclean added 5 commits December 5, 2024 18:46

[NVPTX] Fix lowering of i1 SETCC - address comments

beedad9

address comments

13a383f

address comments

83a7e42

address comments

980ea86

address comments -- clarify comment

c8da53d

AlexMaclean force-pushed the dev/amaclean/upstream-setcc-i1 branch from 125a552 to c8da53d Compare December 5, 2024 18:54

AlexMaclean merged commit 6018820 into llvm:main Dec 5, 2024
8 checks passed

[NVPTX] Fix lowering of i1 SETCC #115035

[NVPTX] Fix lowering of i1 SETCC #115035

Uh oh!

Conversation

AlexMaclean commented Nov 5, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Nov 5, 2024

Uh oh!

llvmbot commented Nov 5, 2024

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Artem-B commented Nov 5, 2024

Uh oh!

AlexMaclean commented Nov 5, 2024

Uh oh!

Artem-B commented Nov 5, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Uh oh!

AlexMaclean commented Nov 12, 2024

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

justinfargnoli left a comment

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

AlexMaclean commented Nov 19, 2024

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

AlexMaclean commented Nov 5, 2024 •

edited

Loading

Artem-B commented Nov 5, 2024 •

edited

Loading