[X86][GlobalISel] - Legalize And Select of G_FPTOSI/G_SITOFP in X87 mode #137377

pawan-nirpal-031 · 2025-04-25T18:03:23Z

Support legalization and selection of G_FPTOSI/G_SITOFP generic opcodes in x87 mode.

llvmbot · 2025-04-25T18:03:54Z

@llvm/pr-subscribers-llvm-globalisel

@llvm/pr-subscribers-backend-x86

Author: Pawan Nirpal (pawan-nirpal-031)

Changes

Support legalization and selection of G_FPTOSI/G_SITOFP generic opcodes in x87 mode.

Patch is 56.53 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/137377.diff

11 Files Affected:

(modified) llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp (+12-1)
(modified) llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp (+89-3)
(modified) llvm/lib/Target/X86/GISel/X86LegalizerInfo.h (+6)
(modified) llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp (+12)
(modified) llvm/lib/Target/X86/X86InstrFragments.td (+22-6)
(added) llvm/lib/Target/X86/X86InstrGISel.td (+31)
(modified) llvm/lib/Target/X86/X86InstrInfo.td (+5)
(added) llvm/test/CodeGen/X86/GlobalISel/isel-fp64-to-sint-x86.mir (+136)
(added) llvm/test/CodeGen/X86/GlobalISel/isel-sint-to-fp64-x86.mir (+152)
(modified) llvm/test/CodeGen/X86/isel-fp-to-sint-x87.ll (+222-108)
(modified) llvm/test/CodeGen/X86/isel-sint-to-fp-x87.ll (+179-74)

diff --git a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
index ea08f71b8af4a..f409ec0c921fd 100644
--- a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
+++ b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
@@ -77,6 +77,8 @@ class X86InstructionSelector : public InstructionSelector {
   unsigned getPtrLoadStoreOp(const LLT &Ty, const RegisterBank &RB,
                              unsigned Opc) const;
 
+  bool checkMemoryOpSize(const MachineInstr &MI, unsigned NumBytes) const;
+
   bool selectLoadStoreOp(MachineInstr &I, MachineRegisterInfo &MRI,
                          MachineFunction &MF) const;
   bool selectFrameIndexOrGep(MachineInstr &I, MachineRegisterInfo &MRI,
@@ -355,6 +357,15 @@ bool X86InstructionSelector::selectCopy(MachineInstr &I,
   return true;
 }
 
+bool X86InstructionSelector::checkMemoryOpSize(const MachineInstr &MI,
+                                               unsigned NumBytes) const {
+  if (!MI.mayLoadOrStore())
+    return false;
+  assert(MI.hasOneMemOperand() &&
+         "Expected load/store to have only one mem op!");
+  return (*MI.memoperands_begin())->getSize() == NumBytes;
+}
+
 bool X86InstructionSelector::select(MachineInstr &I) {
   assert(I.getParent() && "Instruction should be in a basic block!");
   assert(I.getParent()->getParent() && "Instruction should be in a function!");
@@ -364,7 +375,7 @@ bool X86InstructionSelector::select(MachineInstr &I) {
   MachineRegisterInfo &MRI = MF.getRegInfo();
 
   unsigned Opcode = I.getOpcode();
-  if (!isPreISelGenericOpcode(Opcode)) {
+  if (!isPreISelGenericOpcode(Opcode) && !I.isPreISelOpcode()) {
     // Certain non-generic instructions also need some special handling.
 
     if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
index f008cb1bea839..84e95cc40b9e8 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
@@ -17,6 +17,7 @@
 #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/IR/DerivedTypes.h"
@@ -498,7 +499,16 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
                 (typePairInSet(0, 1, {{s64, s32}})(Query) ||
                  (Is64Bit && typePairInSet(0, 1, {{s64, s64}})(Query))));
       })
-      .clampScalar(1, s32, sMaxScalar)
+      .customIf([=](const LegalityQuery &Query) -> bool {
+        if (!UseX87)
+          return false;
+        if ((typeIs(0, s32)(Query) && HasSSE1) ||
+            (typeIs(0, s64)(Query) && HasSSE2))
+          return false;
+        return typeInSet(0, {s32, s64, s80})(Query) &&
+               typeInSet(1, {s16, s32, s64})(Query);
+      })
+      .clampScalar(1, (UseX87 && !HasSSE1) ? s16 : s32, sMaxScalar)
       .widenScalarToNextPow2(1)
       .clampScalar(0, s32, HasSSE2 ? s64 : s32)
       .widenScalarToNextPow2(0);
@@ -512,9 +522,18 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
                 (typePairInSet(0, 1, {{s32, s64}})(Query) ||
                  (Is64Bit && typePairInSet(0, 1, {{s64, s64}})(Query))));
       })
-      .clampScalar(1, s32, HasSSE2 ? s64 : s32)
+      .customIf([=](const LegalityQuery &Query) -> bool {
+        if (!UseX87)
+          return false;
+        if ((typeIs(1, s32)(Query) && HasSSE1) ||
+            (typeIs(1, s64)(Query) && HasSSE2))
+          return false;
+        return typeInSet(0, {s16, s32, s64})(Query) &&
+               typeInSet(1, {s32, s64, s80})(Query);
+      })
+      .clampScalar(0, (UseX87 && !HasSSE1) ? s16 : s32, sMaxScalar)
       .widenScalarToNextPow2(0)
-      .clampScalar(0, s32, sMaxScalar)
+      .clampScalar(1, s32, HasSSE2 ? s64 : s32)
       .widenScalarToNextPow2(1);
 
   // For G_UITOFP and G_FPTOUI without AVX512, we have to custom legalize types
@@ -671,10 +690,77 @@ bool X86LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI,
     return legalizeUITOFP(MI, MRI, Helper);
   case TargetOpcode::G_STORE:
     return legalizeNarrowingStore(MI, MRI, Helper);
+  case TargetOpcode::G_SITOFP:
+    return legalizeSITOFP(MI, MRI, Helper);
+  case TargetOpcode::G_FPTOSI:
+    return legalizeFPTOSI(MI, MRI, Helper);
   }
   llvm_unreachable("expected switch to return");
 }
 
+bool X86LegalizerInfo::legalizeSITOFP(MachineInstr &MI,
+                                      MachineRegisterInfo &MRI,
+                                      LegalizerHelper &Helper) const {
+  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+  MachineFunction &MF = *MI.getMF();
+  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
+
+  assert((SrcTy.getSizeInBits() == 16 || SrcTy.getSizeInBits() == 32 ||
+          SrcTy.getSizeInBits() == 64) &&
+         "Unexpected source type for SITOFP in X87 mode.");
+
+  const LLT p0 = LLT::pointer(0, MF.getTarget().getPointerSizeInBits(0));
+  int MemSize = SrcTy.getSizeInBytes();
+  int StackSlot =
+      MF.getFrameInfo().CreateStackObject(MemSize, Align(MemSize), false);
+
+  auto SlotPointer = MIRBuilder.buildFrameIndex(p0, StackSlot);
+  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, StackSlot);
+  MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
+      PtrInfo, MachineMemOperand::MOStore, MemSize, Align(MemSize));
+
+  // Store the integer value on the FPU stack.
+  MIRBuilder.buildStore(Src, SlotPointer, *StoreMMO);
+
+  MachineMemOperand *LoadMMO = MF.getMachineMemOperand(
+      PtrInfo, MachineMemOperand::MOLoad, MemSize, Align(MemSize));
+  MIRBuilder.buildInstr(X86::G_FILD)
+      .addDef(Dst)
+      .addUse(SlotPointer.getReg(0))
+      .addMemOperand(LoadMMO);
+
+  MI.eraseFromParent();
+  return true;
+}
+
+bool X86LegalizerInfo::legalizeFPTOSI(MachineInstr &MI,
+                                      MachineRegisterInfo &MRI,
+                                      LegalizerHelper &Helper) const {
+  MachineFunction &MF = *MI.getMF();
+  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+  const LLT p0 = LLT::pointer(0, MF.getTarget().getPointerSizeInBits(0));
+  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
+
+  unsigned MemSize = DstTy.getSizeInBytes();
+  int StackSlot =
+      MF.getFrameInfo().CreateStackObject(MemSize, Align(MemSize), false);
+
+  auto SlotPointer = MIRBuilder.buildFrameIndex(p0, StackSlot);
+
+  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, StackSlot);
+  MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
+      PtrInfo, MachineMemOperand::MOStore, MemSize, Align(MemSize));
+
+  MIRBuilder.buildInstr(X86::G_FIST)
+      .addUse(Src)
+      .addUse(SlotPointer.getReg(0))
+      .addMemOperand(StoreMMO);
+
+  MIRBuilder.buildLoad(Dst, SlotPointer, PtrInfo, Align(MemSize));
+  MI.eraseFromParent();
+  return true;
+}
+
 bool X86LegalizerInfo::legalizeBuildVector(MachineInstr &MI,
                                            MachineRegisterInfo &MRI,
                                            LegalizerHelper &Helper) const {
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h
index 54f776456397b..1ba82674ed4c6 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h
@@ -48,6 +48,12 @@ class X86LegalizerInfo : public LegalizerInfo {
 
   bool legalizeNarrowingStore(MachineInstr &MI, MachineRegisterInfo &MRI,
                               LegalizerHelper &Helper) const;
+
+  bool legalizeSITOFP(MachineInstr &MI, MachineRegisterInfo &MRI,
+                      LegalizerHelper &Helper) const;
+
+  bool legalizeFPTOSI(MachineInstr &MI, MachineRegisterInfo &MRI,
+                      LegalizerHelper &Helper) const;
 };
 } // namespace llvm
 #endif
diff --git a/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp b/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp
index 0baca81494694..74fe5fcaab6cc 100644
--- a/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp
@@ -111,6 +111,7 @@ bool X86RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
   case TargetOpcode::G_FPTOSI:
   case TargetOpcode::G_FPTOUI:
   case TargetOpcode::G_FCMP:
+  case X86::G_FIST:
   case TargetOpcode::G_LROUND:
   case TargetOpcode::G_LLROUND:
   case TargetOpcode::G_INTRINSIC_TRUNC:
@@ -129,6 +130,7 @@ bool X86RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI,
   switch (MI.getOpcode()) {
   case TargetOpcode::G_SITOFP:
   case TargetOpcode::G_UITOFP:
+  case X86::G_FILD:
     return true;
   default:
     break;
@@ -296,6 +298,16 @@ X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     // VECRReg)
     getInstrPartialMappingIdxs(MI, MRI, /* isFP= */ true, OpRegBankIdx);
     break;
+  case X86::G_FIST:
+  case X86::G_FILD: {
+    auto &Op0 = MI.getOperand(0);
+    auto &Op1 = MI.getOperand(1);
+    const LLT Ty0 = MRI.getType(Op0.getReg());
+    const LLT Ty1 = MRI.getType(Op1.getReg());
+    OpRegBankIdx[0] = getPartialMappingIdx(MI, Ty0, /* isFP= */ true);
+    OpRegBankIdx[1] = getPartialMappingIdx(MI, Ty1, /* isFP= */ false);
+    break;
+  }
   case TargetOpcode::G_SITOFP:
   case TargetOpcode::G_FPTOSI:
   case TargetOpcode::G_UITOFP:
diff --git a/llvm/lib/Target/X86/X86InstrFragments.td b/llvm/lib/Target/X86/X86InstrFragments.td
index f9d70d1bb5d85..00af5b1f50733 100644
--- a/llvm/lib/Target/X86/X86InstrFragments.td
+++ b/llvm/lib/Target/X86/X86InstrFragments.td
@@ -841,13 +841,21 @@ def X86fldf80 : PatFrag<(ops node:$ptr), (X86fld node:$ptr), [{
 
 def X86fild16 : PatFrag<(ops node:$ptr), (X86fild node:$ptr), [{
   return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
-}]>;
+}]> {
+  let GISelPredicateCode = [{ return checkMemoryOpSize(MI, 2); }];
+}
+
 def X86fild32 : PatFrag<(ops node:$ptr), (X86fild node:$ptr), [{
   return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
-}]>;
+}]> {
+  let GISelPredicateCode = [{ return checkMemoryOpSize(MI, 4); }];
+}
+
 def X86fild64 : PatFrag<(ops node:$ptr), (X86fild node:$ptr), [{
   return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
-}]>;
+}]> {
+  let GISelPredicateCode = [{ return checkMemoryOpSize(MI, 8); }];
+}
 
 def X86fist32 : PatFrag<(ops node:$val, node:$ptr),
                         (X86fist node:$val, node:$ptr), [{
@@ -862,15 +870,23 @@ def X86fist64 : PatFrag<(ops node:$val, node:$ptr),
 def X86fp_to_i16mem : PatFrag<(ops node:$val, node:$ptr),
                               (X86fp_to_mem node:$val, node:$ptr), [{
   return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
-}]>;
+}]> {
+  let GISelPredicateCode = [{ return checkMemoryOpSize(MI, 2); }];
+}
+
 def X86fp_to_i32mem : PatFrag<(ops node:$val, node:$ptr),
                               (X86fp_to_mem node:$val, node:$ptr), [{
   return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
-}]>;
+}]> {
+  let GISelPredicateCode = [{ return checkMemoryOpSize(MI, 4); }];
+}
+
 def X86fp_to_i64mem : PatFrag<(ops node:$val, node:$ptr),
                               (X86fp_to_mem node:$val, node:$ptr), [{
   return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
-}]>;
+}]> {
+  let GISelPredicateCode = [{ return checkMemoryOpSize(MI, 8); }];
+}
 
 //===----------------------------------------------------------------------===//
 // FPStack pattern fragments
diff --git a/llvm/lib/Target/X86/X86InstrGISel.td b/llvm/lib/Target/X86/X86InstrGISel.td
new file mode 100644
index 0000000000000..7f5d92f43b0bb
--- /dev/null
+++ b/llvm/lib/Target/X86/X86InstrGISel.td
@@ -0,0 +1,31 @@
+//===- X86InstrGISel.td - X86 GISel target specific opcodes -*- tablegen -*===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// X86 GlobalISel target pseudo instruction definitions. This is kept
+// separately from the other tablegen files for organizational purposes, but
+// share the same infrastructure.
+//
+//===----------------------------------------------------------------------===//
+
+class X86GenericInstruction : GenericInstruction { let Namespace = "X86"; }
+
+def G_FILD : X86GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins ptype1:$src);
+  let hasSideEffects = false;
+  let mayLoad = true;
+}
+def G_FIST : X86GenericInstruction {
+  let OutOperandList = (outs);
+  let InOperandList = (ins type0:$src1, ptype1:$src2);
+  let hasSideEffects = false;
+  let mayStore = true;
+}
+
+def : GINodeEquiv<G_FILD, X86fild>;
+def : GINodeEquiv<G_FIST, X86fp_to_mem>;
\ No newline at end of file
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index e75d6743f9273..7f6c5614847e3 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -37,6 +37,11 @@ include "X86InstrFormats.td"
 //
 include "X86InstrUtils.td"
 
+//===----------------------------------------------------------------------===//
+// Global ISel
+//
+include "X86InstrGISel.td"
+
 //===----------------------------------------------------------------------===//
 // Subsystems.
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/X86/GlobalISel/isel-fp64-to-sint-x86.mir b/llvm/test/CodeGen/X86/GlobalISel/isel-fp64-to-sint-x86.mir
new file mode 100644
index 0000000000000..3e1af9d836aec
--- /dev/null
+++ b/llvm/test/CodeGen/X86/GlobalISel/isel-fp64-to-sint-x86.mir
@@ -0,0 +1,136 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# NOTE: This MIR test is required because the support for 64 bit memory ops is missing in X86 mode, Due to distinction between float/int types, support is expected in near future and there is this RFC in place https://discourse.llvm.org/t/rfc-globalisel-adding-fp-type-information-to-llt/83349. Once this support is introduced this test must be dropped and integrated into the LLVM IR tests.
+# RUN: llc -O2 -mtriple=i686-linux-gnu -mattr=+x87,-sse,-sse2 -run-pass=regbankselect,instruction-select -disable-gisel-legality-check -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes GISEL-X86
+
+---
+name:            test_double_to_int16
+alignment:       16
+exposesReturnsTwice: false
+legalized:       true
+tracksRegLiveness: true
+fixedStack:
+  - { id: 0, type: default, offset: 0, size: 8, alignment: 16, stack-id: default,
+      isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+stack:
+  - { id: 0, name: '', type: default, offset: 0, size: 2, alignment: 2,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+
+body:             |
+  bb.1.entry:
+    ; GISEL-X86-LABEL: name: test_double_to_int16
+    ; GISEL-X86: [[DEF:%[0-9]+]]:rfp64 = IMPLICIT_DEF
+    ; GISEL-X86-NEXT: FP64_TO_INT16_IN_MEM %stack.0, 1, $noreg, 0, $noreg, [[DEF]], implicit-def dead $eflags :: (store (s16) into %stack.0)
+    ; GISEL-X86-NEXT: [[MOV16rm:%[0-9]+]]:gr16 = MOV16rm %stack.0, 1, $noreg, 0, $noreg :: (load (s16) from %stack.0)
+    ; GISEL-X86-NEXT: $ax = COPY [[MOV16rm]]
+    ; GISEL-X86-NEXT: RET 0, implicit $ax
+    %0:_(s64) = IMPLICIT_DEF
+    %3:_(p0) = G_FRAME_INDEX %stack.0
+    G_FIST %0(s64), %3(p0) :: (store (s16) into %stack.0)
+    %2:_(s16) = G_LOAD %3(p0) :: (load (s16) from %stack.0)
+    $ax = COPY %2(s16)
+    RET 0, implicit $ax
+...
+---
+name:            test_double_to_int32
+alignment:       16
+exposesReturnsTwice: false
+legalized:       true
+tracksRegLiveness: true
+fixedStack:
+  - { id: 0, type: default, offset: 0, size: 8, alignment: 16, stack-id: default,
+      isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+stack:
+  - { id: 0, name: '', type: default, offset: 0, size: 4, alignment: 4,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+
+body:             |
+  bb.1.entry:
+    ; GISEL-X86-LABEL: name: test_double_to_int32
+    ; GISEL-X86: [[DEF:%[0-9]+]]:rfp64 = IMPLICIT_DEF
+    ; GISEL-X86-NEXT: FP64_TO_INT32_IN_MEM %stack.0, 1, $noreg, 0, $noreg, [[DEF]], implicit-def dead $eflags :: (store (s32) into %stack.0)
+    ; GISEL-X86-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %stack.0)
+    ; GISEL-X86-NEXT: $eax = COPY [[MOV32rm]]
+    ; GISEL-X86-NEXT: RET 0, implicit $eax
+    %0:_(s64) = IMPLICIT_DEF
+    %3:_(p0) = G_FRAME_INDEX %stack.0
+    G_FIST %0(s64), %3(p0) :: (store (s32) into %stack.0)
+    %2:_(s32) = G_LOAD %3(p0) :: (load (s32) from %stack.0)
+    $eax = COPY %2(s32)
+    RET 0, implicit $eax
+...
+---
+name:            test_double_to_int64
+alignment:       16
+exposesReturnsTwice: false
+legalized:       true
+tracksRegLiveness: true
+fixedStack:
+  - { id: 0, type: default, offset: 0, size: 8, alignment: 16, stack-id: default,
+      isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+stack:
+  - { id: 0, name: '', type: default, offset: 0, size: 8, alignment: 8,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+body:             |
+  bb.1.entry:
+    ; GISEL-X86-LABEL: name: test_double_to_int64
+    ; GISEL-X86: [[DEF:%[0-9]+]]:rfp64 = IMPLICIT_DEF
+    ; GISEL-X86-NEXT: [[LEA32r:%[0-9]+]]:gr32 = LEA32r %stack.0, 1, $noreg, 0, $noreg
+    ; GISEL-X86-NEXT: FP64_TO_INT64_IN_MEM %stack.0, 1, $noreg, 0, $noreg, [[DEF]], implicit-def dead $eflags :: (store (s64) into %stack.0)
+    ; GISEL-X86-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %stack.0, align 8)
+    ; GISEL-X86-NEXT: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm [[LEA32r]], 1, $noreg, 4, $noreg :: (load (s32) from %stack.0 + 4, basealign 8)
+    ; GISEL-X86-NEXT: $eax = COPY [[MOV32rm]]
+    ; GISEL-X86-NEXT: $edx = COPY [[MOV32rm1]]
+    ; GISEL-X86-NEXT: RET 0, implicit $eax, implicit $edx
+    %1:_(p0) = G_FRAME_INDEX %fixed-stack.0
+    %10:_(s32) = G_LOAD %1(p0) :: (invariant load (s32) from %fixed-stack.0, align 16)
+    %8:_(s32) = G_CONSTANT i32 4
+    %11:_(p0) = G_PTR_ADD %1, %8(s32)
+    %12:_(s32) = G_LOAD %11(p0) :: (invariant load (s32) from %fixed-stack.0 + 4, basealign 16)
+    %0:_(s64) = IMPLICIT_DEF
+    %5:_(p0) = G_FRAME_INDEX %stack.0
+    G_FIST %0(s64), %5(p0) :: (store (s64) into %stack.0)
+    %6:_(s32) = G_LOAD %5(p0) :: (load (s32) from %stack.0, align 8)
+    %7:_(p0) = G_PTR_ADD %5, %8(s32)
+    %9:_(s32) = G_LOAD %7(p0) :: (load (s32) from %stack.0 + 4, basealign 8)
+    $eax = COPY %6(s32)
+    $edx = COPY %9(s32)
+    RET 0, implicit $eax, implicit $edx
+...
+---
+name:            test_double_to_int8
+alignment:       16
+exposesReturnsTwice: false
+legalized:       true
+tracksRegLiveness: true
+fixedStack:
+  - { id: 0, type: default, offset: 0, size: 8, alignment: 16, stack-id: default,
+      isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+stack:
+  - { id: 0, name: '', type: default, offset: 0, size: 2, alignment: 2,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+
+body:             |
+  bb.1.entry:
+    ; GISEL-X86-LABEL: name: test_double_to_int8
+    ; GISEL-X86: [[DEF:%[0-9]+]]:rfp64 = IMPLICIT_DEF
+    ; GIS...
[truncated]

pawan-nirpal-031 · 2025-04-25T18:07:59Z

Thank you @e-kud for the #130445 which is used by this PR. :)

pawan-nirpal-031 · 2025-04-25T18:09:18Z

@e-kud @arsenm @RKSimon @JaydeepChauhan14 Can you please review.

arsenm · 2025-04-25T19:36:20Z

llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp

+  const LLT p0 = LLT::pointer(0, MF.getTarget().getPointerSizeInBits(0));
+  int MemSize = SrcTy.getSizeInBytes();
+  int StackSlot =
+      MF.getFrameInfo().CreateStackObject(MemSize, Align(MemSize), false);
+
+  auto SlotPointer = MIRBuilder.buildFrameIndex(p0, StackSlot);
+  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, StackSlot);
+  MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
+      PtrInfo, MachineMemOperand::MOStore, MemSize, Align(MemSize));


Can use LegalizerHelper::createStackTemporary?

arsenm · 2025-04-25T19:36:41Z

llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp

+  unsigned MemSize = DstTy.getSizeInBytes();
+  int StackSlot =
+      MF.getFrameInfo().CreateStackObject(MemSize, Align(MemSize), false);
+
+  auto SlotPointer = MIRBuilder.buildFrameIndex(p0, StackSlot);
+
+  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, StackSlot);
+  MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
+      PtrInfo, MachineMemOperand::MOStore, MemSize, Align(MemSize));


arsenm · 2025-04-25T19:37:08Z

llvm/lib/Target/X86/X86InstrFragments.td

@@ -841,13 +841,21 @@ def X86fldf80 : PatFrag<(ops node:$ptr), (X86fld node:$ptr), [{

 def X86fild16 : PatFrag<(ops node:$ptr), (X86fild node:$ptr), [{
  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
-}]>;
+}]> {
+  let GISelPredicateCode = [{ return checkMemoryOpSize(MI, 2); }];


You can do this in terms of LLT instead of the byte size, like the dag version

arsenm · 2025-04-25T19:37:40Z

llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp

+      .customIf([=](const LegalityQuery &Query) -> bool {
+        if (!UseX87)
+          return false;


Use the new format that pulls the predicate out of the legality query?

arsenm · 2025-04-25T19:38:10Z

llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp

+  if (!MI.mayLoadOrStore())
+    return false;
+  assert(MI.hasOneMemOperand() &&
+         "Expected load/store to have only one mem op!");


In the use context I would expect MI.mayLoadOrStore to be implied

RKSimon · 2025-05-13T12:04:48Z

llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp

-        return typeInSet(0, {s32, s64, s80})(Query) &&
-               typeInSet(1, {s16, s32, s64})(Query);
-      })
+      .legalFor(HasSSE1, {s32, s32})


Should this be: .legalFor(HasSSE1, {{s32, s32}}) for the dst/src pair?

arsenm · 2025-05-14T19:26:30Z

llvm/lib/Target/X86/X86InstrFragments.td

@@ -871,21 +871,21 @@ def X86fp_to_i16mem : PatFrag<(ops node:$val, node:$ptr),
                              (X86fp_to_mem node:$val, node:$ptr), [{
  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
 }]> {
-  let GISelPredicateCode = [{ return checkMemoryOpSize(MI, 2); }];
+  let GISelPredicateCode = [{ return checkMemoryOpSize(MI, LLT::scalar(16)); }];


You shouldn't need a custom code predicate for this. There is already a MemoryVT and ScalarMemoryVT field on PatFrag

Hi @arsenm, I don't get the idea. Do you suggest to specify the type of $val and get rid of predicate for both SDAG and GISEL? I've tried to find examples of PatFrag using its memory fields but was unsuccessful.
Otherwise GlobalISel will always try to find its own version of predicate regardless of SDAG implementation.

No, not the type of val, but the memory type for X86_fp_to_mem. Many of the common custom predicates were moved to specific fields on PatFrag that TableGen recognizes, like the memory alignment to avoid custom predicate code. This is how extending and atomic loads are specified, but the mechanism is supposed to be general.

The example would be how atomic_load_32 and atomic_load_64 work, they each set MemoryVT and wrap another PatFrag for atomic_load, which in turn wraps a PatFrag around ld

llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp

llvm/lib/Target/X86/X86InstrGISel.td

Co-authored-by: Matt Arsenault <arsenm2@gmail.com>

llvm/lib/Target/X86/X86InstrFragments.td

llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h

e-kud · 2025-05-26T15:10:12Z

llvm/lib/Target/X86/X86InstrFragments.td

+  let IsStore = true;
+  let MemoryVT = i32;
+}
+
 def X86fp_to_i64mem : PatFrag<(ops node:$val, node:$ptr),
                              (X86fp_to_mem node:$val, node:$ptr), [{
  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;


I expected that we don't need SDAG predicate as well, do we?

The memory type should replace this

I feel that should be done in a separate PR?

I'm ok with both options.

Thank you, I will address this in a separate PR.

e-kud

LGTM

llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp

pawan-nirpal-031 · 2025-05-30T14:03:39Z

Hi @arsenm @RKSimon Ping for review.

RKSimon

LGTM - cheers

llvm-ci · 2025-06-02T14:51:32Z

LLVM Buildbot has detected a new failure on builder mlir-nvidia-gcc7 running on mlir-nvidia while building llvm at step 7 "test-build-check-mlir-build-only-check-mlir".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/116/builds/13625

Here is the relevant piece of the build log for the reference

Step 7 (test-build-check-mlir-build-only-check-mlir) failure: test (failure)
******************** TEST 'MLIR :: Integration/GPU/CUDA/async.mlir' FAILED ********************
Exit Code: 1

Command Output (stdout):
--
# RUN: at line 1
/vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir  | /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt -gpu-kernel-outlining  | /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm),nvvm-attach-target)'  | /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt -gpu-async-region -gpu-to-llvm -reconcile-unrealized-casts -gpu-module-to-binary="format=fatbin"  | /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt -async-to-async-runtime -async-runtime-ref-counting  | /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt -convert-async-to-llvm -convert-func-to-llvm -convert-arith-to-llvm -convert-cf-to-llvm -reconcile-unrealized-casts  | /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-runner    --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/lib/libmlir_cuda_runtime.so    --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/lib/libmlir_async_runtime.so    --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/lib/libmlir_runner_utils.so    --entry-point-result=void -O0  | /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/FileCheck /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt -gpu-kernel-outlining
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt '-pass-pipeline=builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm),nvvm-attach-target)'
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt -gpu-async-region -gpu-to-llvm -reconcile-unrealized-casts -gpu-module-to-binary=format=fatbin
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt -async-to-async-runtime -async-runtime-ref-counting
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt -convert-async-to-llvm -convert-func-to-llvm -convert-arith-to-llvm -convert-cf-to-llvm -reconcile-unrealized-casts
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-runner --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/lib/libmlir_cuda_runtime.so --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/lib/libmlir_async_runtime.so --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/lib/libmlir_runner_utils.so --entry-point-result=void -O0
# .---command stderr------------
# | 'cuStreamWaitEvent(stream, event, 0)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuStreamWaitEvent(stream, event, 0)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuStreamWaitEvent(stream, event, 0)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuStreamWaitEvent(stream, event, 0)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventSynchronize(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# `-----------------------------
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/FileCheck /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir
# .---command stderr------------
# | /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir:68:12: error: CHECK: expected string not found in input
# |  // CHECK: [84, 84]
# |            ^
# | <stdin>:1:1: note: scanning from here
# | Unranked Memref base@ = 0x5698fdfd3270 rank = 1 offset = 0 sizes = [2] strides = [1] data = 
# | ^
# | <stdin>:2:1: note: possible intended match here
# | [42, 42]
# | ^
# | 
# | Input file: <stdin>
# | Check file: /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir
# | 
# | -dump-input=help explains the following input dump.
# | 
# | Input was:
# | <<<<<<
# |             1: Unranked Memref base@ = 0x5698fdfd3270 rank = 1 offset = 0 sizes = [2] strides = [1] data =  
# | check:68'0     X~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ error: no match found
# |             2: [42, 42] 
# | check:68'0     ~~~~~~~~~
# | check:68'1     ?         possible intended match
...

…llvm#137377) Support legalization and selection of G_FPTOSI/G_SITOFP generic opcodes in x87 mode.

[X86][GlobalISel] - Legalize And Select of G_FPTOSI/G_SITOFP in X87 mode

1415d66

llvmbot added backend:X86 llvm:globalisel labels Apr 25, 2025

e-kud requested review from arsenm, RKSimon and e-kud April 25, 2025 18:12

arsenm reviewed Apr 25, 2025

View reviewed changes

addressing cmts

2dcb057

pawan-nirpal-031 requested a review from arsenm May 13, 2025 10:17

RKSimon reviewed May 13, 2025

View reviewed changes

pawan-nirpal-031 added 3 commits May 14, 2025 06:09

introduce predicated customForCartesianProduct

1ca8ff2

braces for legalfor

4d9944d

fix failing tests

c62687d

arsenm reviewed May 14, 2025

View reviewed changes

fix tests

10bc9bf

MalaySanghi reviewed May 22, 2025

View reviewed changes

llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp Show resolved Hide resolved

arsenm reviewed May 23, 2025

View reviewed changes

llvm/lib/Target/X86/X86InstrGISel.td Outdated Show resolved Hide resolved

pawan-nirpal-031 and others added 2 commits May 23, 2025 17:41

Update llvm/lib/Target/X86/X86InstrGISel.td

1e910d3

Co-authored-by: Matt Arsenault <arsenm2@gmail.com>

use MemoryVT instead of custom c++ predicate

c1bb7af

e-kud reviewed May 23, 2025

View reviewed changes

llvm/lib/Target/X86/X86InstrFragments.td Outdated Show resolved Hide resolved

oops

919c934

e-kud reviewed May 26, 2025

View reviewed changes

e-kud approved these changes May 28, 2025

View reviewed changes

llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp Outdated Show resolved Hide resolved

remove redundant include

9821d76

Merge branch 'main' into gisel-sitofp-fptosi-x87-enablement

19b359e

RKSimon approved these changes May 31, 2025

View reviewed changes

e-kud merged commit a4b9e82 into llvm:main Jun 2, 2025
11 checks passed

pawan-nirpal-031 deleted the gisel-sitofp-fptosi-x87-enablement branch June 2, 2025 14:44

sallto pushed a commit to sallto/llvm-project that referenced this pull request Jun 3, 2025

[X86][GlobalISel] - Legalize And Select G_FPTOSI/G_SITOFP in X87 mode (…

dd831c8

…llvm#137377) Support legalization and selection of G_FPTOSI/G_SITOFP generic opcodes in x87 mode.

[X86][GlobalISel] - Legalize And Select of G_FPTOSI/G_SITOFP in X87 mode #137377

[X86][GlobalISel] - Legalize And Select of G_FPTOSI/G_SITOFP in X87 mode #137377

Uh oh!

Conversation

pawan-nirpal-031 commented Apr 25, 2025

Uh oh!

llvmbot commented Apr 25, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

pawan-nirpal-031 commented Apr 25, 2025

Uh oh!

pawan-nirpal-031 commented Apr 25, 2025

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

e-kud left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

pawan-nirpal-031 commented May 30, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

RKSimon left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

llvm-ci commented Jun 2, 2025

Uh oh!

Uh oh!

llvmbot commented Apr 25, 2025 •

edited

Loading

pawan-nirpal-031 commented May 30, 2025 •

edited

Loading