Skip to content

[X86][GlobalISel] - Legalize And Select of G_FPTOSI/G_SITOFP in X87 mode #137377

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Jun 2, 2025

Conversation

pawan-nirpal-031
Copy link
Contributor

Support legalization and selection of G_FPTOSI/G_SITOFP generic opcodes in x87 mode.

@llvmbot
Copy link
Member

llvmbot commented Apr 25, 2025

@llvm/pr-subscribers-llvm-globalisel

@llvm/pr-subscribers-backend-x86

Author: Pawan Nirpal (pawan-nirpal-031)

Changes

Support legalization and selection of G_FPTOSI/G_SITOFP generic opcodes in x87 mode.


Patch is 56.53 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/137377.diff

11 Files Affected:

  • (modified) llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp (+12-1)
  • (modified) llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp (+89-3)
  • (modified) llvm/lib/Target/X86/GISel/X86LegalizerInfo.h (+6)
  • (modified) llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp (+12)
  • (modified) llvm/lib/Target/X86/X86InstrFragments.td (+22-6)
  • (added) llvm/lib/Target/X86/X86InstrGISel.td (+31)
  • (modified) llvm/lib/Target/X86/X86InstrInfo.td (+5)
  • (added) llvm/test/CodeGen/X86/GlobalISel/isel-fp64-to-sint-x86.mir (+136)
  • (added) llvm/test/CodeGen/X86/GlobalISel/isel-sint-to-fp64-x86.mir (+152)
  • (modified) llvm/test/CodeGen/X86/isel-fp-to-sint-x87.ll (+222-108)
  • (modified) llvm/test/CodeGen/X86/isel-sint-to-fp-x87.ll (+179-74)
diff --git a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
index ea08f71b8af4a..f409ec0c921fd 100644
--- a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
+++ b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
@@ -77,6 +77,8 @@ class X86InstructionSelector : public InstructionSelector {
   unsigned getPtrLoadStoreOp(const LLT &Ty, const RegisterBank &RB,
                              unsigned Opc) const;
 
+  bool checkMemoryOpSize(const MachineInstr &MI, unsigned NumBytes) const;
+
   bool selectLoadStoreOp(MachineInstr &I, MachineRegisterInfo &MRI,
                          MachineFunction &MF) const;
   bool selectFrameIndexOrGep(MachineInstr &I, MachineRegisterInfo &MRI,
@@ -355,6 +357,15 @@ bool X86InstructionSelector::selectCopy(MachineInstr &I,
   return true;
 }
 
+bool X86InstructionSelector::checkMemoryOpSize(const MachineInstr &MI,
+                                               unsigned NumBytes) const {
+  if (!MI.mayLoadOrStore())
+    return false;
+  assert(MI.hasOneMemOperand() &&
+         "Expected load/store to have only one mem op!");
+  return (*MI.memoperands_begin())->getSize() == NumBytes;
+}
+
 bool X86InstructionSelector::select(MachineInstr &I) {
   assert(I.getParent() && "Instruction should be in a basic block!");
   assert(I.getParent()->getParent() && "Instruction should be in a function!");
@@ -364,7 +375,7 @@ bool X86InstructionSelector::select(MachineInstr &I) {
   MachineRegisterInfo &MRI = MF.getRegInfo();
 
   unsigned Opcode = I.getOpcode();
-  if (!isPreISelGenericOpcode(Opcode)) {
+  if (!isPreISelGenericOpcode(Opcode) && !I.isPreISelOpcode()) {
     // Certain non-generic instructions also need some special handling.
 
     if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
index f008cb1bea839..84e95cc40b9e8 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
@@ -17,6 +17,7 @@
 #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/IR/DerivedTypes.h"
@@ -498,7 +499,16 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
                 (typePairInSet(0, 1, {{s64, s32}})(Query) ||
                  (Is64Bit && typePairInSet(0, 1, {{s64, s64}})(Query))));
       })
-      .clampScalar(1, s32, sMaxScalar)
+      .customIf([=](const LegalityQuery &Query) -> bool {
+        if (!UseX87)
+          return false;
+        if ((typeIs(0, s32)(Query) && HasSSE1) ||
+            (typeIs(0, s64)(Query) && HasSSE2))
+          return false;
+        return typeInSet(0, {s32, s64, s80})(Query) &&
+               typeInSet(1, {s16, s32, s64})(Query);
+      })
+      .clampScalar(1, (UseX87 && !HasSSE1) ? s16 : s32, sMaxScalar)
       .widenScalarToNextPow2(1)
       .clampScalar(0, s32, HasSSE2 ? s64 : s32)
       .widenScalarToNextPow2(0);
@@ -512,9 +522,18 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
                 (typePairInSet(0, 1, {{s32, s64}})(Query) ||
                  (Is64Bit && typePairInSet(0, 1, {{s64, s64}})(Query))));
       })
-      .clampScalar(1, s32, HasSSE2 ? s64 : s32)
+      .customIf([=](const LegalityQuery &Query) -> bool {
+        if (!UseX87)
+          return false;
+        if ((typeIs(1, s32)(Query) && HasSSE1) ||
+            (typeIs(1, s64)(Query) && HasSSE2))
+          return false;
+        return typeInSet(0, {s16, s32, s64})(Query) &&
+               typeInSet(1, {s32, s64, s80})(Query);
+      })
+      .clampScalar(0, (UseX87 && !HasSSE1) ? s16 : s32, sMaxScalar)
       .widenScalarToNextPow2(0)
-      .clampScalar(0, s32, sMaxScalar)
+      .clampScalar(1, s32, HasSSE2 ? s64 : s32)
       .widenScalarToNextPow2(1);
 
   // For G_UITOFP and G_FPTOUI without AVX512, we have to custom legalize types
@@ -671,10 +690,77 @@ bool X86LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI,
     return legalizeUITOFP(MI, MRI, Helper);
   case TargetOpcode::G_STORE:
     return legalizeNarrowingStore(MI, MRI, Helper);
+  case TargetOpcode::G_SITOFP:
+    return legalizeSITOFP(MI, MRI, Helper);
+  case TargetOpcode::G_FPTOSI:
+    return legalizeFPTOSI(MI, MRI, Helper);
   }
   llvm_unreachable("expected switch to return");
 }
 
+bool X86LegalizerInfo::legalizeSITOFP(MachineInstr &MI,
+                                      MachineRegisterInfo &MRI,
+                                      LegalizerHelper &Helper) const {
+  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+  MachineFunction &MF = *MI.getMF();
+  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
+
+  assert((SrcTy.getSizeInBits() == 16 || SrcTy.getSizeInBits() == 32 ||
+          SrcTy.getSizeInBits() == 64) &&
+         "Unexpected source type for SITOFP in X87 mode.");
+
+  const LLT p0 = LLT::pointer(0, MF.getTarget().getPointerSizeInBits(0));
+  int MemSize = SrcTy.getSizeInBytes();
+  int StackSlot =
+      MF.getFrameInfo().CreateStackObject(MemSize, Align(MemSize), false);
+
+  auto SlotPointer = MIRBuilder.buildFrameIndex(p0, StackSlot);
+  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, StackSlot);
+  MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
+      PtrInfo, MachineMemOperand::MOStore, MemSize, Align(MemSize));
+
+  // Store the integer value on the FPU stack.
+  MIRBuilder.buildStore(Src, SlotPointer, *StoreMMO);
+
+  MachineMemOperand *LoadMMO = MF.getMachineMemOperand(
+      PtrInfo, MachineMemOperand::MOLoad, MemSize, Align(MemSize));
+  MIRBuilder.buildInstr(X86::G_FILD)
+      .addDef(Dst)
+      .addUse(SlotPointer.getReg(0))
+      .addMemOperand(LoadMMO);
+
+  MI.eraseFromParent();
+  return true;
+}
+
+bool X86LegalizerInfo::legalizeFPTOSI(MachineInstr &MI,
+                                      MachineRegisterInfo &MRI,
+                                      LegalizerHelper &Helper) const {
+  MachineFunction &MF = *MI.getMF();
+  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+  const LLT p0 = LLT::pointer(0, MF.getTarget().getPointerSizeInBits(0));
+  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
+
+  unsigned MemSize = DstTy.getSizeInBytes();
+  int StackSlot =
+      MF.getFrameInfo().CreateStackObject(MemSize, Align(MemSize), false);
+
+  auto SlotPointer = MIRBuilder.buildFrameIndex(p0, StackSlot);
+
+  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, StackSlot);
+  MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
+      PtrInfo, MachineMemOperand::MOStore, MemSize, Align(MemSize));
+
+  MIRBuilder.buildInstr(X86::G_FIST)
+      .addUse(Src)
+      .addUse(SlotPointer.getReg(0))
+      .addMemOperand(StoreMMO);
+
+  MIRBuilder.buildLoad(Dst, SlotPointer, PtrInfo, Align(MemSize));
+  MI.eraseFromParent();
+  return true;
+}
+
 bool X86LegalizerInfo::legalizeBuildVector(MachineInstr &MI,
                                            MachineRegisterInfo &MRI,
                                            LegalizerHelper &Helper) const {
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h
index 54f776456397b..1ba82674ed4c6 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h
@@ -48,6 +48,12 @@ class X86LegalizerInfo : public LegalizerInfo {
 
   bool legalizeNarrowingStore(MachineInstr &MI, MachineRegisterInfo &MRI,
                               LegalizerHelper &Helper) const;
+
+  bool legalizeSITOFP(MachineInstr &MI, MachineRegisterInfo &MRI,
+                      LegalizerHelper &Helper) const;
+
+  bool legalizeFPTOSI(MachineInstr &MI, MachineRegisterInfo &MRI,
+                      LegalizerHelper &Helper) const;
 };
 } // namespace llvm
 #endif
diff --git a/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp b/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp
index 0baca81494694..74fe5fcaab6cc 100644
--- a/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp
@@ -111,6 +111,7 @@ bool X86RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
   case TargetOpcode::G_FPTOSI:
   case TargetOpcode::G_FPTOUI:
   case TargetOpcode::G_FCMP:
+  case X86::G_FIST:
   case TargetOpcode::G_LROUND:
   case TargetOpcode::G_LLROUND:
   case TargetOpcode::G_INTRINSIC_TRUNC:
@@ -129,6 +130,7 @@ bool X86RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI,
   switch (MI.getOpcode()) {
   case TargetOpcode::G_SITOFP:
   case TargetOpcode::G_UITOFP:
+  case X86::G_FILD:
     return true;
   default:
     break;
@@ -296,6 +298,16 @@ X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     // VECRReg)
     getInstrPartialMappingIdxs(MI, MRI, /* isFP= */ true, OpRegBankIdx);
     break;
+  case X86::G_FIST:
+  case X86::G_FILD: {
+    auto &Op0 = MI.getOperand(0);
+    auto &Op1 = MI.getOperand(1);
+    const LLT Ty0 = MRI.getType(Op0.getReg());
+    const LLT Ty1 = MRI.getType(Op1.getReg());
+    OpRegBankIdx[0] = getPartialMappingIdx(MI, Ty0, /* isFP= */ true);
+    OpRegBankIdx[1] = getPartialMappingIdx(MI, Ty1, /* isFP= */ false);
+    break;
+  }
   case TargetOpcode::G_SITOFP:
   case TargetOpcode::G_FPTOSI:
   case TargetOpcode::G_UITOFP:
diff --git a/llvm/lib/Target/X86/X86InstrFragments.td b/llvm/lib/Target/X86/X86InstrFragments.td
index f9d70d1bb5d85..00af5b1f50733 100644
--- a/llvm/lib/Target/X86/X86InstrFragments.td
+++ b/llvm/lib/Target/X86/X86InstrFragments.td
@@ -841,13 +841,21 @@ def X86fldf80 : PatFrag<(ops node:$ptr), (X86fld node:$ptr), [{
 
 def X86fild16 : PatFrag<(ops node:$ptr), (X86fild node:$ptr), [{
   return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
-}]>;
+}]> {
+  let GISelPredicateCode = [{ return checkMemoryOpSize(MI, 2); }];
+}
+
 def X86fild32 : PatFrag<(ops node:$ptr), (X86fild node:$ptr), [{
   return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
-}]>;
+}]> {
+  let GISelPredicateCode = [{ return checkMemoryOpSize(MI, 4); }];
+}
+
 def X86fild64 : PatFrag<(ops node:$ptr), (X86fild node:$ptr), [{
   return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
-}]>;
+}]> {
+  let GISelPredicateCode = [{ return checkMemoryOpSize(MI, 8); }];
+}
 
 def X86fist32 : PatFrag<(ops node:$val, node:$ptr),
                         (X86fist node:$val, node:$ptr), [{
@@ -862,15 +870,23 @@ def X86fist64 : PatFrag<(ops node:$val, node:$ptr),
 def X86fp_to_i16mem : PatFrag<(ops node:$val, node:$ptr),
                               (X86fp_to_mem node:$val, node:$ptr), [{
   return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
-}]>;
+}]> {
+  let GISelPredicateCode = [{ return checkMemoryOpSize(MI, 2); }];
+}
+
 def X86fp_to_i32mem : PatFrag<(ops node:$val, node:$ptr),
                               (X86fp_to_mem node:$val, node:$ptr), [{
   return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
-}]>;
+}]> {
+  let GISelPredicateCode = [{ return checkMemoryOpSize(MI, 4); }];
+}
+
 def X86fp_to_i64mem : PatFrag<(ops node:$val, node:$ptr),
                               (X86fp_to_mem node:$val, node:$ptr), [{
   return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
-}]>;
+}]> {
+  let GISelPredicateCode = [{ return checkMemoryOpSize(MI, 8); }];
+}
 
 //===----------------------------------------------------------------------===//
 // FPStack pattern fragments
diff --git a/llvm/lib/Target/X86/X86InstrGISel.td b/llvm/lib/Target/X86/X86InstrGISel.td
new file mode 100644
index 0000000000000..7f5d92f43b0bb
--- /dev/null
+++ b/llvm/lib/Target/X86/X86InstrGISel.td
@@ -0,0 +1,31 @@
+//===- X86InstrGISel.td - X86 GISel target specific opcodes -*- tablegen -*===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// X86 GlobalISel target pseudo instruction definitions. This is kept
+// separately from the other tablegen files for organizational purposes, but
+// share the same infrastructure.
+//
+//===----------------------------------------------------------------------===//
+
+class X86GenericInstruction : GenericInstruction { let Namespace = "X86"; }
+
+def G_FILD : X86GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins ptype1:$src);
+  let hasSideEffects = false;
+  let mayLoad = true;
+}
+def G_FIST : X86GenericInstruction {
+  let OutOperandList = (outs);
+  let InOperandList = (ins type0:$src1, ptype1:$src2);
+  let hasSideEffects = false;
+  let mayStore = true;
+}
+
+def : GINodeEquiv<G_FILD, X86fild>;
+def : GINodeEquiv<G_FIST, X86fp_to_mem>;
\ No newline at end of file
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index e75d6743f9273..7f6c5614847e3 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -37,6 +37,11 @@ include "X86InstrFormats.td"
 //
 include "X86InstrUtils.td"
 
+//===----------------------------------------------------------------------===//
+// Global ISel
+//
+include "X86InstrGISel.td"
+
 //===----------------------------------------------------------------------===//
 // Subsystems.
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/X86/GlobalISel/isel-fp64-to-sint-x86.mir b/llvm/test/CodeGen/X86/GlobalISel/isel-fp64-to-sint-x86.mir
new file mode 100644
index 0000000000000..3e1af9d836aec
--- /dev/null
+++ b/llvm/test/CodeGen/X86/GlobalISel/isel-fp64-to-sint-x86.mir
@@ -0,0 +1,136 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# NOTE: This MIR test is required because the support for 64 bit memory ops is missing in X86 mode, Due to distinction between float/int types, support is expected in near future and there is this RFC in place https://discourse.llvm.org/t/rfc-globalisel-adding-fp-type-information-to-llt/83349. Once this support is introduced this test must be dropped and integrated into the LLVM IR tests.
+# RUN: llc -O2 -mtriple=i686-linux-gnu -mattr=+x87,-sse,-sse2 -run-pass=regbankselect,instruction-select -disable-gisel-legality-check -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes GISEL-X86
+
+---
+name:            test_double_to_int16
+alignment:       16
+exposesReturnsTwice: false
+legalized:       true
+tracksRegLiveness: true
+fixedStack:
+  - { id: 0, type: default, offset: 0, size: 8, alignment: 16, stack-id: default,
+      isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+stack:
+  - { id: 0, name: '', type: default, offset: 0, size: 2, alignment: 2,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+
+body:             |
+  bb.1.entry:
+    ; GISEL-X86-LABEL: name: test_double_to_int16
+    ; GISEL-X86: [[DEF:%[0-9]+]]:rfp64 = IMPLICIT_DEF
+    ; GISEL-X86-NEXT: FP64_TO_INT16_IN_MEM %stack.0, 1, $noreg, 0, $noreg, [[DEF]], implicit-def dead $eflags :: (store (s16) into %stack.0)
+    ; GISEL-X86-NEXT: [[MOV16rm:%[0-9]+]]:gr16 = MOV16rm %stack.0, 1, $noreg, 0, $noreg :: (load (s16) from %stack.0)
+    ; GISEL-X86-NEXT: $ax = COPY [[MOV16rm]]
+    ; GISEL-X86-NEXT: RET 0, implicit $ax
+    %0:_(s64) = IMPLICIT_DEF
+    %3:_(p0) = G_FRAME_INDEX %stack.0
+    G_FIST %0(s64), %3(p0) :: (store (s16) into %stack.0)
+    %2:_(s16) = G_LOAD %3(p0) :: (load (s16) from %stack.0)
+    $ax = COPY %2(s16)
+    RET 0, implicit $ax
+...
+---
+name:            test_double_to_int32
+alignment:       16
+exposesReturnsTwice: false
+legalized:       true
+tracksRegLiveness: true
+fixedStack:
+  - { id: 0, type: default, offset: 0, size: 8, alignment: 16, stack-id: default,
+      isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+stack:
+  - { id: 0, name: '', type: default, offset: 0, size: 4, alignment: 4,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+
+body:             |
+  bb.1.entry:
+    ; GISEL-X86-LABEL: name: test_double_to_int32
+    ; GISEL-X86: [[DEF:%[0-9]+]]:rfp64 = IMPLICIT_DEF
+    ; GISEL-X86-NEXT: FP64_TO_INT32_IN_MEM %stack.0, 1, $noreg, 0, $noreg, [[DEF]], implicit-def dead $eflags :: (store (s32) into %stack.0)
+    ; GISEL-X86-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %stack.0)
+    ; GISEL-X86-NEXT: $eax = COPY [[MOV32rm]]
+    ; GISEL-X86-NEXT: RET 0, implicit $eax
+    %0:_(s64) = IMPLICIT_DEF
+    %3:_(p0) = G_FRAME_INDEX %stack.0
+    G_FIST %0(s64), %3(p0) :: (store (s32) into %stack.0)
+    %2:_(s32) = G_LOAD %3(p0) :: (load (s32) from %stack.0)
+    $eax = COPY %2(s32)
+    RET 0, implicit $eax
+...
+---
+name:            test_double_to_int64
+alignment:       16
+exposesReturnsTwice: false
+legalized:       true
+tracksRegLiveness: true
+fixedStack:
+  - { id: 0, type: default, offset: 0, size: 8, alignment: 16, stack-id: default,
+      isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+stack:
+  - { id: 0, name: '', type: default, offset: 0, size: 8, alignment: 8,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+body:             |
+  bb.1.entry:
+    ; GISEL-X86-LABEL: name: test_double_to_int64
+    ; GISEL-X86: [[DEF:%[0-9]+]]:rfp64 = IMPLICIT_DEF
+    ; GISEL-X86-NEXT: [[LEA32r:%[0-9]+]]:gr32 = LEA32r %stack.0, 1, $noreg, 0, $noreg
+    ; GISEL-X86-NEXT: FP64_TO_INT64_IN_MEM %stack.0, 1, $noreg, 0, $noreg, [[DEF]], implicit-def dead $eflags :: (store (s64) into %stack.0)
+    ; GISEL-X86-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %stack.0, align 8)
+    ; GISEL-X86-NEXT: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm [[LEA32r]], 1, $noreg, 4, $noreg :: (load (s32) from %stack.0 + 4, basealign 8)
+    ; GISEL-X86-NEXT: $eax = COPY [[MOV32rm]]
+    ; GISEL-X86-NEXT: $edx = COPY [[MOV32rm1]]
+    ; GISEL-X86-NEXT: RET 0, implicit $eax, implicit $edx
+    %1:_(p0) = G_FRAME_INDEX %fixed-stack.0
+    %10:_(s32) = G_LOAD %1(p0) :: (invariant load (s32) from %fixed-stack.0, align 16)
+    %8:_(s32) = G_CONSTANT i32 4
+    %11:_(p0) = G_PTR_ADD %1, %8(s32)
+    %12:_(s32) = G_LOAD %11(p0) :: (invariant load (s32) from %fixed-stack.0 + 4, basealign 16)
+    %0:_(s64) = IMPLICIT_DEF
+    %5:_(p0) = G_FRAME_INDEX %stack.0
+    G_FIST %0(s64), %5(p0) :: (store (s64) into %stack.0)
+    %6:_(s32) = G_LOAD %5(p0) :: (load (s32) from %stack.0, align 8)
+    %7:_(p0) = G_PTR_ADD %5, %8(s32)
+    %9:_(s32) = G_LOAD %7(p0) :: (load (s32) from %stack.0 + 4, basealign 8)
+    $eax = COPY %6(s32)
+    $edx = COPY %9(s32)
+    RET 0, implicit $eax, implicit $edx
+...
+---
+name:            test_double_to_int8
+alignment:       16
+exposesReturnsTwice: false
+legalized:       true
+tracksRegLiveness: true
+fixedStack:
+  - { id: 0, type: default, offset: 0, size: 8, alignment: 16, stack-id: default,
+      isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+stack:
+  - { id: 0, name: '', type: default, offset: 0, size: 2, alignment: 2,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+
+body:             |
+  bb.1.entry:
+    ; GISEL-X86-LABEL: name: test_double_to_int8
+    ; GISEL-X86: [[DEF:%[0-9]+]]:rfp64 = IMPLICIT_DEF
+    ; GIS...
[truncated]

@pawan-nirpal-031
Copy link
Contributor Author

Thank you @e-kud for the #130445 which is used by this PR. :)

@pawan-nirpal-031
Copy link
Contributor Author

@e-kud @arsenm @RKSimon @JaydeepChauhan14 Can you please review.

@e-kud e-kud requested review from arsenm, RKSimon and e-kud April 25, 2025 18:12
Comment on lines 712 to 720
const LLT p0 = LLT::pointer(0, MF.getTarget().getPointerSizeInBits(0));
int MemSize = SrcTy.getSizeInBytes();
int StackSlot =
MF.getFrameInfo().CreateStackObject(MemSize, Align(MemSize), false);

auto SlotPointer = MIRBuilder.buildFrameIndex(p0, StackSlot);
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, StackSlot);
MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
PtrInfo, MachineMemOperand::MOStore, MemSize, Align(MemSize));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can use LegalizerHelper::createStackTemporary?

Comment on lines 744 to 752
unsigned MemSize = DstTy.getSizeInBytes();
int StackSlot =
MF.getFrameInfo().CreateStackObject(MemSize, Align(MemSize), false);

auto SlotPointer = MIRBuilder.buildFrameIndex(p0, StackSlot);

MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, StackSlot);
MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
PtrInfo, MachineMemOperand::MOStore, MemSize, Align(MemSize));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ditto

@@ -841,13 +841,21 @@ def X86fldf80 : PatFrag<(ops node:$ptr), (X86fld node:$ptr), [{

def X86fild16 : PatFrag<(ops node:$ptr), (X86fild node:$ptr), [{
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
}]>;
}]> {
let GISelPredicateCode = [{ return checkMemoryOpSize(MI, 2); }];
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can do this in terms of LLT instead of the byte size, like the dag version

Comment on lines 525 to 527
.customIf([=](const LegalityQuery &Query) -> bool {
if (!UseX87)
return false;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use the new format that pulls the predicate out of the legality query?

Comment on lines 362 to 365
if (!MI.mayLoadOrStore())
return false;
assert(MI.hasOneMemOperand() &&
"Expected load/store to have only one mem op!");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the use context I would expect MI.mayLoadOrStore to be implied

@pawan-nirpal-031 pawan-nirpal-031 requested a review from arsenm May 13, 2025 10:17
return typeInSet(0, {s32, s64, s80})(Query) &&
typeInSet(1, {s16, s32, s64})(Query);
})
.legalFor(HasSSE1, {s32, s32})
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be: .legalFor(HasSSE1, {{s32, s32}}) for the dst/src pair?

@@ -871,21 +871,21 @@ def X86fp_to_i16mem : PatFrag<(ops node:$val, node:$ptr),
(X86fp_to_mem node:$val, node:$ptr), [{
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
}]> {
let GISelPredicateCode = [{ return checkMemoryOpSize(MI, 2); }];
let GISelPredicateCode = [{ return checkMemoryOpSize(MI, LLT::scalar(16)); }];
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You shouldn't need a custom code predicate for this. There is already a MemoryVT and ScalarMemoryVT field on PatFrag

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bump

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @arsenm, I don't get the idea. Do you suggest to specify the type of $val and get rid of predicate for both SDAG and GISEL? I've tried to find examples of PatFrag using its memory fields but was unsuccessful.
Otherwise GlobalISel will always try to find its own version of predicate regardless of SDAG implementation.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, not the type of val, but the memory type for X86_fp_to_mem. Many of the common custom predicates were moved to specific fields on PatFrag that TableGen recognizes, like the memory alignment to avoid custom predicate code. This is how extending and atomic loads are specified, but the mechanism is supposed to be general.

The example would be how atomic_load_32 and atomic_load_64 work, they each set MemoryVT and wrap another PatFrag for atomic_load, which in turn wraps a PatFrag around ld

let IsStore = true;
let MemoryVT = i32;
}

def X86fp_to_i64mem : PatFrag<(ops node:$val, node:$ptr),
(X86fp_to_mem node:$val, node:$ptr), [{
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I expected that we don't need SDAG predicate as well, do we?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The memory type should replace this

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I feel that should be done in a separate PR?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm ok with both options.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you, I will address this in a separate PR.

Copy link
Contributor

@e-kud e-kud left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@pawan-nirpal-031
Copy link
Contributor Author

pawan-nirpal-031 commented May 30, 2025

Hi @arsenm @RKSimon Ping for review.

Copy link
Collaborator

@RKSimon RKSimon left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM - cheers

@e-kud e-kud merged commit a4b9e82 into llvm:main Jun 2, 2025
11 checks passed
@pawan-nirpal-031 pawan-nirpal-031 deleted the gisel-sitofp-fptosi-x87-enablement branch June 2, 2025 14:44
@llvm-ci
Copy link
Collaborator

llvm-ci commented Jun 2, 2025

LLVM Buildbot has detected a new failure on builder mlir-nvidia-gcc7 running on mlir-nvidia while building llvm at step 7 "test-build-check-mlir-build-only-check-mlir".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/116/builds/13625

Here is the relevant piece of the build log for the reference
Step 7 (test-build-check-mlir-build-only-check-mlir) failure: test (failure)
******************** TEST 'MLIR :: Integration/GPU/CUDA/async.mlir' FAILED ********************
Exit Code: 1

Command Output (stdout):
--
# RUN: at line 1
/vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir  | /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt -gpu-kernel-outlining  | /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm),nvvm-attach-target)'  | /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt -gpu-async-region -gpu-to-llvm -reconcile-unrealized-casts -gpu-module-to-binary="format=fatbin"  | /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt -async-to-async-runtime -async-runtime-ref-counting  | /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt -convert-async-to-llvm -convert-func-to-llvm -convert-arith-to-llvm -convert-cf-to-llvm -reconcile-unrealized-casts  | /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-runner    --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/lib/libmlir_cuda_runtime.so    --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/lib/libmlir_async_runtime.so    --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/lib/libmlir_runner_utils.so    --entry-point-result=void -O0  | /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/FileCheck /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt -gpu-kernel-outlining
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt '-pass-pipeline=builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm),nvvm-attach-target)'
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt -gpu-async-region -gpu-to-llvm -reconcile-unrealized-casts -gpu-module-to-binary=format=fatbin
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt -async-to-async-runtime -async-runtime-ref-counting
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt -convert-async-to-llvm -convert-func-to-llvm -convert-arith-to-llvm -convert-cf-to-llvm -reconcile-unrealized-casts
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-runner --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/lib/libmlir_cuda_runtime.so --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/lib/libmlir_async_runtime.so --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/lib/libmlir_runner_utils.so --entry-point-result=void -O0
# .---command stderr------------
# | 'cuStreamWaitEvent(stream, event, 0)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuStreamWaitEvent(stream, event, 0)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuStreamWaitEvent(stream, event, 0)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuStreamWaitEvent(stream, event, 0)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventSynchronize(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# `-----------------------------
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/FileCheck /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir
# .---command stderr------------
# | /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir:68:12: error: CHECK: expected string not found in input
# |  // CHECK: [84, 84]
# |            ^
# | <stdin>:1:1: note: scanning from here
# | Unranked Memref base@ = 0x5698fdfd3270 rank = 1 offset = 0 sizes = [2] strides = [1] data = 
# | ^
# | <stdin>:2:1: note: possible intended match here
# | [42, 42]
# | ^
# | 
# | Input file: <stdin>
# | Check file: /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir
# | 
# | -dump-input=help explains the following input dump.
# | 
# | Input was:
# | <<<<<<
# |             1: Unranked Memref base@ = 0x5698fdfd3270 rank = 1 offset = 0 sizes = [2] strides = [1] data =  
# | check:68'0     X~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ error: no match found
# |             2: [42, 42] 
# | check:68'0     ~~~~~~~~~
# | check:68'1     ?         possible intended match
...

sallto pushed a commit to sallto/llvm-project that referenced this pull request Jun 3, 2025
…llvm#137377)

Support legalization and selection of G_FPTOSI/G_SITOFP generic opcodes
in x87 mode.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

7 participants