Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

release/19.x: [loongarch][DAG][FREEZE] Fix crash when FREEZE a half(f16) type on loongarch (#107791) #109093

Open
wants to merge 1 commit into
base: release/19.x
Choose a base branch
from

Conversation

llvmbot
Copy link
Collaborator

@llvmbot llvmbot commented Sep 18, 2024

Backport 13280d9

Requested by: @nikic

@llvmbot llvmbot added this to the LLVM 19.X Release milestone Sep 18, 2024
@llvmbot
Copy link
Collaborator Author

llvmbot commented Sep 18, 2024

@arsenm What do you think about merging this PR to the release branch?

@llvmbot
Copy link
Collaborator Author

llvmbot commented Sep 18, 2024

@llvm/pr-subscribers-backend-loongarch

Author: None (llvmbot)

Changes

Backport 13280d9

Requested by: @nikic


Full diff: https://github.com/llvm/llvm-project/pull/109093.diff

2 Files Affected:

  • (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.h (+2)
  • (modified) llvm/test/CodeGen/LoongArch/fp16-promote.ll (+126-72)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index fc5b36c2124e01..267837add575dc 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -332,6 +332,8 @@ class LoongArchTargetLowering : public TargetLowering {
   bool isEligibleForTailCallOptimization(
       CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
       const SmallVectorImpl<CCValAssign> &ArgLocs) const;
+
+  bool softPromoteHalfType() const override { return true; }
 };
 
 } // end namespace llvm
diff --git a/llvm/test/CodeGen/LoongArch/fp16-promote.ll b/llvm/test/CodeGen/LoongArch/fp16-promote.ll
index 75f920b43a06ce..03965ac81f3763 100644
--- a/llvm/test/CodeGen/LoongArch/fp16-promote.ll
+++ b/llvm/test/CodeGen/LoongArch/fp16-promote.ll
@@ -126,42 +126,40 @@ define void @test_fptrunc_double(double %d, ptr %p) nounwind {
 define half @test_fadd_reg(half %a, half %b) nounwind {
 ; LA32-LABEL: test_fadd_reg:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    addi.w $sp, $sp, -32
-; LA32-NEXT:    st.w $ra, $sp, 28 # 4-byte Folded Spill
-; LA32-NEXT:    fst.d $fs0, $sp, 16 # 8-byte Folded Spill
-; LA32-NEXT:    fst.d $fs1, $sp, 8 # 8-byte Folded Spill
+; LA32-NEXT:    addi.w $sp, $sp, -16
+; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT:    st.w $fp, $sp, 8 # 4-byte Folded Spill
+; LA32-NEXT:    fst.d $fs0, $sp, 0 # 8-byte Folded Spill
+; LA32-NEXT:    move $fp, $a0
+; LA32-NEXT:    move $a0, $a1
+; LA32-NEXT:    bl %plt(__gnu_h2f_ieee)
 ; LA32-NEXT:    fmov.s $fs0, $fa0
-; LA32-NEXT:    fmov.s $fa0, $fa1
-; LA32-NEXT:    bl %plt(__gnu_f2h_ieee)
+; LA32-NEXT:    move $a0, $fp
 ; LA32-NEXT:    bl %plt(__gnu_h2f_ieee)
-; LA32-NEXT:    fmov.s $fs1, $fa0
-; LA32-NEXT:    fmov.s $fa0, $fs0
+; LA32-NEXT:    fadd.s $fa0, $fa0, $fs0
 ; LA32-NEXT:    bl %plt(__gnu_f2h_ieee)
-; LA32-NEXT:    bl %plt(__gnu_h2f_ieee)
-; LA32-NEXT:    fadd.s $fa0, $fa0, $fs1
-; LA32-NEXT:    fld.d $fs1, $sp, 8 # 8-byte Folded Reload
-; LA32-NEXT:    fld.d $fs0, $sp, 16 # 8-byte Folded Reload
-; LA32-NEXT:    ld.w $ra, $sp, 28 # 4-byte Folded Reload
-; LA32-NEXT:    addi.w $sp, $sp, 32
+; LA32-NEXT:    fld.d $fs0, $sp, 0 # 8-byte Folded Reload
+; LA32-NEXT:    ld.w $fp, $sp, 8 # 4-byte Folded Reload
+; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT:    addi.w $sp, $sp, 16
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: test_fadd_reg:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    addi.d $sp, $sp, -32
 ; LA64-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
-; LA64-NEXT:    fst.d $fs0, $sp, 16 # 8-byte Folded Spill
-; LA64-NEXT:    fst.d $fs1, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
+; LA64-NEXT:    fst.d $fs0, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT:    move $fp, $a0
+; LA64-NEXT:    move $a0, $a1
+; LA64-NEXT:    bl %plt(__gnu_h2f_ieee)
 ; LA64-NEXT:    fmov.s $fs0, $fa0
-; LA64-NEXT:    fmov.s $fa0, $fa1
-; LA64-NEXT:    bl %plt(__gnu_f2h_ieee)
+; LA64-NEXT:    move $a0, $fp
 ; LA64-NEXT:    bl %plt(__gnu_h2f_ieee)
-; LA64-NEXT:    fmov.s $fs1, $fa0
-; LA64-NEXT:    fmov.s $fa0, $fs0
+; LA64-NEXT:    fadd.s $fa0, $fa0, $fs0
 ; LA64-NEXT:    bl %plt(__gnu_f2h_ieee)
-; LA64-NEXT:    bl %plt(__gnu_h2f_ieee)
-; LA64-NEXT:    fadd.s $fa0, $fa0, $fs1
-; LA64-NEXT:    fld.d $fs1, $sp, 8 # 8-byte Folded Reload
-; LA64-NEXT:    fld.d $fs0, $sp, 16 # 8-byte Folded Reload
+; LA64-NEXT:    fld.d $fs0, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
 ; LA64-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
 ; LA64-NEXT:    addi.d $sp, $sp, 32
 ; LA64-NEXT:    ret
@@ -177,16 +175,16 @@ define void @test_fadd_mem(ptr %p, ptr %q) nounwind {
 ; LA32-NEXT:    st.w $fp, $sp, 24 # 4-byte Folded Spill
 ; LA32-NEXT:    st.w $s0, $sp, 20 # 4-byte Folded Spill
 ; LA32-NEXT:    fst.d $fs0, $sp, 8 # 8-byte Folded Spill
-; LA32-NEXT:    move $fp, $a1
-; LA32-NEXT:    move $s0, $a0
-; LA32-NEXT:    ld.hu $a0, $a0, 0
+; LA32-NEXT:    move $fp, $a0
+; LA32-NEXT:    ld.hu $s0, $a0, 0
+; LA32-NEXT:    ld.hu $a0, $a1, 0
 ; LA32-NEXT:    bl %plt(__gnu_h2f_ieee)
 ; LA32-NEXT:    fmov.s $fs0, $fa0
-; LA32-NEXT:    ld.hu $a0, $fp, 0
+; LA32-NEXT:    move $a0, $s0
 ; LA32-NEXT:    bl %plt(__gnu_h2f_ieee)
-; LA32-NEXT:    fadd.s $fa0, $fs0, $fa0
+; LA32-NEXT:    fadd.s $fa0, $fa0, $fs0
 ; LA32-NEXT:    bl %plt(__gnu_f2h_ieee)
-; LA32-NEXT:    st.h $a0, $s0, 0
+; LA32-NEXT:    st.h $a0, $fp, 0
 ; LA32-NEXT:    fld.d $fs0, $sp, 8 # 8-byte Folded Reload
 ; LA32-NEXT:    ld.w $s0, $sp, 20 # 4-byte Folded Reload
 ; LA32-NEXT:    ld.w $fp, $sp, 24 # 4-byte Folded Reload
@@ -201,16 +199,16 @@ define void @test_fadd_mem(ptr %p, ptr %q) nounwind {
 ; LA64-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
 ; LA64-NEXT:    st.d $s0, $sp, 8 # 8-byte Folded Spill
 ; LA64-NEXT:    fst.d $fs0, $sp, 0 # 8-byte Folded Spill
-; LA64-NEXT:    move $fp, $a1
-; LA64-NEXT:    move $s0, $a0
-; LA64-NEXT:    ld.hu $a0, $a0, 0
+; LA64-NEXT:    move $fp, $a0
+; LA64-NEXT:    ld.hu $s0, $a0, 0
+; LA64-NEXT:    ld.hu $a0, $a1, 0
 ; LA64-NEXT:    bl %plt(__gnu_h2f_ieee)
 ; LA64-NEXT:    fmov.s $fs0, $fa0
-; LA64-NEXT:    ld.hu $a0, $fp, 0
+; LA64-NEXT:    move $a0, $s0
 ; LA64-NEXT:    bl %plt(__gnu_h2f_ieee)
-; LA64-NEXT:    fadd.s $fa0, $fs0, $fa0
+; LA64-NEXT:    fadd.s $fa0, $fa0, $fs0
 ; LA64-NEXT:    bl %plt(__gnu_f2h_ieee)
-; LA64-NEXT:    st.h $a0, $s0, 0
+; LA64-NEXT:    st.h $a0, $fp, 0
 ; LA64-NEXT:    fld.d $fs0, $sp, 0 # 8-byte Folded Reload
 ; LA64-NEXT:    ld.d $s0, $sp, 8 # 8-byte Folded Reload
 ; LA64-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
@@ -227,42 +225,40 @@ define void @test_fadd_mem(ptr %p, ptr %q) nounwind {
 define half @test_fmul_reg(half %a, half %b) nounwind {
 ; LA32-LABEL: test_fmul_reg:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    addi.w $sp, $sp, -32
-; LA32-NEXT:    st.w $ra, $sp, 28 # 4-byte Folded Spill
-; LA32-NEXT:    fst.d $fs0, $sp, 16 # 8-byte Folded Spill
-; LA32-NEXT:    fst.d $fs1, $sp, 8 # 8-byte Folded Spill
+; LA32-NEXT:    addi.w $sp, $sp, -16
+; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT:    st.w $fp, $sp, 8 # 4-byte Folded Spill
+; LA32-NEXT:    fst.d $fs0, $sp, 0 # 8-byte Folded Spill
+; LA32-NEXT:    move $fp, $a0
+; LA32-NEXT:    move $a0, $a1
+; LA32-NEXT:    bl %plt(__gnu_h2f_ieee)
 ; LA32-NEXT:    fmov.s $fs0, $fa0
-; LA32-NEXT:    fmov.s $fa0, $fa1
-; LA32-NEXT:    bl %plt(__gnu_f2h_ieee)
+; LA32-NEXT:    move $a0, $fp
 ; LA32-NEXT:    bl %plt(__gnu_h2f_ieee)
-; LA32-NEXT:    fmov.s $fs1, $fa0
-; LA32-NEXT:    fmov.s $fa0, $fs0
+; LA32-NEXT:    fmul.s $fa0, $fa0, $fs0
 ; LA32-NEXT:    bl %plt(__gnu_f2h_ieee)
-; LA32-NEXT:    bl %plt(__gnu_h2f_ieee)
-; LA32-NEXT:    fmul.s $fa0, $fa0, $fs1
-; LA32-NEXT:    fld.d $fs1, $sp, 8 # 8-byte Folded Reload
-; LA32-NEXT:    fld.d $fs0, $sp, 16 # 8-byte Folded Reload
-; LA32-NEXT:    ld.w $ra, $sp, 28 # 4-byte Folded Reload
-; LA32-NEXT:    addi.w $sp, $sp, 32
+; LA32-NEXT:    fld.d $fs0, $sp, 0 # 8-byte Folded Reload
+; LA32-NEXT:    ld.w $fp, $sp, 8 # 4-byte Folded Reload
+; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT:    addi.w $sp, $sp, 16
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: test_fmul_reg:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    addi.d $sp, $sp, -32
 ; LA64-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
-; LA64-NEXT:    fst.d $fs0, $sp, 16 # 8-byte Folded Spill
-; LA64-NEXT:    fst.d $fs1, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
+; LA64-NEXT:    fst.d $fs0, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT:    move $fp, $a0
+; LA64-NEXT:    move $a0, $a1
+; LA64-NEXT:    bl %plt(__gnu_h2f_ieee)
 ; LA64-NEXT:    fmov.s $fs0, $fa0
-; LA64-NEXT:    fmov.s $fa0, $fa1
-; LA64-NEXT:    bl %plt(__gnu_f2h_ieee)
+; LA64-NEXT:    move $a0, $fp
 ; LA64-NEXT:    bl %plt(__gnu_h2f_ieee)
-; LA64-NEXT:    fmov.s $fs1, $fa0
-; LA64-NEXT:    fmov.s $fa0, $fs0
+; LA64-NEXT:    fmul.s $fa0, $fa0, $fs0
 ; LA64-NEXT:    bl %plt(__gnu_f2h_ieee)
-; LA64-NEXT:    bl %plt(__gnu_h2f_ieee)
-; LA64-NEXT:    fmul.s $fa0, $fa0, $fs1
-; LA64-NEXT:    fld.d $fs1, $sp, 8 # 8-byte Folded Reload
-; LA64-NEXT:    fld.d $fs0, $sp, 16 # 8-byte Folded Reload
+; LA64-NEXT:    fld.d $fs0, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
 ; LA64-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
 ; LA64-NEXT:    addi.d $sp, $sp, 32
 ; LA64-NEXT:    ret
@@ -278,16 +274,16 @@ define void @test_fmul_mem(ptr %p, ptr %q) nounwind {
 ; LA32-NEXT:    st.w $fp, $sp, 24 # 4-byte Folded Spill
 ; LA32-NEXT:    st.w $s0, $sp, 20 # 4-byte Folded Spill
 ; LA32-NEXT:    fst.d $fs0, $sp, 8 # 8-byte Folded Spill
-; LA32-NEXT:    move $fp, $a1
-; LA32-NEXT:    move $s0, $a0
-; LA32-NEXT:    ld.hu $a0, $a0, 0
+; LA32-NEXT:    move $fp, $a0
+; LA32-NEXT:    ld.hu $s0, $a0, 0
+; LA32-NEXT:    ld.hu $a0, $a1, 0
 ; LA32-NEXT:    bl %plt(__gnu_h2f_ieee)
 ; LA32-NEXT:    fmov.s $fs0, $fa0
-; LA32-NEXT:    ld.hu $a0, $fp, 0
+; LA32-NEXT:    move $a0, $s0
 ; LA32-NEXT:    bl %plt(__gnu_h2f_ieee)
-; LA32-NEXT:    fmul.s $fa0, $fs0, $fa0
+; LA32-NEXT:    fmul.s $fa0, $fa0, $fs0
 ; LA32-NEXT:    bl %plt(__gnu_f2h_ieee)
-; LA32-NEXT:    st.h $a0, $s0, 0
+; LA32-NEXT:    st.h $a0, $fp, 0
 ; LA32-NEXT:    fld.d $fs0, $sp, 8 # 8-byte Folded Reload
 ; LA32-NEXT:    ld.w $s0, $sp, 20 # 4-byte Folded Reload
 ; LA32-NEXT:    ld.w $fp, $sp, 24 # 4-byte Folded Reload
@@ -302,16 +298,16 @@ define void @test_fmul_mem(ptr %p, ptr %q) nounwind {
 ; LA64-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
 ; LA64-NEXT:    st.d $s0, $sp, 8 # 8-byte Folded Spill
 ; LA64-NEXT:    fst.d $fs0, $sp, 0 # 8-byte Folded Spill
-; LA64-NEXT:    move $fp, $a1
-; LA64-NEXT:    move $s0, $a0
-; LA64-NEXT:    ld.hu $a0, $a0, 0
+; LA64-NEXT:    move $fp, $a0
+; LA64-NEXT:    ld.hu $s0, $a0, 0
+; LA64-NEXT:    ld.hu $a0, $a1, 0
 ; LA64-NEXT:    bl %plt(__gnu_h2f_ieee)
 ; LA64-NEXT:    fmov.s $fs0, $fa0
-; LA64-NEXT:    ld.hu $a0, $fp, 0
+; LA64-NEXT:    move $a0, $s0
 ; LA64-NEXT:    bl %plt(__gnu_h2f_ieee)
-; LA64-NEXT:    fmul.s $fa0, $fs0, $fa0
+; LA64-NEXT:    fmul.s $fa0, $fa0, $fs0
 ; LA64-NEXT:    bl %plt(__gnu_f2h_ieee)
-; LA64-NEXT:    st.h $a0, $s0, 0
+; LA64-NEXT:    st.h $a0, $fp, 0
 ; LA64-NEXT:    fld.d $fs0, $sp, 0 # 8-byte Folded Reload
 ; LA64-NEXT:    ld.d $s0, $sp, 8 # 8-byte Folded Reload
 ; LA64-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
@@ -324,3 +320,61 @@ define void @test_fmul_mem(ptr %p, ptr %q) nounwind {
   store half %r, ptr %p
   ret void
 }
+
+define half @freeze_half_undef() nounwind {
+; LA32-LABEL: freeze_half_undef:
+; LA32:       # %bb.0:
+; LA32-NEXT:    addi.w $sp, $sp, -16
+; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT:    movgr2fr.w $fa0, $zero
+; LA32-NEXT:    bl %plt(__gnu_f2h_ieee)
+; LA32-NEXT:    bl %plt(__gnu_h2f_ieee)
+; LA32-NEXT:    fadd.s $fa0, $fa0, $fa0
+; LA32-NEXT:    bl %plt(__gnu_f2h_ieee)
+; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT:    addi.w $sp, $sp, 16
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: freeze_half_undef:
+; LA64:       # %bb.0:
+; LA64-NEXT:    addi.d $sp, $sp, -16
+; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT:    movgr2fr.w $fa0, $zero
+; LA64-NEXT:    bl %plt(__gnu_f2h_ieee)
+; LA64-NEXT:    bl %plt(__gnu_h2f_ieee)
+; LA64-NEXT:    fadd.s $fa0, $fa0, $fa0
+; LA64-NEXT:    bl %plt(__gnu_f2h_ieee)
+; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT:    addi.d $sp, $sp, 16
+; LA64-NEXT:    ret
+  %y1 = freeze half undef
+  %t1 = fadd half %y1, %y1
+  ret half %t1
+}
+
+define half @freeze_half_poison(half %maybe.poison) nounwind {
+; LA32-LABEL: freeze_half_poison:
+; LA32:       # %bb.0:
+; LA32-NEXT:    addi.w $sp, $sp, -16
+; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT:    bl %plt(__gnu_h2f_ieee)
+; LA32-NEXT:    fadd.s $fa0, $fa0, $fa0
+; LA32-NEXT:    bl %plt(__gnu_f2h_ieee)
+; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT:    addi.w $sp, $sp, 16
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: freeze_half_poison:
+; LA64:       # %bb.0:
+; LA64-NEXT:    addi.d $sp, $sp, -16
+; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT:    bl %plt(__gnu_h2f_ieee)
+; LA64-NEXT:    fadd.s $fa0, $fa0, $fa0
+; LA64-NEXT:    bl %plt(__gnu_f2h_ieee)
+; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT:    addi.d $sp, $sp, 16
+; LA64-NEXT:    ret
+  %y1 = freeze half %maybe.poison
+  %t1 = fadd half %y1, %y1
+  ret half %t1
+}

@nikic
Copy link
Contributor

nikic commented Sep 18, 2024

Do I understand correctly that a side effect of this change is to change the half float ABI on loongarch from passing via FP regs to passing via GPR regs?

@yxd-ym
Copy link
Contributor

yxd-ym commented Sep 18, 2024

Do I understand correctly that a side effect of this change is to change the half float ABI on loongarch from passing via FP regs to passing via GPR regs?

if (!isTypeLegal(MVT::f16)) {
// Allow targets to control how we legalize half.
bool SoftPromoteHalfType = softPromoteHalfType();
bool UseFPRegsForHalfType = !SoftPromoteHalfType || useFPRegsForHalfType();
if (!UseFPRegsForHalfType) {
NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::i16];
RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::i16];
} else {
NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::f32];
RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::f32];
}
TransformToType[MVT::f16] = MVT::f32;
if (SoftPromoteHalfType) {
ValueTypeActions.setTypeAction(MVT::f16, TypeSoftPromoteHalf);
} else {
ValueTypeActions.setTypeAction(MVT::f16, TypePromoteFloat);
}
}

From the logic above, the answer seems to be yes. Because with this patch, the following functions' return values for loongarch are

  • softPromoteHalfType() => true
  • useFPRegsForHalfType() => false

// Return true if the half type should be promoted using soft promotion rules
// where each operation is promoted to f32 individually, then converted to
// fp16. The default behavior is to promote chains of operations, keeping
// intermediate results in f32 precision and range.
virtual bool softPromoteHalfType() const { return false; }
// Return true if, for soft-promoted half, the half type should be passed
// to and returned from functions as f32. The default behavior is to
// pass as i16. If soft-promoted half is not used, this function is ignored
// and values are always passed and returned as f32.
virtual bool useFPRegsForHalfType() const { return false; }

@tru
Copy link
Collaborator

tru commented Sep 24, 2024

Are there still question marks about this one, @nikic, or can it be approved and merged?

@nikic
Copy link
Contributor

nikic commented Sep 24, 2024

I'm concerned about changing the call ABI in a stable release, even if the ABI itself is not specified. It means that if you compile an object with LLVM 19.1.0, you may not be able to link it with an object compiled with LLVM 19.1.1.

It's also worth noting that the ABI used here may not be final either, #109368 proposed to change it again.

@heiher
Copy link
Member

heiher commented Sep 24, 2024

Currently, Rust's compiler-builtins has marked fp16 as available for loongarch64, but in fact, the functionality is broken. Even with this patch, it is not optimal. Subjectively, I hope these patches can be backported to LLVM 19 to avoid ABI incompatibility issues across multiple versions. If we don’t break anything unrelated to fp16, it won’t be a bad thing to work on fixing fp16, as it was already broken to begin with. Thanks.

@arsenm
Copy link
Contributor

arsenm commented Sep 24, 2024

This patch is fixing the stated issue in a roundabout way. You do not need to change the ABI or half promotion strategy just to support freeze on half values

@nikic
Copy link
Contributor

nikic commented Sep 24, 2024

fp16 support is generally quite broken outside of a few targets -- from a Rust perspective, we'd prefer full support in LLVM 20 rather than backporting things piecemeal to LLVM 19. Especially if it's going to change the ABI, which I think means we'd need LLVM-patch-version-specific handling in compiler-builtins.

@tgross35
Copy link

Currently, Rust's compiler-builtins has marked fp16 as available for loongarch64, but in fact, the functionality is broken. Even with this patch, it is not optimal. Subjectively, I hope these patches can be backported to LLVM 19 to avoid ABI incompatibility issues across multiple versions. If we don’t break anything unrelated to fp16, it won’t be a bad thing to work on fixing fp16, as it was already broken to begin with. Thanks.

If you are referring to https://github.com/rust-lang/compiler-builtins/blob/bb18ce58c2b5fec081a5dd3553aa960a36b8de5c/configure.rs#L51-L91, we only disable f16/f128 on platforms where LLVM 19 actually has a crash or produces code that can't be linked. Quite a few of the targets that aren't disabled there have some form of ABI issue - so we just build the symbols but don't actually test with them.

(the config for what gets tested is at https://github.com/rust-lang/rust/blob/2bd1e894efde3b6be857ad345914a3b1cea51def/library/std/build.rs#L81-L186)

@tru
Copy link
Collaborator

tru commented Oct 1, 2024

Sounds to me like we don't have to accept this patch into the 19.1 release. Please re-open and argue if you don't agree with that assessment.

@heiher
Copy link
Member

heiher commented Oct 19, 2024

Update: #109368 (comment)

I suggest continuing this PR to ensure that fp16 support is functional on the release/19.x.

@heiher heiher reopened this Oct 19, 2024
…ongarch (llvm#107791)

For zig with LLVM 19.1.0rc4, we are seeing the following error when
bootstrapping a `loongarch64-linux-musl` target.

ziglang/zig-bootstrap#164 (comment)

It seems that this issue is caused by `PromoteFloatResult` not
handling the FREEZE op on loongarch.

Here is the reproduction of the error: https://godbolt.org/z/PPfvWjjG5

~~This patch adds the FREEZE OP handling with `PromoteFloatRes_UnaryOp`
and adds a test case.~~

This patch changes loongarch's way of floating point promotion to soft
promotion to avoid this problem.

See: loongarch's handling of `half`:
- llvm#93894
- llvm#94456

Also see: other float promotion FREEZE handling
-
llvm@0019c2f

(cherry picked from commit 13280d9)
@tru
Copy link
Collaborator

tru commented Oct 28, 2024

@heiher this would still break the ABI right? so it would still create problem for downstream users like rust?

cc @nikic @arsenm @tgross35

@heiher
Copy link
Member

heiher commented Oct 28, 2024

@heiher this would still break the ABI right? so it would still create problem for downstream users like rust?

cc @nikic @arsenm @tgross35

After deciding that FPU does not support fp16, there’s no longer any ABI-breaking impact. Without this patch, LoongArch’s software emulation for fp16 operations still has correctness issues. This is why downstream software depends on it, which is the reason for my request. Thanks.

@tgross35
Copy link

@heiher this would still break the ABI right? so it would still create problem for downstream users like rust?

Speaking only from a Rust perspective, don't worry too much about making breaking changes that fix f16 or f128 behavior. The types are nightly-only for now (probably will be that way for at least a year or so) so we don't have any public guarantees. (I would just get to un-skip f16 tests on another arch sooner).

I don't know what other frontends expose this type, but the commit mentions Zig.

If it is preferable to keep using float registers, it sounds like there is another fix for the precision issue rather than changing the ABI #97975 (comment). I don't think there is a test for the precision yet.

@arsenm
Copy link
Contributor

arsenm commented Oct 28, 2024

I would like to reiterate that this is a roundabout fix for the assertion in question. It sends it down a different path that happens to avoid it. It is not necessary to make this ABI change to fix the assertion.

tgross35 pushed a commit to rust-lang/compiler-builtins that referenced this pull request Nov 1, 2024
Disable `f16` for LoongArch64 due to incorrect code generation on LLVM 19,
which causes failures in `testcrate/tests/conv.rs`. This workaround will
remain in place until llvm/llvm-project#109093 is merged or we upgrade to
LLVM 20.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
Status: Needs Review
Development

Successfully merging this pull request may close these issues.

7 participants