-
Notifications
You must be signed in to change notification settings - Fork 14.1k
[RISCV][TTI] Scale the cost of FP-Int conversion with LMUL #87506
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-risc-v @llvm/pr-subscribers-llvm-analysis Author: Shih-Po Hung (arcbbb) ChangesWidening/narrowing the source data type to match the destination data type may require multiple steps. Patch is 352.79 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/87506.diff 2 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 38304ff90252f0..6ea17aa1130963 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -988,31 +988,106 @@ InstructionCost RISCVTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
return Cost;
}
case ISD::FP_TO_SINT:
- case ISD::FP_TO_UINT:
+ case ISD::FP_TO_UINT: {
+ unsigned IsSigned = ISD == ISD::FP_TO_SINT;
+ unsigned FCVT = IsSigned ? RISCV::VFCVT_RTZ_X_F_V : RISCV::VFCVT_RTZ_XU_F_V;
+ unsigned FWCVT =
+ IsSigned ? RISCV::VFWCVT_RTZ_X_F_V : RISCV::VFWCVT_RTZ_XU_F_V;
+ unsigned FNCVT =
+ IsSigned ? RISCV::VFNCVT_RTZ_X_F_W : RISCV::VFNCVT_RTZ_XU_F_W;
+ unsigned SrcEltSize = Src->getScalarSizeInBits();
+ unsigned DstEltSize = Dst->getScalarSizeInBits();
+ if (DstEltSize == 1) {
+ // For fp vector to mask, we use:
+ // vfncvt.rtz.x.f.w v9, v8
+ // vand.vi v8, v9, 1
+ // vmsne.vi v0, v8, 0
+ SrcEltSize /= 2;
+ MVT ElementVT = MVT::getIntegerVT(SrcEltSize);
+ MVT InterimVT = SrcLT.second.changeVectorElementType(ElementVT);
+ return getRISCVInstructionCost(FNCVT, InterimVT, CostKind) +
+ getRISCVInstructionCost({RISCV::VAND_VI, RISCV::VMSNE_VI},
+ DstLT.second, CostKind);
+ }
+ if (DstEltSize == SrcEltSize)
+ return getRISCVInstructionCost(FCVT, DstLT.second, CostKind);
+ if (DstEltSize == (2 * SrcEltSize))
+ return getRISCVInstructionCost(FWCVT, DstLT.second, CostKind);
+ if (DstEltSize == (4 * SrcEltSize) && (SrcEltSize == 16)) {
+ // Convert f16 to f32 then convert f32 to i64.
+ MVT VecF32VT = DstLT.second.changeVectorElementType(MVT::f32);
+ return getRISCVInstructionCost(RISCV::VFWCVT_F_F_V, VecF32VT, CostKind) +
+ getRISCVInstructionCost(FWCVT, DstLT.second, CostKind);
+ }
+ if (DstEltSize < SrcEltSize) {
+ SrcEltSize /= 2;
+ MVT ElementVT = MVT::getIntegerVT(SrcEltSize);
+ MVT InterimVT = DstLT.second.changeVectorElementType(ElementVT);
+ InstructionCost Cost =
+ getRISCVInstructionCost(FNCVT, InterimVT, CostKind);
+ while (DstEltSize < SrcEltSize) {
+ SrcEltSize /= 2;
+ ElementVT = MVT::getIntegerVT(SrcEltSize);
+ InterimVT = DstLT.second.changeVectorElementType(ElementVT);
+ Cost += getRISCVInstructionCost(RISCV::VNSRL_WI, InterimVT, CostKind);
+ }
+ return Cost;
+ }
+ return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
+ }
case ISD::SINT_TO_FP:
- case ISD::UINT_TO_FP:
- if (Src->getScalarSizeInBits() == 1 || Dst->getScalarSizeInBits() == 1) {
- // The cost of convert from or to mask vector is different from other
- // cases. We could not use PowDiff to calculate it.
- // For mask vector to fp, we should use the following instructions:
+ case ISD::UINT_TO_FP: {
+ unsigned IsSigned = ISD == ISD::SINT_TO_FP;
+ unsigned FCVT = IsSigned ? RISCV::VFCVT_F_X_V : RISCV::VFCVT_F_XU_V;
+ unsigned FWCVT = IsSigned ? RISCV::VFWCVT_F_X_V : RISCV::VFWCVT_F_XU_V;
+ unsigned FNCVT = IsSigned ? RISCV::VFNCVT_F_X_W : RISCV::VFNCVT_F_XU_W;
+ unsigned SrcEltSize = Src->getScalarSizeInBits();
+ unsigned DstEltSize = Dst->getScalarSizeInBits();
+
+ if (SrcEltSize == 1) {
+ // For mask vector to fp, we use:
// vmv.v.i v8, 0
// vmerge.vim v8, v8, -1, v0
- // vfcvt.f.x.v v8, v8
+ // vfwcvt.f.x.v v8, v8
+ MVT ElementVT = MVT::getIntegerVT(DstEltSize >> 1);
+ MVT VecHalfVT = DstLT.second.changeVectorElementType(ElementVT);
+ return getRISCVInstructionCost({RISCV::VMV_V_I, RISCV::VMERGE_VIM},
+ VecHalfVT, CostKind) +
+ getRISCVInstructionCost(FWCVT, DstLT.second, CostKind);
+ }
- // And for fp vector to mask, we use:
- // vfncvt.rtz.x.f.w v9, v8
- // vand.vi v8, v9, 1
- // vmsne.vi v0, v8, 0
- return 3;
+ if (DstEltSize == SrcEltSize)
+ return getRISCVInstructionCost(FCVT, DstLT.second, CostKind);
+
+ if (DstEltSize == (2 * SrcEltSize))
+ return getRISCVInstructionCost(FWCVT, DstLT.second, CostKind);
+
+ if (DstEltSize == (4 * SrcEltSize)) {
+ unsigned WidenIntOp = IsSigned ? RISCV::VSEXT_VF2 : RISCV::VZEXT_VF2;
+ MVT ElementVT = MVT::getIntegerVT(DstEltSize >> 1);
+ MVT VecVT = DstLT.second.changeVectorElementType(ElementVT);
+ return getRISCVInstructionCost(WidenIntOp, VecVT, CostKind) +
+ getRISCVInstructionCost(FWCVT, DstLT.second, CostKind);
}
- if (std::abs(PowDiff) <= 1)
- return 1;
- // Backend could lower (v[sz]ext i8 to double) to vfcvt(v[sz]ext.f8 i8),
- // so it only need two conversion.
- if (Src->isIntOrIntVectorTy())
- return 2;
- // Counts of narrow/widen instructions.
- return std::abs(PowDiff);
+ if (DstEltSize == (8 * SrcEltSize)) {
+ unsigned WidenIntOp = IsSigned ? RISCV::VSEXT_VF4 : RISCV::VZEXT_VF4;
+ MVT ElementVT = MVT::getIntegerVT(DstEltSize >> 1);
+ MVT VecVT = DstLT.second.changeVectorElementType(ElementVT);
+ return getRISCVInstructionCost(WidenIntOp, VecVT, CostKind) +
+ getRISCVInstructionCost(FWCVT, DstLT.second, CostKind);
+ }
+ if (SrcEltSize == (2 * DstEltSize))
+ return getRISCVInstructionCost(FNCVT, DstLT.second, CostKind);
+
+ if ((SrcEltSize == (4 * DstEltSize)) && (DstEltSize == 16)) {
+ // Handle i64 to f16: vfncvt.f.x/xu + vfncvt.f.f
+ MVT DstVT = DstLT.second.changeVectorElementType(MVT::f32);
+ return getRISCVInstructionCost(FNCVT, DstVT, CostKind) +
+ getRISCVInstructionCost(RISCV::VFNCVT_F_F_W, DstLT.second,
+ CostKind);
+ }
+ return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
+ }
}
return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
}
diff --git a/llvm/test/Analysis/CostModel/RISCV/cast.ll b/llvm/test/Analysis/CostModel/RISCV/cast.ll
index 6ddd57a24c51f5..616310b30d0da9 100644
--- a/llvm/test/Analysis/CostModel/RISCV/cast.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/cast.ll
@@ -1725,87 +1725,87 @@ define void @fptosi() {
; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16_v4i32 = fptosi <4 x half> undef to <4 x i32>
; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_v4i32 = fptosi <4 x float> undef to <4 x i32>
; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_v4i32 = fptosi <4 x double> undef to <4 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16_v4i64 = fptosi <4 x half> undef to <4 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_v4i64 = fptosi <4 x float> undef to <4 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_v4i64 = fptosi <4 x double> undef to <4 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f16_v4i64 = fptosi <4 x half> undef to <4 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32_v4i64 = fptosi <4 x float> undef to <4 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f64_v4i64 = fptosi <4 x double> undef to <4 x i64>
; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f16_v4i1 = fptosi <4 x half> undef to <4 x i1>
; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f32_v4i1 = fptosi <4 x float> undef to <4 x i1>
; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f64_v4i1 = fptosi <4 x double> undef to <4 x i1>
; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16_v8i8 = fptosi <8 x half> undef to <8 x i8>
; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32_v8i8 = fptosi <8 x float> undef to <8 x i8>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f64_v8i8 = fptosi <8 x double> undef to <8 x i8>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f64_v8i8 = fptosi <8 x double> undef to <8 x i8>
; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16_v8i16 = fptosi <8 x half> undef to <8 x i16>
; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_v8i16 = fptosi <8 x float> undef to <8 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f64_v8i16 = fptosi <8 x double> undef to <8 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16_v8i32 = fptosi <8 x half> undef to <8 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_v8i32 = fptosi <8 x float> undef to <8 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_v8i32 = fptosi <8 x double> undef to <8 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f16_v8i64 = fptosi <8 x half> undef to <8 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_v8i64 = fptosi <8 x float> undef to <8 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_v8i64 = fptosi <8 x double> undef to <8 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f64_v8i16 = fptosi <8 x double> undef to <8 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f16_v8i32 = fptosi <8 x half> undef to <8 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32_v8i32 = fptosi <8 x float> undef to <8 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f64_v8i32 = fptosi <8 x double> undef to <8 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8f16_v8i64 = fptosi <8 x half> undef to <8 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32_v8i64 = fptosi <8 x float> undef to <8 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f64_v8i64 = fptosi <8 x double> undef to <8 x i64>
; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f16_v8i1 = fptosi <8 x half> undef to <8 x i1>
; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f32_v8i1 = fptosi <8 x float> undef to <8 x i1>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f64_v8i1 = fptosi <8 x double> undef to <8 x i1>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f64_v8i1 = fptosi <8 x double> undef to <8 x i1>
; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f16_v16i8 = fptosi <16 x half> undef to <16 x i8>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f32_v16i8 = fptosi <16 x float> undef to <16 x i8>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16f64_v16i8 = fptosi <16 x double> undef to <16 x i8>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f16_v16i16 = fptosi <16 x half> undef to <16 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_v16i16 = fptosi <16 x float> undef to <16 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f64_v16i16 = fptosi <16 x double> undef to <16 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f16_v16i32 = fptosi <16 x half> undef to <16 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_v16i32 = fptosi <16 x float> undef to <16 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f64_v16i32 = fptosi <16 x double> undef to <16 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f16_v16i64 = fptosi <16 x half> undef to <16 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_v16i64 = fptosi <16 x float> undef to <16 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f64_v16i64 = fptosi <16 x double> undef to <16 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16f32_v16i8 = fptosi <16 x float> undef to <16 x i8>
+; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v16f64_v16i8 = fptosi <16 x double> undef to <16 x i8>
+; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f16_v16i16 = fptosi <16 x half> undef to <16 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f32_v16i16 = fptosi <16 x float> undef to <16 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16f64_v16i16 = fptosi <16 x double> undef to <16 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16f16_v16i32 = fptosi <16 x half> undef to <16 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16f32_v16i32 = fptosi <16 x float> undef to <16 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16f64_v16i32 = fptosi <16 x double> undef to <16 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16f16_v16i64 = fptosi <16 x half> undef to <16 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f32_v16i64 = fptosi <16 x float> undef to <16 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f64_v16i64 = fptosi <16 x double> undef to <16 x i64>
; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16f16_v16i1 = fptosi <16 x half> undef to <16 x i1>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16f32_v16i1 = fptosi <16 x float> undef to <16 x i1>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16f64_v16i1 = fptosi <16 x double> undef to <16 x i1>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f16_v32i8 = fptosi <32 x half> undef to <32 x i8>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32f32_v32i8 = fptosi <32 x float> undef to <32 x i8>
-; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v32f64_v32i8 = fptosi <32 x double> undef to <32 x i8>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f16_v32i16 = fptosi <32 x half> undef to <32 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f32_v32i16 = fptosi <32 x float> undef to <32 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32f64_v32i16 = fptosi <32 x double> undef to <32 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f16_v32i32 = fptosi <32 x half> undef to <32 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f32_v32i32 = fptosi <32 x float> undef to <32 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32f64_v32i32 = fptosi <32 x double> undef to <32 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32f16_v32i64 = fptosi <32 x half> undef to <32 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32f32_v32i64 = fptosi <32 x float> undef to <32 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16f32_v16i1 = fptosi <16 x float> undef to <16 x i1>
+; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16f64_v16i1 = fptosi <16 x double> undef to <16 x i1>
+; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32f16_v32i8 = fptosi <32 x half> undef to <32 x i8>
+; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v32f32_v32i8 = fptosi <32 x float> undef to <32 x i8>
+; RV32-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v32f64_v32i8 = fptosi <32 x double> undef to <32 x i8>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32f16_v32i16 = fptosi <32 x half> undef to <32 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32f32_v32i16 = fptosi <32 x float> undef to <32 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v32f64_v32i16 = fptosi <32 x double> undef to <32 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32f16_v32i32 = fptosi <32 x half> undef to <32 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32f32_v32i32 = fptosi <32 x float> undef to <32 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v32f64_v32i32 = fptosi <32 x double> undef to <32 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %v32f16_v32i64 = fptosi <32 x half> undef to <32 x i64>
+; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v32f32_v32i64 = fptosi <32 x float> undef to <32 x i64>
; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32f64_v32i64 = fptosi <32 x double> undef to <32 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32f16_v32i1 = fptosi <32 x half> undef to <32 x i1>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32f32_v32i1 = fptosi <32 x float> undef to <32 x i1>
-; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v32f64_v32i1 = fptosi <32 x double> undef to <32 x i1>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64f16_v64i8 = fptosi <64 x half> undef to <64 x i8>
-; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v64f32_v64i8 = fptosi <64 x float> undef to <64 x i8>
-; RV32-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v64f64_v64i8 = fptosi <64 x double> undef to <64 x i8>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64f16_v64i16 = fptosi <64 x half> undef to <64 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64f32_v64i16 = fptosi <64 x float> undef to <64 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64f64_v64i16 = fptosi <64 x double> undef to <64 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64f16_v64i32 = fptosi <64 x half> undef to <64 x i32>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32f16_v32i1 = fptosi <32 x half> undef to <32 x i1>
+; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v32f32_v32i1 = fptosi <32 x float> undef to <32 x i1>
+; RV32-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v32f64_v32i1 = fptosi <32 x double> undef to <32 x i1>
+; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64f16_v64i8 = fptosi <64 x half> undef to <64 x i8>
+; RV32-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v64f32_v64i8 = fptosi <64 x float> undef to <64 x i8>
+; RV32-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v64f64_v64i8 = fptosi <64 x double> undef to <64 x i8>
+; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v64f16_v64i16 = fptosi <64 x half> undef to <64 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v64f32_v64i16 = fptosi <64 x float> undef to <64 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v64f64_v64i16 = fptosi <64 x double> undef to <64 x i16>
+; RV32-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v64f16_v64i32 = fptosi <64 x half> undef to <64 x i32>
; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64f32_v64i32 = fptosi <64 x float> undef to <64 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v64f64_v64i32 = fptosi <64 x double> undef to <64 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64f16_v64i64 = fptosi <64 x half> undef to <64 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost...
[truncated]
|
// vmv.v.i v8, 0 | ||
// vmerge.vim v8, v8, -1, v0 | ||
// vfcvt.f.x.v v8, v8 | ||
// vfwcvt.f.x.v v8, v8 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Weirdly vp.sitofp does not use a widening fwcvt here, but that's a separate issue that doesn't affect this patch.
ping |
1 similar comment
ping |
if ((SrcEltSize >> 1) > DstEltSize) { | ||
// For mask type, we use: | ||
// vand.vi v8, v9, 1 | ||
// vmsne.vi v0, v8, 0 | ||
VectorType *VecTy = | ||
VectorType::get(IntegerType::get(Dst->getContext(), SrcEltSize >> 1), | ||
cast<VectorType>(Dst)->getElementCount()); | ||
Cost += | ||
getCastInstrCost(Instruction::Trunc, Dst, VecTy, CCH, CostKind, I); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can this be moved into the SrcEltSize > DstEltSize
branch above, so we can reuse VecVT? Also I'm happy if you want to leave out the mask type comment, thanks for clarifying it in the reply to my review.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I moved but I cannot reuse it since one is MVT and the other is VectorType.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit, can this be renamed to cast-half
since it tests both zvfh and zvfhmin?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed. Thanks!
VectorType *VecTy = VectorType::get( | ||
IntegerType::get(Dst->getContext(), SrcEltSize >> 1), | ||
cast<VectorType>(Dst)->getElementCount()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we use getTypeForEVT
so we reuse VecVT? Otherwise I don't think we use the legalized type
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Cool, thanks for catching that!
unsigned DstEltSize = Dst->getScalarSizeInBits(); | ||
InstructionCost Cost = 0; | ||
if ((SrcEltSize == 16) && | ||
(!ST->hasVInstructionsF16() || ((DstEltSize >> 1) > SrcEltSize))) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use / 2
instead of >> 1
. Leave converting divide to shift as a compiler optimization.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed. Thanks!
34d9ba6
to
2f573e1
Compare
gentle ping |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it possible to precommit splitting out the half tests?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sure, created in #106692
precommit f16 test for #87506 fp-int conversion
Widening/narrowing the source data type to match the destination data type may require multiple steps. To model the costs, the patch generated the interim type by following the logic in RISCVTargetLowering::lowerVPFPIntConvOp.
2f573e1
to
31a4557
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/39/builds/1391 Here is the relevant piece of the build log for the reference
|
Widening/narrowing the source data type to match the destination data type may require multiple steps.
To model the costs, the patch generated the interim type by following the logic in RISCVTargetLowering::lowerVPFPIntConvOp.