[ISel/RISCV] Custom-promote [b]f16 in [l]lrint #146507
Conversation
@llvm/pr-subscribers-backend-risc-v

Author: Ramkumar Ramachandra (artagnon)

Changes

Extend lowerVectorXRINT to also emit an FP_EXTEND_VL when the source element type is [b]f16, and wire up this custom promotion. Updating the cost model so it no longer gives these an invalid cost is left to a companion patch.

Patch is 48.66 KiB, truncated to 20.00 KiB below; 5 files affected. Full version: https://github.com/llvm/llvm-project/pull/146507.diff
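As a quick illustration of the new lowering, excerpted from the tests added by this patch (the same pattern, using vfwcvtbf16.f.f.v, applies to bf16 sources): the f16 input is first widened to f32 and only then converted to the integer result.

define <2 x i64> @llrint_v2i64_v2f16(<2 x half> %x) {
  %a = call <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half> %x)
  ret <2 x i64> %a
}

; expected codegen, per the CHECK lines in the diff below:
;   vsetivli zero, 2, e16, mf4, ta, ma
;   vfwcvt.f.f.v v9, v8
;   vsetvli zero, zero, e32, mf2, ta, ma
;   vfwcvt.x.f.v v8, v9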
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 326dd7149ef96..27104ffed82e1 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1148,6 +1148,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
Custom);
setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
+ setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
@@ -1456,6 +1457,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// available.
setOperationAction(ISD::BUILD_VECTOR, MVT::f16, Custom);
}
+ if (Subtarget.hasVInstructionsF16Minimal())
+ setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
setOperationAction(ISD::FNEG, VT, Expand);
setOperationAction(ISD::FABS, VT, Expand);
setOperationAction(ISD::FCOPYSIGN, VT, Expand);
@@ -1480,6 +1483,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// available.
setOperationAction(ISD::BUILD_VECTOR, MVT::bf16, Custom);
}
+ if (Subtarget.hasVInstructionsBF16Minimal())
+ setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
setOperationAction(
{ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
Custom);
@@ -3484,6 +3489,14 @@ static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG,
}
auto [Mask, VL] = getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
+
+ // [b]f16 -> f32
+ MVT SrcElemType = SrcVT.getVectorElementType();
+ if (SrcElemType == MVT::f16 || SrcElemType == MVT::bf16) {
+ MVT F32VT = SrcContainerVT.changeVectorElementType(MVT::f32);
+ Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, F32VT, Src, Mask, VL);
+ }
+
SDValue Res =
DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, DstContainerVT, Src, Mask,
DAG.getTargetConstant(matchRoundingOp(Op.getOpcode()), DL,
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll
index 0a6f9f5ba0928..b9a84ff9b07b9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll
@@ -1,187 +1,277 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+f,+d -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v,+f,+d -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v,+f,+d,+zvfhmin,+zvfbfmin -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v,+f,+d,+zvfhmin,+zvfbfmin -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s
define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) {
-; RV32-LABEL: llrint_v1i64_v1f32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV32-NEXT: vfwcvt.x.f.v v9, v8
-; RV32-NEXT: vmv1r.v v8, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: llrint_v1i64_v1f32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV64-NEXT: vfwcvt.x.f.v v9, v8
-; RV64-NEXT: vmv1r.v v8, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: llrint_v1i64_v1f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-NEXT: vfwcvt.x.f.v v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
%a = call <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float> %x)
ret <1 x i64> %a
}
declare <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float>)
define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) {
-; RV32-LABEL: llrint_v2i64_v2f32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT: vfwcvt.x.f.v v9, v8
-; RV32-NEXT: vmv1r.v v8, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: llrint_v2i64_v2f32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV64-NEXT: vfwcvt.x.f.v v9, v8
-; RV64-NEXT: vmv1r.v v8, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: llrint_v2i64_v2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT: vfwcvt.x.f.v v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
%a = call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> %x)
ret <2 x i64> %a
}
declare <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float>)
define <3 x i64> @llrint_v3i64_v3f32(<3 x float> %x) {
-; RV32-LABEL: llrint_v3i64_v3f32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vmv1r.v v10, v8
-; RV32-NEXT: vfwcvt.x.f.v v8, v10
-; RV32-NEXT: ret
-;
-; RV64-LABEL: llrint_v3i64_v3f32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vmv1r.v v10, v8
-; RV64-NEXT: vfwcvt.x.f.v v8, v10
-; RV64-NEXT: ret
+; CHECK-LABEL: llrint_v3i64_v3f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.x.f.v v8, v10
+; CHECK-NEXT: ret
%a = call <3 x i64> @llvm.llrint.v3i64.v3f32(<3 x float> %x)
ret <3 x i64> %a
}
declare <3 x i64> @llvm.llrint.v3i64.v3f32(<3 x float>)
define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) {
-; RV32-LABEL: llrint_v4i64_v4f32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vmv1r.v v10, v8
-; RV32-NEXT: vfwcvt.x.f.v v8, v10
-; RV32-NEXT: ret
-;
-; RV64-LABEL: llrint_v4i64_v4f32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vmv1r.v v10, v8
-; RV64-NEXT: vfwcvt.x.f.v v8, v10
-; RV64-NEXT: ret
+; CHECK-LABEL: llrint_v4i64_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.x.f.v v8, v10
+; CHECK-NEXT: ret
%a = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> %x)
ret <4 x i64> %a
}
declare <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float>)
define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) {
-; RV32-LABEL: llrint_v8i64_v8f32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vmv2r.v v12, v8
-; RV32-NEXT: vfwcvt.x.f.v v8, v12
-; RV32-NEXT: ret
-;
-; RV64-LABEL: llrint_v8i64_v8f32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV64-NEXT: vmv2r.v v12, v8
-; RV64-NEXT: vfwcvt.x.f.v v8, v12
-; RV64-NEXT: ret
+; CHECK-LABEL: llrint_v8i64_v8f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.x.f.v v8, v12
+; CHECK-NEXT: ret
%a = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> %x)
ret <8 x i64> %a
}
declare <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float>)
define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) {
-; RV32-LABEL: llrint_v16i64_v16f32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT: vmv4r.v v16, v8
-; RV32-NEXT: vfwcvt.x.f.v v8, v16
-; RV32-NEXT: ret
-;
-; RV64-LABEL: llrint_v16i64_v16f32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV64-NEXT: vmv4r.v v16, v8
-; RV64-NEXT: vfwcvt.x.f.v v8, v16
-; RV64-NEXT: ret
+; CHECK-LABEL: llrint_v16i64_v16f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwcvt.x.f.v v8, v16
+; CHECK-NEXT: ret
%a = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> %x)
ret <16 x i64> %a
}
declare <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float>)
define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) {
-; RV32-LABEL: llrint_v1i64_v1f64:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vfcvt.x.f.v v8, v8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: llrint_v1i64_v1f64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vfcvt.x.f.v v8, v8
-; RV64-NEXT: ret
+; CHECK-LABEL: llrint_v1i64_v1f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v8, v8
+; CHECK-NEXT: ret
%a = call <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double> %x)
ret <1 x i64> %a
}
declare <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double>)
define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) {
-; RV32-LABEL: llrint_v2i64_v2f64:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vfcvt.x.f.v v8, v8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: llrint_v2i64_v2f64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV64-NEXT: vfcvt.x.f.v v8, v8
-; RV64-NEXT: ret
+; CHECK-LABEL: llrint_v2i64_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v8, v8
+; CHECK-NEXT: ret
%a = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> %x)
ret <2 x i64> %a
}
declare <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double>)
define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) {
-; RV32-LABEL: llrint_v4i64_v4f64:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vfcvt.x.f.v v8, v8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: llrint_v4i64_v4f64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64-NEXT: vfcvt.x.f.v v8, v8
-; RV64-NEXT: ret
+; CHECK-LABEL: llrint_v4i64_v4f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v8, v8
+; CHECK-NEXT: ret
%a = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> %x)
ret <4 x i64> %a
}
declare <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double>)
define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) {
-; RV32-LABEL: llrint_v8i64_v8f64:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vfcvt.x.f.v v8, v8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: llrint_v8i64_v8f64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vfcvt.x.f.v v8, v8
-; RV64-NEXT: ret
+; CHECK-LABEL: llrint_v8i64_v8f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v8, v8
+; CHECK-NEXT: ret
%a = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> %x)
ret <8 x i64> %a
}
declare <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double>)
+
+define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) {
+; CHECK-LABEL: llrint_v1i64_v1f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvt.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfwcvt.x.f.v v8, v9
+; CHECK-NEXT: ret
+ %a = call <1 x i64> @llvm.llrint.v1i64.v1f16(<1 x half> %x)
+ ret <1 x i64> %a
+}
+declare <1 x i64> @llvm.llrint.v1i64.v1f16(<1 x half>)
+
+define <2 x i64> @llrint_v2i64_v2f16(<2 x half> %x) {
+; CHECK-LABEL: llrint_v2i64_v2f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvt.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfwcvt.x.f.v v8, v9
+; CHECK-NEXT: ret
+ %a = call <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half> %x)
+ ret <2 x i64> %a
+}
+declare <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half>)
+
+define <3 x i64> @llrint_v3i64_v3f16(<3 x half> %x) {
+; CHECK-LABEL: llrint_v3i64_v3f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvt.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfwcvt.x.f.v v8, v10
+; CHECK-NEXT: ret
+ %a = call <3 x i64> @llvm.llrint.v3i64.v3f16(<3 x half> %x)
+ ret <3 x i64> %a
+}
+declare <3 x i64> @llvm.llrint.v3i64.v3f16(<3 x half>)
+
+define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) {
+; CHECK-LABEL: llrint_v4i64_v4f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvt.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfwcvt.x.f.v v8, v10
+; CHECK-NEXT: ret
+ %a = call <4 x i64> @llvm.llrint.v4i64.v4f16(<4 x half> %x)
+ ret <4 x i64> %a
+}
+declare <4 x i64> @llvm.llrint.v4i64.v4f16(<4 x half>)
+
+define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) {
+; CHECK-LABEL: llrint_v8i64_v8f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvt.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfwcvt.x.f.v v8, v12
+; CHECK-NEXT: ret
+ %a = call <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half> %x)
+ ret <8 x i64> %a
+}
+declare <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half>)
+
+define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) {
+; CHECK-LABEL: llrint_v16i64_v16f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvt.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfwcvt.x.f.v v8, v16
+; CHECK-NEXT: ret
+ %a = call <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half> %x)
+ ret <16 x i64> %a
+}
+declare <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half>)
+
+define <1 x i64> @llrint_v1i64_v1bf16(<1 x bfloat> %x) {
+; CHECK-LABEL: llrint_v1i64_v1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfwcvt.x.f.v v8, v9
+; CHECK-NEXT: ret
+ %a = call <1 x i64> @llvm.llrint.v1i64.v1bf16(<1 x bfloat> %x)
+ ret <1 x i64> %a
+}
+declare <1 x i64> @llvm.llrint.v1i64.v1bf16(<1 x bfloat>)
+
+define <2 x i64> @llrint_v2i64_v2bf16(<2 x bfloat> %x) {
+; CHECK-LABEL: llrint_v2i64_v2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfwcvt.x.f.v v8, v9
+; CHECK-NEXT: ret
+ %a = call <2 x i64> @llvm.llrint.v2i64.v2bf16(<2 x bfloat> %x)
+ ret <2 x i64> %a
+}
+declare <2 x i64> @llvm.llrint.v2i64.v2bf16(<2 x bfloat>)
+
+define <3 x i64> @llrint_v3i64_v3bf16(<3 x bfloat> %x) {
+; CHECK-LABEL: llrint_v3i64_v3bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfwcvt.x.f.v v8, v10
+; CHECK-NEXT: ret
+ %a = call <3 x i64> @llvm.llrint.v3i64.v3bf16(<3 x bfloat> %x)
+ ret <3 x i64> %a
+}
+declare <3 x i64> @llvm.llrint.v3i64.v3bf16(<3 x bfloat>)
+
+define <4 x i64> @llrint_v4i64_v4bf16(<4 x bfloat> %x) {
+; CHECK-LABEL: llrint_v4i64_v4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfwcvt.x.f.v v8, v10
+; CHECK-NEXT: ret
+ %a = call <4 x i64> @llvm.llrint.v4i64.v4bf16(<4 x bfloat> %x)
+ ret <4 x i64> %a
+}
+declare <4 x i64> @llvm.llrint.v4i64.v4bf16(<4 x bfloat>)
+
+define <8 x i64> @llrint_v8i64_v8bf16(<8 x bfloat> %x) {
+; CHECK-LABEL: llrint_v8i64_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfwcvt.x.f.v v8, v12
+; CHECK-NEXT: ret
+ %a = call <8 x i64> @llvm.llrint.v8i64.v8bf16(<8 x bfloat> %x)
+ ret <8 x i64> %a
+}
+declare <8 x i64> @llvm.llrint.v8i64.v8bf16(<8 x bfloat>)
+
+define <16 x i64> @llrint_v16i64_v16bf16(<16 x bfloat> %x) {
+; CHECK-LABEL: llrint_v16i64_v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfwcvt.x.f.v v8, v16
+; CHECK-NEXT: ret
+ %a = call <16 x i64> @llvm.llrint.v16i64.v16bf16(<16 x bfloat> %x)
+ ret <16 x i64> %a
+}
+declare <16 x i64> @llvm.llrint.v16i64.v16bf16(<16 x bfloat>)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll
index ef2208666e0b4..b28c69ac4c9ad 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll
@@ -1,9 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+f,+d \
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+f,+d,+zvfhmin,+zvfbfmin \
; RUN: -target-abi=ilp32d -verify-machineinstrs | FileCheck %s --check-prefix=RV32
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv64 -mattr=+v,+f,+d \
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv64 -mattr=+v,+f,+d,+zvfhmin,+zvfbfmin \
; RUN: -target-abi=lp64d -verify-machineinstrs | FileCheck %s --check-prefix=RV64-i32
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+f,+d \
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+f,+d,+zvfhmin,+zvfbfmin \
; RUN: -target-abi=lp64d -verify-machineinstrs | FileCheck %s --check-prefix=RV64-i64
define <1 x iXLen> @lrint_v1f32(<1 x float> %x) {
@@ -249,3 +249,351 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) {
ret <8 x iXLen> %a
}
declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double>)
+
+define <1 x iXLen> @lrint_v1f16(<1 x half> %x) {
+; RV32-LABEL: lrint_v1f16:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; RV32-NEXT: vfwcvt.f.f.v v9, v8
+; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; RV32-NEXT: vfcvt.x.f.v v8, v9
+; RV32-NEXT: ret
+;
+; RV64-i32-LABEL: lrint_v1f16:
+; RV64-i32: # %bb.0:
+; RV64-i32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; RV64-i32-NEXT: vfwcvt.f.f.v v9, v8
+; RV64-i32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; RV64-i32-NEXT: vfcvt.x.f.v v8, v9
+; RV64-i32-NEXT: ret
+;
+; RV64-i64-LABEL: lrint_v1f16:
+; RV64-i64: # %bb.0:
+; RV64-i64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; RV64-i64-NEXT: vfwcvt.f.f.v v9, v8
+; RV64-i64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; RV64-i64-NEXT: vfwcvt.x.f.v v8, v9
+; RV64-i64-NEXT: ret
+ %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half> %x)
+ ret <1 x iXLen> %a
+}
+declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half>)
+
+define <2 x iXLen> @lrint_v2f16(<2 x half> %x) {
+; RV32-LABEL: lrint_v2f16:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV32-NEXT: vfwcvt.f.f.v v9, v8
+; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; RV32-NEXT: vfcvt.x.f.v v8, v9
+; RV32-NEXT: ret
+;
+; RV64-i32-LABEL: lrint_v2f16:
+; RV64-i32: # %bb.0:
+; RV64-i32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV64-i32-NEXT: vfwcvt.f.f.v v9, v8
+; RV64-i32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; RV64-i32-NEXT: vfcvt.x.f.v v8, v9
+; RV64-i32-NEXT: ret
+;
+; RV64-i64-LABEL: lrint_v2f16:
+; RV64-i64: # %bb.0:
+; RV64-i64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV64-i64-NEXT: vfwcvt.f.f.v v9, v8
+; RV64-i64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; RV64-i64-NEXT: vfwcvt.x.f.v v8, v9
+; RV64-i64-NEXT: ret
+ %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half> %x)
+ ret <2 x iXLen> %a
+}
+declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half>)
+
+define <3 x iXLen> @lrint_v3f16(<3 x half> %x) {
+; RV32-LABEL: lrint_v3f16:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV32-NEXT: vfwcvt.f.f.v v9, v8
+; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV32-NEXT: vfcvt.x.f.v v8, v9
+; RV32-NEXT: ret
+;
+; RV64-i32-LABEL: lrint_v3f16:
+; RV6...
[truncated]