From cdecd48c613bb0f356e6274ba49a9603abd42aa1 Mon Sep 17 00:00:00 2001
From: AinsleySnow <772571228@qq.com>
Date: Thu, 11 Apr 2024 16:55:29 +0800
Subject: [PATCH] [LLVM][XTHeadVector] Implement intrinsics for vfwmul. (#96)

* [LLVM][XTHeadVector] Define intrinsic functions.
* [LLVM][XTHeadVector] Define pseudos and pats.
* [LLVM][XTHeadVector] Add test cases.
* [NFC][XTHeadVector] Update Readme.
---
 README.md                                     |    1 +
 .../include/llvm/IR/IntrinsicsRISCVXTHeadV.td |    3 +
 .../RISCV/RISCVInstrInfoXTHeadVPseudos.td     |   35 +
 llvm/test/CodeGen/RISCV/rvv0p71/vfwmul.ll     | 1045 +++++++++++++++++
 4 files changed, 1084 insertions(+)
 create mode 100644 llvm/test/CodeGen/RISCV/rvv0p71/vfwmul.ll

diff --git a/README.md b/README.md
index 4021b4760ce4c3..6be2312391aa5e 100644
--- a/README.md
+++ b/README.md
@@ -61,6 +61,7 @@ Any feature not listed below but present in the specification should be consider
   - (Done) `14.2. Vector Single-Width Floating-Point Add/Subtract Instructions`
   - (Done) `14.3. Vector Widening Floating-Point Add/Subtract Instructions`
   - (Done) `14.4. Vector Single-Width Floating-Point Multiply/Divide Instructions`
+  - (Done) `14.5. Vector Widening Floating-Point Multiply`
 - (WIP) Clang intrinsics related to the `XTHeadVector` extension:
   - (WIP) `6. Configuration-Setting and Utility`
     - (Done) `6.1. Set vl and vtype`
diff --git a/llvm/include/llvm/IR/IntrinsicsRISCVXTHeadV.td b/llvm/include/llvm/IR/IntrinsicsRISCVXTHeadV.td
index 0458eb3106f612..5ab7f5bdd2b43d 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCVXTHeadV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCVXTHeadV.td
@@ -898,6 +898,9 @@ let TargetPrefix = "riscv" in {
   defm th_vfdiv : XVBinaryAAXRoundingMode;
   defm th_vfrdiv : XVBinaryAAXRoundingMode;
 
+  // 14.5. Vector Widening Floating-Point Multiply
+  defm th_vfwmul : XVBinaryABXRoundingMode;
+
   // 16.1. Vector Mask-Register Logical Operations
   def int_riscv_th_vmand: RISCVBinaryAAAUnMasked;
   def int_riscv_th_vmnand: RISCVBinaryAAAUnMasked;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXTHeadVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXTHeadVPseudos.td
index efe9ac8a07937e..5a3e09b87a1cee 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXTHeadVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXTHeadVPseudos.td
@@ -2517,6 +2517,29 @@ multiclass XVPseudoVFRDIV_VF_RM {
   }
 }
 
+multiclass XVPseudoVWMUL_VV_VF_RM {
+  foreach m = MxListWXTHeadV in {
+    defvar mx = m.MX;
+    defvar WriteVFWMulV_MX = !cast<SchedWrite>("WriteVFWMulV_" # mx);
+    defvar ReadVFWMulV_MX = !cast<SchedRead>("ReadVFWMulV_" # mx);
+
+    defm "" : XVPseudoBinaryW_VV_RM<m>,
+              Sched<[WriteVFWMulV_MX, ReadVFWMulV_MX, ReadVFWMulV_MX, ReadVMask]>;
+  }
+
+  foreach f = FPListXTHeadV in {
+    foreach m = f.MxListFW in {
+      defvar mx = m.MX;
+      defvar WriteVFWMulF_MX = !cast<SchedWrite>("WriteVFWMulF_" # mx);
+      defvar ReadVFWMulV_MX = !cast<SchedRead>("ReadVFWMulV_" # mx);
+      defvar ReadVFWMulF_MX = !cast<SchedRead>("ReadVFWMulF_" # mx);
+
+      defm "" : XVPseudoBinaryW_VF_RM<m, f.FX>,
+                Sched<[WriteVFWMulF_MX, ReadVFWMulV_MX, ReadVFWMulF_MX, ReadVMask]>;
+    }
+  }
+}
+
 multiclass XVPseudoVALU_MM {
   foreach m = MxListXTHeadV in {
     defvar mx = m.MX;
@@ -3980,6 +4003,18 @@ let Predicates = [HasVendorXTHeadV] in {
                                            AllFloatXVectors, isSEWAware=1>;
 } // Predicates = [HasVendorXTHeadV]
 
+//===----------------------------------------------------------------------===//
+// 14.5. Vector Widening Floating-Point Multiply
+//===----------------------------------------------------------------------===//
+let Predicates = [HasVendorXTHeadV], mayRaiseFPException = true, hasSideEffects = 0 in {
+  defm PseudoTH_VFWMUL : XVPseudoVWMUL_VV_VF_RM;
+}
+
+let Predicates = [HasVendorXTHeadV] in {
+  defm : XVPatBinaryW_VV_VX_RM<"int_riscv_th_vfwmul", "PseudoTH_VFWMUL",
+                               AllWidenableFloatXVectors>;
+}
+
 //===----------------------------------------------------------------------===//
 // 16.1. Vector Mask-Register Logical Operations
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/RISCV/rvv0p71/vfwmul.ll b/llvm/test/CodeGen/RISCV/rvv0p71/vfwmul.ll
new file mode 100644
index 00000000000000..73d7c955c3c276
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv0p71/vfwmul.ll
@@ -0,0 +1,1045 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+xtheadvector,+zfh,+d \
+; RUN:   -verify-machineinstrs | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+xtheadvector,+zfh,+d \
+; RUN:   -verify-machineinstrs | FileCheck %s
+
+declare @llvm.riscv.th.vfwmul.nxv4f32.nxv4f16.nxv4f16(
+  ,
+  ,
+  ,
+  iXLen, iXLen);
+
+define @intrinsic_vfwmul_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_vv_nxv4f32_nxv4f16_nxv4f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: csrr a1, vl
+; CHECK-NEXT: csrr a2, vtype
+; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1
+; CHECK-NEXT: th.vsetvl zero, a1, a2
+; CHECK-NEXT: csrr a1, vl
+; CHECK-NEXT: csrr a2, vtype
+; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1
+; CHECK-NEXT: th.vsetvl zero, a1, a2
+; CHECK-NEXT: th.vsetvli zero, a0, e16, m1, d1
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: th.vfwmul.vv v10, v8, v9
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: csrr a1, vtype
+; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1
+; CHECK-NEXT: th.vmv.v.v v8, v10
+; CHECK-NEXT: th.vmv.v.v v9, v11
+; CHECK-NEXT: th.vsetvl zero, a0, a1
+; CHECK-NEXT: ret
+entry:
+  %a = call @llvm.riscv.th.vfwmul.nxv4f32.nxv4f16.nxv4f16(
+    undef,
+    %0,
+    %1,
+    iXLen 0, iXLen %2)
+
+  ret %a
+}
+
+declare @llvm.riscv.th.vfwmul.mask.nxv4f32.nxv4f16.nxv4f16(
+  ,
+  ,
+  ,
+  ,
+  iXLen, iXLen);
+
+define @intrinsic_vfwmul_mask_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv4f32_nxv4f16_nxv4f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: csrr a1, vl
+; CHECK-NEXT: csrr a2, vtype
+; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1
+; CHECK-NEXT: th.vsetvl zero, a1, a2
+; CHECK-NEXT: csrr a1, vl
+; CHECK-NEXT: csrr a2, vtype
+; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1
+; CHECK-NEXT: th.vsetvl zero, a1, a2
+; CHECK-NEXT: csrr a1, vl
+; CHECK-NEXT: csrr a2, vtype
+; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1
+; CHECK-NEXT: th.vsetvl zero, a1, a2
+; CHECK-NEXT: csrr a1, vl
+; CHECK-NEXT: csrr a2, vtype
+; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1
+; CHECK-NEXT: th.vsetvl zero, a1, a2
+; CHECK-NEXT: csrr a1, vl
+; CHECK-NEXT: csrr a2, vtype
+; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1
+; CHECK-NEXT: th.vsetvl zero, a1, a2
+; CHECK-NEXT: th.vsetvli zero, a0, e16, m1, d1
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: th.vfwmul.vv v8, v10, v11, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: csrr a1, vtype
+; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1
+; CHECK-NEXT: th.vsetvl 
zero, a0, a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vfwmul.mask.nxv4f32.nxv4f16.nxv4f16( + %0, + %1, + %2, + %3, + iXLen 0, iXLen %4) + + ret %a +} + +declare @llvm.riscv.th.vfwmul.nxv8f32.nxv8f16.nxv8f16( + , + , + , + iXLen, iXLen); + +define @intrinsic_vfwmul_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwmul_vv_nxv8f32_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, e16, m2, d1 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: th.vfwmul.vv v12, v8, v10 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: csrr a1, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vmv.v.v v8, v12 +; CHECK-NEXT: th.vmv.v.v v9, v13 +; CHECK-NEXT: th.vmv.v.v v10, v14 +; CHECK-NEXT: th.vmv.v.v v11, v15 +; CHECK-NEXT: th.vsetvl zero, a0, a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vfwmul.nxv8f32.nxv8f16.nxv8f16( + undef, + %0, + %1, + iXLen 0, iXLen %2) + + ret %a +} + +declare @llvm.riscv.th.vfwmul.mask.nxv8f32.nxv8f16.nxv8f16( + , + , + , + , + iXLen, iXLen); + +define @intrinsic_vfwmul_mask_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv8f32_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, e16, m2, d1 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: th.vfwmul.vv v8, v12, v14, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: csrr a1, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a0, a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vfwmul.mask.nxv8f32.nxv8f16.nxv8f16( + %0, + %1, + %2, + %3, + iXLen 0, iXLen %4) + + ret %a +} + +declare @llvm.riscv.th.vfwmul.nxv16f32.nxv16f16.nxv16f16( + , + , + , + iXLen, iXLen); + +define @intrinsic_vfwmul_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwmul_vv_nxv16f32_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, e16, m4, d1 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: th.vfwmul.vv v16, v8, v12 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: csrr a1, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vmv.v.v v8, 
v16 +; CHECK-NEXT: th.vmv.v.v v9, v17 +; CHECK-NEXT: th.vmv.v.v v10, v18 +; CHECK-NEXT: th.vmv.v.v v11, v19 +; CHECK-NEXT: th.vmv.v.v v12, v20 +; CHECK-NEXT: th.vmv.v.v v13, v21 +; CHECK-NEXT: th.vmv.v.v v14, v22 +; CHECK-NEXT: th.vmv.v.v v15, v23 +; CHECK-NEXT: th.vsetvl zero, a0, a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vfwmul.nxv16f32.nxv16f16.nxv16f16( + undef, + %0, + %1, + iXLen 0, iXLen %2) + + ret %a +} + +declare @llvm.riscv.th.vfwmul.mask.nxv16f32.nxv16f16.nxv16f16( + , + , + , + , + iXLen, iXLen); + +define @intrinsic_vfwmul_mask_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv16f32_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, e16, m4, d1 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: th.vfwmul.vv v8, v16, v20, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: csrr a1, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a0, a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vfwmul.mask.nxv16f32.nxv16f16.nxv16f16( + %0, + %1, + %2, + %3, + iXLen 0, iXLen %4) + + ret %a +} + +declare @llvm.riscv.th.vfwmul.nxv2f64.nxv2f32.nxv2f32( + , + , + , + iXLen, iXLen); + +define @intrinsic_vfwmul_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwmul_vv_nxv2f64_nxv2f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, e32, m1, d1 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: th.vfwmul.vv v10, v8, v9 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: csrr a1, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vmv.v.v v8, v10 +; CHECK-NEXT: th.vmv.v.v v9, v11 +; CHECK-NEXT: th.vsetvl zero, a0, a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vfwmul.nxv2f64.nxv2f32.nxv2f32( + undef, + %0, + %1, + iXLen 0, iXLen %2) + + ret %a +} + +declare @llvm.riscv.th.vfwmul.mask.nxv2f64.nxv2f32.nxv2f32( + , + , + , + , + iXLen, iXLen); + +define @intrinsic_vfwmul_mask_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv2f64_nxv2f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr 
a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, e32, m1, d1 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: th.vfwmul.vv v8, v10, v11, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: csrr a1, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a0, a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vfwmul.mask.nxv2f64.nxv2f32.nxv2f32( + %0, + %1, + %2, + %3, + iXLen 0, iXLen %4) + + ret %a +} + +declare @llvm.riscv.th.vfwmul.nxv4f64.nxv4f32.nxv4f32( + , + , + , + iXLen, iXLen); + +define @intrinsic_vfwmul_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwmul_vv_nxv4f64_nxv4f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, e32, m2, d1 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: th.vfwmul.vv v12, v8, v10 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: csrr a1, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vmv.v.v v8, v12 +; CHECK-NEXT: th.vmv.v.v v9, v13 +; CHECK-NEXT: th.vmv.v.v v10, v14 +; CHECK-NEXT: th.vmv.v.v v11, v15 +; CHECK-NEXT: th.vsetvl zero, a0, a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vfwmul.nxv4f64.nxv4f32.nxv4f32( + undef, + %0, + %1, + iXLen 0, iXLen %2) + + ret %a +} + +declare @llvm.riscv.th.vfwmul.mask.nxv4f64.nxv4f32.nxv4f32( + , + , + , + , + iXLen, iXLen); + +define @intrinsic_vfwmul_mask_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv4f64_nxv4f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, e32, m2, d1 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: th.vfwmul.vv v8, v12, v14, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: csrr a1, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a0, a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vfwmul.mask.nxv4f64.nxv4f32.nxv4f32( + %0, + %1, + %2, + %3, + iXLen 0, iXLen %4) + + ret %a +} + +declare @llvm.riscv.th.vfwmul.nxv8f64.nxv8f32.nxv8f32( + , + , + , + iXLen, iXLen); + +define @intrinsic_vfwmul_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, iXLen %2) nounwind { 
+; CHECK-LABEL: intrinsic_vfwmul_vv_nxv8f64_nxv8f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, e32, m4, d1 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: th.vfwmul.vv v16, v8, v12 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: csrr a1, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vmv.v.v v8, v16 +; CHECK-NEXT: th.vmv.v.v v9, v17 +; CHECK-NEXT: th.vmv.v.v v10, v18 +; CHECK-NEXT: th.vmv.v.v v11, v19 +; CHECK-NEXT: th.vmv.v.v v12, v20 +; CHECK-NEXT: th.vmv.v.v v13, v21 +; CHECK-NEXT: th.vmv.v.v v14, v22 +; CHECK-NEXT: th.vmv.v.v v15, v23 +; CHECK-NEXT: th.vsetvl zero, a0, a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vfwmul.nxv8f64.nxv8f32.nxv8f32( + undef, + %0, + %1, + iXLen 0, iXLen %2) + + ret %a +} + +declare @llvm.riscv.th.vfwmul.mask.nxv8f64.nxv8f32.nxv8f32( + , + , + , + , + iXLen, iXLen); + +define @intrinsic_vfwmul_mask_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv8f64_nxv8f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, e32, m4, d1 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: th.vfwmul.vv v8, v16, v20, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: csrr a1, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a0, a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vfwmul.mask.nxv8f64.nxv8f32.nxv8f32( + %0, + %1, + %2, + %3, + iXLen 0, iXLen %4) + + ret %a +} + +declare @llvm.riscv.th.vfwmul.nxv4f32.nxv4f16.f16( + , + , + half, + iXLen, iXLen); + +define @intrinsic_vfwmul_vf_nxv4f32_nxv4f16_f16( %0, half %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwmul_vf_nxv4f32_nxv4f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, e16, m1, d1 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: th.vfwmul.vf v10, v8, fa0 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: csrr a1, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vmv.v.v v8, v10 +; CHECK-NEXT: th.vmv.v.v v9, v11 +; CHECK-NEXT: th.vsetvl zero, a0, a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vfwmul.nxv4f32.nxv4f16.f16( + undef, + %0, + half %1, + iXLen 0, iXLen %2) + + ret %a +} + +declare @llvm.riscv.th.vfwmul.mask.nxv4f32.nxv4f16.f16( + , + , + half, + , + iXLen, iXLen); + +define 
@intrinsic_vfwmul_mask_vf_nxv4f32_nxv4f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv4f32_nxv4f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, e16, m1, d1 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: th.vfwmul.vf v8, v10, fa0, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: csrr a1, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a0, a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vfwmul.mask.nxv4f32.nxv4f16.f16( + %0, + %1, + half %2, + %3, + iXLen 0, iXLen %4) + + ret %a +} + +declare @llvm.riscv.th.vfwmul.nxv8f32.nxv8f16.f16( + , + , + half, + iXLen, iXLen); + +define @intrinsic_vfwmul_vf_nxv8f32_nxv8f16_f16( %0, half %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwmul_vf_nxv8f32_nxv8f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, e16, m2, d1 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: th.vfwmul.vf v12, v8, fa0 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: csrr a1, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vmv.v.v v8, v12 +; CHECK-NEXT: th.vmv.v.v v9, v13 +; CHECK-NEXT: th.vmv.v.v v10, v14 +; CHECK-NEXT: th.vmv.v.v v11, v15 +; CHECK-NEXT: th.vsetvl zero, a0, a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vfwmul.nxv8f32.nxv8f16.f16( + undef, + %0, + half %1, + iXLen 0, iXLen %2) + + ret %a +} + +declare @llvm.riscv.th.vfwmul.mask.nxv8f32.nxv8f16.f16( + , + , + half, + , + iXLen, iXLen); + +define @intrinsic_vfwmul_mask_vf_nxv8f32_nxv8f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv8f32_nxv8f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, e16, m2, d1 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: th.vfwmul.vf v8, v12, fa0, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: csrr a1, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a0, a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vfwmul.mask.nxv8f32.nxv8f16.f16( + %0, + %1, + half %2, + %3, + iXLen 0, iXLen %4) + + ret %a +} + +declare @llvm.riscv.th.vfwmul.nxv16f32.nxv16f16.f16( + , + , + half, + iXLen, iXLen); + 
+define @intrinsic_vfwmul_vf_nxv16f32_nxv16f16_f16( %0, half %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwmul_vf_nxv16f32_nxv16f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, e16, m4, d1 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: th.vfwmul.vf v16, v8, fa0 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: csrr a1, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vmv.v.v v8, v16 +; CHECK-NEXT: th.vmv.v.v v9, v17 +; CHECK-NEXT: th.vmv.v.v v10, v18 +; CHECK-NEXT: th.vmv.v.v v11, v19 +; CHECK-NEXT: th.vmv.v.v v12, v20 +; CHECK-NEXT: th.vmv.v.v v13, v21 +; CHECK-NEXT: th.vmv.v.v v14, v22 +; CHECK-NEXT: th.vmv.v.v v15, v23 +; CHECK-NEXT: th.vsetvl zero, a0, a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vfwmul.nxv16f32.nxv16f16.f16( + undef, + %0, + half %1, + iXLen 0, iXLen %2) + + ret %a +} + +declare @llvm.riscv.th.vfwmul.mask.nxv16f32.nxv16f16.f16( + , + , + half, + , + iXLen, iXLen); + +define @intrinsic_vfwmul_mask_vf_nxv16f32_nxv16f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv16f32_nxv16f16_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, e16, m4, d1 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: th.vfwmul.vf v8, v16, fa0, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: csrr a1, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a0, a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vfwmul.mask.nxv16f32.nxv16f16.f16( + %0, + %1, + half %2, + %3, + iXLen 0, iXLen %4) + + ret %a +} + +declare @llvm.riscv.th.vfwmul.nxv2f64.nxv2f32.f32( + , + , + float, + iXLen, iXLen); + +define @intrinsic_vfwmul_vf_nxv2f64_nxv2f32_f32( %0, float %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwmul_vf_nxv2f64_nxv2f32_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, e32, m1, d1 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: th.vfwmul.vf v10, v8, fa0 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: csrr a1, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vmv.v.v v8, v10 +; CHECK-NEXT: th.vmv.v.v v9, v11 +; CHECK-NEXT: th.vsetvl zero, a0, a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vfwmul.nxv2f64.nxv2f32.f32( + undef, + %0, + float %1, + iXLen 0, iXLen %2) + + ret %a +} + +declare @llvm.riscv.th.vfwmul.mask.nxv2f64.nxv2f32.f32( + , + , + float, + , + iXLen, iXLen); + +define @intrinsic_vfwmul_mask_vf_nxv2f64_nxv2f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv2f64_nxv2f32_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; 
CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, e32, m1, d1 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: th.vfwmul.vf v8, v10, fa0, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: csrr a1, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a0, a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vfwmul.mask.nxv2f64.nxv2f32.f32( + %0, + %1, + float %2, + %3, + iXLen 0, iXLen %4) + + ret %a +} + +declare @llvm.riscv.th.vfwmul.nxv4f64.nxv4f32.f32( + , + , + float, + iXLen, iXLen); + +define @intrinsic_vfwmul_vf_nxv4f64_nxv4f32_f32( %0, float %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwmul_vf_nxv4f64_nxv4f32_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, e32, m2, d1 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: th.vfwmul.vf v12, v8, fa0 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: csrr a1, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vmv.v.v v8, v12 +; CHECK-NEXT: th.vmv.v.v v9, v13 +; CHECK-NEXT: th.vmv.v.v v10, v14 +; CHECK-NEXT: th.vmv.v.v v11, v15 +; CHECK-NEXT: th.vsetvl zero, a0, a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vfwmul.nxv4f64.nxv4f32.f32( + undef, + %0, + float %1, + iXLen 0, iXLen %2) + + ret %a +} + +declare @llvm.riscv.th.vfwmul.mask.nxv4f64.nxv4f32.f32( + , + , + float, + , + iXLen, iXLen); + +define @intrinsic_vfwmul_mask_vf_nxv4f64_nxv4f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv4f64_nxv4f32_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, e32, m2, d1 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: th.vfwmul.vf v8, v12, fa0, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: csrr a1, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a0, a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vfwmul.mask.nxv4f64.nxv4f32.f32( + %0, + %1, + float %2, + %3, + iXLen 0, iXLen %4) + + ret %a +} + +declare @llvm.riscv.th.vfwmul.nxv8f64.nxv8f32.f32( + , + , + float, + iXLen, iXLen); + +define @intrinsic_vfwmul_vf_nxv8f64_nxv8f32_f32( %0, float %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwmul_vf_nxv8f64_nxv8f32_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; 
CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, e32, m4, d1 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: th.vfwmul.vf v16, v8, fa0 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: csrr a1, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vmv.v.v v8, v16 +; CHECK-NEXT: th.vmv.v.v v9, v17 +; CHECK-NEXT: th.vmv.v.v v10, v18 +; CHECK-NEXT: th.vmv.v.v v11, v19 +; CHECK-NEXT: th.vmv.v.v v12, v20 +; CHECK-NEXT: th.vmv.v.v v13, v21 +; CHECK-NEXT: th.vmv.v.v v14, v22 +; CHECK-NEXT: th.vmv.v.v v15, v23 +; CHECK-NEXT: th.vsetvl zero, a0, a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vfwmul.nxv8f64.nxv8f32.f32( + undef, + %0, + float %1, + iXLen 0, iXLen %2) + + ret %a +} + +declare @llvm.riscv.th.vfwmul.mask.nxv8f64.nxv8f32.f32( + , + , + float, + , + iXLen, iXLen); + +define @intrinsic_vfwmul_mask_vf_nxv8f64_nxv8f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv8f64_nxv8f32_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, e32, m4, d1 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: th.vfwmul.vf v8, v16, fa0, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: csrr a1, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a0, a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vfwmul.mask.nxv8f64.nxv8f32.f32( + %0, + %1, + float %2, + %3, + iXLen 0, iXLen %4) + + ret %a +}