diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 6fe683410d59c..45368a01a0a73 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1319,7 +1319,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                             ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
                            VT, Custom);
         setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
-                            ISD::EXTRACT_SUBVECTOR},
+                            ISD::EXTRACT_SUBVECTOR, ISD::VECTOR_SHUFFLE},
                            VT, Custom);
         setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
         setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
@@ -5040,6 +5040,14 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
       return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
     }
 
+    MVT SplatVT = ContainerVT;
+
+    // If we don't have Zfh, we need to use an integer scalar load.
+    if (SVT == MVT::f16 && !Subtarget.hasStdExtZfh()) {
+      SVT = MVT::i16;
+      SplatVT = ContainerVT.changeVectorElementType(SVT);
+    }
+
     // Otherwise use a scalar load and splat. This will give the best
     // opportunity to fold a splat into the operation. ISel can turn it into
     // the x0 strided load if we aren't able to fold away the select.
@@ -5055,10 +5063,11 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
                     Ld->getMemOperand()->getFlags());
     DAG.makeEquivalentMemoryOrdering(Ld, V);
 
-    unsigned Opc =
-        VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
+    unsigned Opc = SplatVT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
+                                             : RISCVISD::VMV_V_X_VL;
     SDValue Splat =
-        DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), V, VL);
+        DAG.getNode(Opc, DL, SplatVT, DAG.getUNDEF(ContainerVT), V, VL);
+    Splat = DAG.getBitcast(ContainerVT, Splat);
     return convertFromScalableVector(VT, Splat, DAG, Subtarget);
   }
 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index a7945f2ee6c1b..cc294bf9254e8 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -2117,7 +2117,8 @@ multiclass VPatWidenFPMulAccVL_VV_VF_RM {
 
 multiclass VPatSlideVL_VX_VI<SDNode vop, string instruction_name> {
   foreach vti = AllVectors in {
-    let Predicates = GetVTypePredicates<vti>.Predicates in {
+    defvar ivti = GetIntVTypeInfo<vti>.Vti;
+    let Predicates = GetVTypePredicates<ivti>.Predicates in {
       def : Pat<(vti.Vector (vop (vti.Vector vti.RegClass:$rd),
                                  (vti.Vector vti.RegClass:$rs1),
                                  uimm5:$rs2, (vti.Mask V0),
@@ -3001,8 +3002,7 @@ foreach vti = AllFloatVectors in {
                   (vti.Scalar vti.ScalarRegClass:$rs1),
                   GPR:$vl, vti.Log2SEW)>;
   }
   defvar ivti = GetIntVTypeInfo<vti>.Vti;
-  let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
-                               GetVTypePredicates<ivti>.Predicates) in {
+  let Predicates = GetVTypePredicates<ivti>.Predicates in {
     def : Pat<(vti.Vector (riscv_vrgather_vv_vl vti.RegClass:$rs2,
                                                 (ivti.Vector vti.RegClass:$rs1),
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
index 45c0a22b1939f..6408402ef787f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 define <4 x half> @shuffle_v4f16(<4 x half> %x, <4 x half> %y) {
 ; CHECK-LABEL: shuffle_v4f16:
@@ -262,6 +264,51 @@ define <8 x double> @splice_binary2(<8 x double> %x, <8 x double> %y) {
   %s = shufflevector <8 x double> %x, <8 x double> %y, <8 x i32> <i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4>
   ret <8 x double> %s
 }
+
+define <4 x half> @vrgather_permute_shuffle_vu_v4f16(<4 x half> %x) {
+; CHECK-LABEL: vrgather_permute_shuffle_vu_v4f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 4096
+; CHECK-NEXT:    addi a0, a0, 513
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vmv.s.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vsext.vf2 v10, v9
+; CHECK-NEXT:    vrgather.vv v9, v8, v10
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %s = shufflevector <4 x half> %x, <4 x half> poison, <4 x i32> <i32 1, i32 2, i32 0, i32 1>
+  ret <4 x half> %s
+}
+
+define <4 x half> @vrgather_shuffle_vv_v4f16(<4 x half> %x, <4 x half> %y) {
+; CHECK-LABEL: vrgather_shuffle_vv_v4f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI21_0)
+; CHECK-NEXT:    addi a0, a0, %lo(.LCPI21_0)
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT:    vle16.v v11, (a0)
+; CHECK-NEXT:    vmv.v.i v0, 8
+; CHECK-NEXT:    vrgather.vv v10, v8, v11
+; CHECK-NEXT:    vrgather.vi v10, v9, 1, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    ret
+  %s = shufflevector <4 x half> %x, <4 x half> %y, <4 x i32> <i32 1, i32 2, i32 0, i32 5>
+  ret <4 x half> %s
+}
+
+define <4 x half> @vrgather_shuffle_vx_v4f16_load(ptr %p) {
+; CHECK-LABEL: vrgather_shuffle_vx_v4f16_load:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a0, a0, 2
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vlse16.v v8, (a0), zero
+; CHECK-NEXT:    ret
+  %v = load <4 x half>, ptr %p
+  %s = shufflevector <4 x half> %v, <4 x half> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  ret <4 x half> %s
+}
+
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; RV32: {{.*}}
 ; RV64: {{.*}}
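Not part of the patch, but a minimal standalone sketch for poking at the new code path; the file and function names below are invented, not from the tree. It hits the splat-from-load case the first RISCVISelLowering.cpp hunk handles: with +zvfhmin and no +zfh, the f16 element is reloaded as an i16, splatted via VMV_V_X_VL, and the result bitcast back to the f16 container type, so llc should select a zero-strided vlse16.v here, much like vrgather_shuffle_vx_v4f16_load above.

; Hypothetical reproducer, e.g. splat-repro.ll (names are ours):
;   llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+v -verify-machineinstrs splat-repro.ll
define <4 x half> @splat_lane0_from_load(ptr %p) {
  %v = load <4 x half>, ptr %p
  ; Splat lane 0 of the loaded vector; without Zfh this exercises the
  ; integer-splat-plus-bitcast path added in lowerVECTOR_SHUFFLE.
  %s = shufflevector <4 x half> %v, <4 x half> poison, <4 x i32> zeroinitializer
  ret <4 x half> %s
}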