Skip to content

Commit

Permalink
[RISCV] Support vXf16 vector_shuffle with Zvfhmin. (#97491)
Browse files Browse the repository at this point in the history
We can shuffle vXf16 vectors just like vXi16 vectors; we don't need any
FP instructions. Update the predicates for the vrgather and vslide patterns
to check only the predicates of the equivalent integer type. If we use the
FP type, the predicates will require Zvfh and block Zvfhmin.

These are probably not the only patterns that need to be fixed, but the
test from the bug report no longer crashes.

Fixes #97477
  • Loading branch information
topperc authored Jul 4, 2024
1 parent 30df629 commit c67653f
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 7 deletions.
17 changes: 13 additions & 4 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1319,7 +1319,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
VT, Custom);
setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
ISD::EXTRACT_SUBVECTOR},
ISD::EXTRACT_SUBVECTOR, ISD::VECTOR_SHUFFLE},
VT, Custom);
setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
Expand Down Expand Up @@ -5040,6 +5040,14 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
}

MVT SplatVT = ContainerVT;

// If we don't have Zfh, we need to use an integer scalar load.
if (SVT == MVT::f16 && !Subtarget.hasStdExtZfh()) {
SVT = MVT::i16;
SplatVT = ContainerVT.changeVectorElementType(SVT);
}

// Otherwise use a scalar load and splat. This will give the best
// opportunity to fold a splat into the operation. ISel can turn it into
// the x0 strided load if we aren't able to fold away the select.
Expand All @@ -5055,10 +5063,11 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
Ld->getMemOperand()->getFlags());
DAG.makeEquivalentMemoryOrdering(Ld, V);

unsigned Opc =
VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
unsigned Opc = SplatVT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
: RISCVISD::VMV_V_X_VL;
SDValue Splat =
DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), V, VL);
DAG.getNode(Opc, DL, SplatVT, DAG.getUNDEF(ContainerVT), V, VL);
Splat = DAG.getBitcast(ContainerVT, Splat);
return convertFromScalableVector(VT, Splat, DAG, Subtarget);
}

Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
Original file line number Diff line number Diff line change
Expand Up @@ -2117,7 +2117,8 @@ multiclass VPatWidenFPMulAccVL_VV_VF_RM<SDNode vop, string instruction_name> {

multiclass VPatSlideVL_VX_VI<SDNode vop, string instruction_name> {
foreach vti = AllVectors in {
let Predicates = GetVTypePredicates<vti>.Predicates in {
defvar ivti = GetIntVTypeInfo<vti>.Vti;
let Predicates = GetVTypePredicates<ivti>.Predicates in {
def : Pat<(vti.Vector (vop (vti.Vector vti.RegClass:$rd),
(vti.Vector vti.RegClass:$rs1),
uimm5:$rs2, (vti.Mask V0),
Expand Down Expand Up @@ -3001,8 +3002,7 @@ foreach vti = AllFloatVectors in {
(vti.Scalar vti.ScalarRegClass:$rs1), GPR:$vl, vti.Log2SEW)>;
}
defvar ivti = GetIntVTypeInfo<vti>.Vti;
let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
GetVTypePredicates<ivti>.Predicates) in {
let Predicates = GetVTypePredicates<ivti>.Predicates in {
def : Pat<(vti.Vector
(riscv_vrgather_vv_vl vti.RegClass:$rs2,
(ivti.Vector vti.RegClass:$rs1),
Expand Down
47 changes: 47 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

define <4 x half> @shuffle_v4f16(<4 x half> %x, <4 x half> %y) {
; CHECK-LABEL: shuffle_v4f16:
Expand Down Expand Up @@ -262,6 +264,51 @@ define <8 x double> @splice_binary2(<8 x double> %x, <8 x double> %y) {
%s = shufflevector <8 x double> %x, <8 x double> %y, <8 x i32> <i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4>
ret <8 x double> %s
}

; Single-source permute of a <4 x half> vector (the second shuffle operand is
; poison) using the mask <1, 2, 0, 1>. The expected assembly materializes the
; index vector from a scalar constant, widens it to 16-bit elements with
; vsext.vf2, and performs the shuffle with a single vrgather.vv.
define <4 x half> @vrgather_permute_shuffle_vu_v4f16(<4 x half> %x) {
; CHECK-LABEL: vrgather_permute_shuffle_vu_v4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, 4096
; CHECK-NEXT: addi a0, a0, 513
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vsext.vf2 v10, v9
; CHECK-NEXT: vrgather.vv v9, v8, v10
; CHECK-NEXT: vmv1r.v v8, v9
; CHECK-NEXT: ret
%s = shufflevector <4 x half> %x, <4 x half> poison, <4 x i32> <i32 1, i32 2, i32 0, i32 1>
ret <4 x half> %s
}

; Two-source shuffle of <4 x half> vectors with the mask <1, 2, 0, 5>: lanes
; 0-2 are taken from %x and lane 3 is element 1 of %y. The expected assembly
; gathers with vrgather.vv using an index vector loaded from .LCPI21_0, then
; fills the remaining lane with a masked vrgather.vi of element 1 (the mask
; register v0 is set to 8, i.e. only bit 3).
define <4 x half> @vrgather_shuffle_vv_v4f16(<4 x half> %x, <4 x half> %y) {
; CHECK-LABEL: vrgather_shuffle_vv_v4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI21_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI21_0)
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
; CHECK-NEXT: vle16.v v11, (a0)
; CHECK-NEXT: vmv.v.i v0, 8
; CHECK-NEXT: vrgather.vv v10, v8, v11
; CHECK-NEXT: vrgather.vi v10, v9, 1, v0.t
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret
%s = shufflevector <4 x half> %x, <4 x half> %y, <4 x i32> <i32 1, i32 2, i32 0, i32 5>
ret <4 x half> %s
}

; Splat of element 1 of a <4 x half> vector that was loaded from %p. The
; expected assembly advances the pointer by 2 bytes (one half element) and
; uses a zero-stride vlse16.v, rather than a full vector load followed by a
; separate shuffle.
define <4 x half> @vrgather_shuffle_vx_v4f16_load(ptr %p) {
; CHECK-LABEL: vrgather_shuffle_vx_v4f16_load:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a0, a0, 2
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vlse16.v v8, (a0), zero
; CHECK-NEXT: ret
%v = load <4 x half>, ptr %p
%s = shufflevector <4 x half> %v, <4 x half> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
ret <4 x half> %s
}

;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; RV32: {{.*}}
; RV64: {{.*}}

0 comments on commit c67653f

Please sign in to comment.