Skip to content

Commit 0bb35d8

Browse files
committed
[RISCV] Handle fixed length vectors with exact VLEN in lowerINSERT_SUBVECTOR
This is the insert_subvector equivalent to llvm#79949, where we can avoid sliding up by the full LMUL amount if we know the exact subregister the subvector will be inserted into. This mirrors the lowerEXTRACT_SUBVECTOR changes in that we handle this in two parts: - We handle fixed length subvector types by converting the subvector to a scalable vector. But unlike EXTRACT_SUBVECTOR, we may also need to convert the vector being inserted into too. - Whenever we don't need a vslideup because either the subvector aligns to a vector register group *or* the vector is undef, we need to emit an insert_subreg ourselves because RISCVISelDAGToDAG::Select doesn't correctly handle fixed length subvectors yet: see d7a28f7. I've left RISCVISelDAGToDAG::Select untouched for now (minus relaxing an invariant), so that the insert_subvector and extract_subvector code paths are the same. We should teach it to properly handle fixed length subvectors in a follow-up patch, so that the "exact subregister" logic is handled in one place instead of being spread across both RISCVISelDAGToDAG.cpp and RISCVISelLowering.cpp.
1 parent 61b2a0e commit 0bb35d8

File tree

4 files changed

+334
-172
lines changed

4 files changed

+334
-172
lines changed

llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2099,8 +2099,14 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
20992099
MVT SubVecContainerVT = SubVecVT;
21002100
// Establish the correct scalable-vector types for any fixed-length type.
21012101
if (SubVecVT.isFixedLengthVector()) {
2102-
assert(Idx == 0 && V.isUndef());
21032102
SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
2103+
bool AlignedToVecReg = false;
2104+
if (auto VLen = Subtarget->getRealVLen();
2105+
VLen && SubVecVT.getSizeInBits() ==
2106+
SubVecContainerVT.getSizeInBits().getKnownMinValue() *
2107+
(*VLen / RISCV::RVVBitsPerBlock))
2108+
AlignedToVecReg = true;
2109+
assert(Idx == 0 && (AlignedToVecReg || V.isUndef()));
21042110
}
21052111
MVT ContainerVT = VT;
21062112
if (VT.isFixedLengthVector())

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 99 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -9723,6 +9723,21 @@ SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
97239723
Vec, Mask, VL, DL, DAG, Subtarget);
97249724
}
97259725

9726+
/// Returns true if \p LHS is known to be equal to \p RHS. If \p Subtarget knows
9727+
/// VLEN exactly, scalable counts are first scaled by vscale (VLEN /
9728+
/// RVVBitsPerBlock) into fixed counts; otherwise the counts are compared as-is.
9729+
static bool isKnownEQ(ElementCount LHS, ElementCount RHS,
9730+
const RISCVSubtarget &Subtarget) {
9731+
if (auto VLen = Subtarget.getRealVLen()) {
9732+
const unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
9733+
if (LHS.isScalable())
9734+
LHS = ElementCount::getFixed(LHS.getKnownMinValue() * Vscale);
9735+
if (RHS.isScalable())
9736+
RHS = ElementCount::getFixed(RHS.getKnownMinValue() * Vscale);
9737+
}
9738+
return LHS == RHS;
9739+
}
9740+
97269741
SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
97279742
SelectionDAG &DAG) const {
97289743
SDValue Vec = Op.getOperand(0);
@@ -9772,12 +9787,13 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
97729787
}
97739788
}
97749789

9775-
// If the subvector vector is a fixed-length type, we cannot use subregister
9776-
// manipulation to simplify the codegen; we don't know which register of a
9777-
// LMUL group contains the specific subvector as we only know the minimum
9778-
// register size. Therefore we must slide the vector group up the full
9779-
// amount.
9780-
if (SubVecVT.isFixedLengthVector()) {
9790+
// If the subvector vector is a fixed-length type and we don't know VLEN
9791+
// exactly, we cannot use subregister manipulation to simplify the codegen; we
9792+
// don't know which register of a LMUL group contains the specific subvector
9793+
// as we only know the minimum register size. Therefore we must slide the
9794+
// vector group up the full amount.
9795+
const auto VLen = Subtarget.getRealVLen();
9796+
if (SubVecVT.isFixedLengthVector() && !VLen) {
97819797
if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector())
97829798
return Op;
97839799
MVT ContainerVT = VecVT;
@@ -9825,41 +9841,92 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
98259841
return DAG.getBitcast(Op.getValueType(), SubVec);
98269842
}
98279843

9828-
unsigned SubRegIdx, RemIdx;
9829-
std::tie(SubRegIdx, RemIdx) =
9830-
RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
9831-
VecVT, SubVecVT, OrigIdx, TRI);
9844+
MVT ContainerVecVT = VecVT;
9845+
if (VecVT.isFixedLengthVector()) {
9846+
ContainerVecVT = getContainerForFixedLengthVector(VecVT);
9847+
Vec = convertToScalableVector(ContainerVecVT, Vec, DAG, Subtarget);
9848+
}
98329849

9833-
RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT);
9850+
MVT ContainerSubVecVT = SubVecVT;
9851+
if (SubVecVT.isFixedLengthVector()) {
9852+
ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
9853+
SubVec = convertToScalableVector(ContainerSubVecVT, SubVec, DAG, Subtarget);
9854+
}
9855+
9856+
unsigned SubRegIdx;
9857+
ElementCount RemIdx;
9858+
// insert_subvector scales the index by vscale if the subvector is scalable,
9859+
// and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
9860+
// we have a fixed length subvector, we need to adjust the index by 1/vscale.
9861+
if (SubVecVT.isFixedLengthVector()) {
9862+
assert(VLen);
9863+
unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
9864+
auto Decompose =
9865+
RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
9866+
ContainerVecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
9867+
SubRegIdx = Decompose.first;
9868+
RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
9869+
(OrigIdx % Vscale));
9870+
} else {
9871+
auto Decompose =
9872+
RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
9873+
ContainerVecVT, ContainerSubVecVT, OrigIdx, TRI);
9874+
SubRegIdx = Decompose.first;
9875+
RemIdx = ElementCount::getScalable(Decompose.second);
9876+
}
9877+
9878+
RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(ContainerSubVecVT);
98349879
bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
98359880
SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
98369881
SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
9882+
bool AlignedToVecReg = !IsSubVecPartReg;
9883+
if (SubVecVT.isFixedLengthVector())
9884+
AlignedToVecReg &= SubVecVT.getSizeInBits() ==
9885+
ContainerSubVecVT.getSizeInBits().getKnownMinValue() *
9886+
(*VLen / RISCV::RVVBitsPerBlock);
98379887

98389888
// 1. If the Idx has been completely eliminated and this subvector's size is
98399889
// a vector register or a multiple thereof, or the surrounding elements are
98409890
// undef, then this is a subvector insert which naturally aligns to a vector
98419891
// register. These can easily be handled using subregister manipulation.
9842-
// 2. If the subvector is smaller than a vector register, then the insertion
9843-
// must preserve the undisturbed elements of the register. We do this by
9844-
// lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
9845-
// (which resolves to a subregister copy), performing a VSLIDEUP to place the
9846-
// subvector within the vector register, and an INSERT_SUBVECTOR of that
9892+
// 2. If the subvector isn't exactly aligned to a vector register group, then
9893+
// the insertion must preserve the undisturbed elements of the register. We do
9894+
// this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector
9895+
// type (which resolves to a subregister copy), performing a VSLIDEUP to place
9896+
// the subvector within the vector register, and an INSERT_SUBVECTOR of that
98479897
// LMUL=1 type back into the larger vector (resolving to another subregister
98489898
// operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
98499899
// to avoid allocating a large register group to hold our subvector.
9850-
if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef()))
9900+
if (RemIdx.isZero() && (AlignedToVecReg || Vec.isUndef())) {
9901+
if (SubVecVT.isFixedLengthVector()) {
9902+
// We may get NoSubRegister if inserting at index 0 and the subvec
9903+
// container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
9904+
if (SubRegIdx == RISCV::NoSubRegister) {
9905+
assert(OrigIdx == 0);
9906+
return Op;
9907+
}
9908+
9909+
SDValue Insert =
9910+
DAG.getTargetInsertSubreg(SubRegIdx, DL, ContainerVecVT, Vec, SubVec);
9911+
if (VecVT.isFixedLengthVector())
9912+
Insert = convertFromScalableVector(VecVT, Insert, DAG, Subtarget);
9913+
return Insert;
9914+
}
98519915
return Op;
9916+
}
98529917

98539918
// VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
98549919
// OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
98559920
// (in our case undisturbed). This means we can set up a subvector insertion
98569921
// where OFFSET is the insertion offset, and the VL is the OFFSET plus the
98579922
// size of the subvector.
9858-
MVT InterSubVT = VecVT;
9923+
MVT InterSubVT = ContainerVecVT;
98599924
SDValue AlignedExtract = Vec;
9860-
unsigned AlignedIdx = OrigIdx - RemIdx;
9861-
if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
9862-
InterSubVT = getLMUL1VT(VecVT);
9925+
unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue();
9926+
if (SubVecVT.isFixedLengthVector())
9927+
AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock;
9928+
if (ContainerVecVT.bitsGT(getLMUL1VT(ContainerVecVT))) {
9929+
InterSubVT = getLMUL1VT(ContainerVecVT);
98639930
// Extract a subvector equal to the nearest full vector register type. This
98649931
// should resolve to a EXTRACT_SUBREG instruction.
98659932
AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
@@ -9870,25 +9937,23 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
98709937
DAG.getUNDEF(InterSubVT), SubVec,
98719938
DAG.getVectorIdxConstant(0, DL));
98729939

9873-
auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
9940+
auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVecVT, DL, DAG, Subtarget);
98749941

9875-
ElementCount EndIndex =
9876-
ElementCount::getScalable(RemIdx) + SubVecVT.getVectorElementCount();
9877-
VL = computeVLMax(SubVecVT, DL, DAG);
9942+
ElementCount EndIndex = RemIdx + SubVecVT.getVectorElementCount();
9943+
VL = DAG.getElementCount(DL, XLenVT, SubVecVT.getVectorElementCount());
98789944

98799945
// Use tail agnostic policy if we're inserting over InterSubVT's tail.
98809946
unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
9881-
if (EndIndex == InterSubVT.getVectorElementCount())
9947+
if (isKnownEQ(EndIndex, InterSubVT.getVectorElementCount(), Subtarget))
98829948
Policy = RISCVII::TAIL_AGNOSTIC;
98839949

98849950
// If we're inserting into the lowest elements, use a tail undisturbed
98859951
// vmv.v.v.
9886-
if (RemIdx == 0) {
9952+
if (RemIdx.isZero()) {
98879953
SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
98889954
SubVec, VL);
98899955
} else {
9890-
SDValue SlideupAmt =
9891-
DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), RemIdx));
9956+
SDValue SlideupAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
98929957

98939958
// Construct the vector length corresponding to RemIdx + length(SubVecVT).
98949959
VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
@@ -9899,10 +9964,13 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
98999964

99009965
// If required, insert this subvector back into the correct vector register.
99019966
// This should resolve to an INSERT_SUBREG instruction.
9902-
if (VecVT.bitsGT(InterSubVT))
9903-
SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, SubVec,
9967+
if (ContainerVecVT.bitsGT(InterSubVT))
9968+
SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVecVT, Vec, SubVec,
99049969
DAG.getVectorIdxConstant(AlignedIdx, DL));
99059970

9971+
if (VecVT.isFixedLengthVector())
9972+
SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
9973+
99069974
// We might have bitcast from a mask type: cast back to the original type if
99079975
// required.
99089976
return DAG.getBitcast(Op.getSimpleValueType(), SubVec);

0 commit comments

Comments
 (0)