Skip to content

Commit

Permalink
[X86] Add combineExtractFromVectorLoad helper - pulled out of combine…
Browse files Browse the repository at this point in the history
…ExtractVectorElt

Prep work for #85419 to make it easier to reuse in other combines
  • Loading branch information
RKSimon committed Mar 27, 2024
1 parent 9247f31 commit 875aed1
Showing 1 changed file with 49 additions and 29 deletions.
78 changes: 49 additions & 29 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43995,6 +43995,49 @@ static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG,
Extract->getOperand(1));
}

// If this extract is from a loaded vector value and will be used as an
// integer, that requires a potentially expensive XMM -> GPR transfer.
// Additionally, if we can convert to a scalar integer load, that will likely
// be folded into a subsequent integer op.
// Note: Unlike the related fold for this in DAGCombiner, this is not limited
// to a single-use of the loaded vector. For the reasons above, we
// expect this to be profitable even if it creates an extra load.
static SDValue
combineExtractFromVectorLoad(SDNode *N, SDValue InputVector, uint64_t Idx,
const SDLoc &dl, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
assert(N->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
"Only EXTRACT_VECTOR_ELT supported so far");

const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT SrcVT = InputVector.getValueType();
EVT VT = N->getValueType(0);

bool LikelyUsedAsVector = any_of(N->uses(), [](SDNode *Use) {
return Use->getOpcode() == ISD::STORE ||
Use->getOpcode() == ISD::INSERT_VECTOR_ELT ||
Use->getOpcode() == ISD::SCALAR_TO_VECTOR;
});

auto *LoadVec = dyn_cast<LoadSDNode>(InputVector);
if (LoadVec && ISD::isNormalLoad(LoadVec) && VT.isInteger() &&
SrcVT.getVectorElementType() == VT && DCI.isAfterLegalizeDAG() &&
!LikelyUsedAsVector && LoadVec->isSimple()) {
SDValue NewPtr = TLI.getVectorElementPointer(
DAG, LoadVec->getBasePtr(), SrcVT, DAG.getVectorIdxConstant(Idx, dl));
unsigned PtrOff = VT.getSizeInBits() * Idx / 8;
MachinePointerInfo MPI = LoadVec->getPointerInfo().getWithOffset(PtrOff);
Align Alignment = commonAlignment(LoadVec->getAlign(), PtrOff);
SDValue Load =
DAG.getLoad(VT, dl, LoadVec->getChain(), NewPtr, MPI, Alignment,
LoadVec->getMemOperand()->getFlags(), LoadVec->getAAInfo());
DAG.makeEquivalentMemoryOrdering(LoadVec, Load);
return Load;
}

return SDValue();
}

// Attempt to peek through a target shuffle and extract the scalar from the
// source.
static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
Expand Down Expand Up @@ -44600,6 +44643,11 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
if (SDValue V = scalarizeExtEltFP(N, DAG, Subtarget))
return V;

if (CIdx)
if (SDValue V = combineExtractFromVectorLoad(
N, InputVector, CIdx->getZExtValue(), dl, DAG, DCI))
return V;

// Attempt to extract a i1 element by using MOVMSK to extract the signbits
// and then testing the relevant element.
//
Expand Down Expand Up @@ -44645,34 +44693,6 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
}
}

// If this extract is from a loaded vector value and will be used as an
// integer, that requires a potentially expensive XMM -> GPR transfer.
// Additionally, if we can convert to a scalar integer load, that will likely
// be folded into a subsequent integer op.
// Note: Unlike the related fold for this in DAGCombiner, this is not limited
// to a single-use of the loaded vector. For the reasons above, we
// expect this to be profitable even if it creates an extra load.
bool LikelyUsedAsVector = any_of(N->uses(), [](SDNode *Use) {
return Use->getOpcode() == ISD::STORE ||
Use->getOpcode() == ISD::INSERT_VECTOR_ELT ||
Use->getOpcode() == ISD::SCALAR_TO_VECTOR;
});
auto *LoadVec = dyn_cast<LoadSDNode>(InputVector);
if (LoadVec && CIdx && ISD::isNormalLoad(LoadVec) && VT.isInteger() &&
SrcVT.getVectorElementType() == VT && DCI.isAfterLegalizeDAG() &&
!LikelyUsedAsVector && LoadVec->isSimple()) {
SDValue NewPtr =
TLI.getVectorElementPointer(DAG, LoadVec->getBasePtr(), SrcVT, EltIdx);
unsigned PtrOff = VT.getSizeInBits() * CIdx->getZExtValue() / 8;
MachinePointerInfo MPI = LoadVec->getPointerInfo().getWithOffset(PtrOff);
Align Alignment = commonAlignment(LoadVec->getAlign(), PtrOff);
SDValue Load =
DAG.getLoad(VT, dl, LoadVec->getChain(), NewPtr, MPI, Alignment,
LoadVec->getMemOperand()->getFlags(), LoadVec->getAAInfo());
DAG.makeEquivalentMemoryOrdering(LoadVec, Load);
return Load;
}

return SDValue();
}

Expand Down Expand Up @@ -48273,7 +48293,7 @@ static SDValue combineAndShuffleNot(SDNode *N, SelectionDAG &DAG,

// We do not split for SSE at all, but we need to split vectors for AVX1 and
// AVX2.
if (!Subtarget.useAVX512Regs() && VT.is512BitVector() &&
if (!Subtarget.useAVX512Regs() && VT.is512BitVector() &&
TLI.isTypeLegal(VT.getHalfNumVectorElementsVT(*DAG.getContext()))) {
SDValue LoX, HiX;
std::tie(LoX, HiX) = splitVector(X, DAG, DL);
Expand Down

0 comments on commit 875aed1

Please sign in to comment.