Skip to content

[LV]Enable non-power-of-2 store-load forwarding distance in predicated DataWithEVL vectorization mode #100755

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Rebase
Created using spr 1.3.5
  • Loading branch information
alexey-bataev committed Apr 14, 2025
commit 24d478e2159049f1e0a7c21da249c8c8857c6bb5
34 changes: 25 additions & 9 deletions llvm/include/llvm/Analysis/LoopAccessAnalysis.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ class TargetTransformInfo;
struct VectorizerParams {
/// Maximum SIMD width.
static const unsigned MaxVectorWidth;
/// Maximum scalable vector width.
static constexpr unsigned MaxScalableVectorWidth = 512;
/// Maximum non-power-of-2 vector width.
static constexpr unsigned MaxNonPowerOf2VectorWidth = 512;

/// VF as overridden by the user.
static unsigned VectorizationFactor;
Expand Down Expand Up @@ -221,17 +221,28 @@ class MemoryDepChecker {

/// Return true if there are no store-load forwarding dependencies.
bool isSafeForAnyStoreLoadForwardDistances() const {
return MaxStoreLoadForwardSafeDistanceInBits ==
std::numeric_limits<uint64_t>::max();
return MaxPowerOf2StoreLoadForwardSafeDistanceInBits ==
std::numeric_limits<uint64_t>::max() &&
MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits ==
std::numeric_limits<uint64_t>::max();
}

/// Return safe number of elements, which do not prevent store-load
/// forwarding, multiplied by the size of the elements in bits.
uint64_t getStoreLoadForwardSafeDistanceInBits() const {
/// forwarding, multiplied by the size of the elements in bits (power-of-2).
uint64_t getPowerOf2StoreLoadForwardSafeDistanceInBits() const {
assert(!isSafeForAnyStoreLoadForwardDistances() &&
"Expected the distance, that prevent store-load forwarding, to be "
"set.");
return MaxStoreLoadForwardSafeDistanceInBits;
return MaxPowerOf2StoreLoadForwardSafeDistanceInBits;
}

/// Return safe number of elements, which do not prevent store-load
/// forwarding, multiplied by the size of the elements in bits
/// (non-power-of-2).
uint64_t getNonPowerOf2StoreLoadForwardSafeDistanceInBits() const {
assert(!isSafeForAnyStoreLoadForwardDistances() &&
"Expected the distance, that prevent store-load forwarding, to be "
"set.");
return MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits;
}

/// In some cases when the dependency check fails we can still
Expand Down Expand Up @@ -323,8 +334,13 @@ class MemoryDepChecker {
uint64_t MaxSafeVectorWidthInBits = -1U;

/// Maximum number of elements, which do not prevent store-load forwarding,
/// multiplied by the size of the elements in bits.
uint64_t MaxStoreLoadForwardSafeDistanceInBits =
/// multiplied by the size of the elements in bits (power-of-2).
uint64_t MaxPowerOf2StoreLoadForwardSafeDistanceInBits =
std::numeric_limits<uint64_t>::max();

/// Maximum number of elements, which do not prevent store-load forwarding,
/// multiplied by the size of the elements in bits (non-power-of-2).
uint64_t MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits =
std::numeric_limits<uint64_t>::max();

/// If we see a non-constant dependence distance we can still try to
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -413,8 +413,15 @@ class LoopVectorizationLegality {

/// Return safe power-of-2 number of elements, which do not prevent store-load
/// forwarding and are safe to operate on simultaneously.
uint64_t getMaxStoreLoadForwardSafeDistanceInBits() const {
return LAI->getDepChecker().getStoreLoadForwardSafeDistanceInBits();
uint64_t getPowerOf2MaxStoreLoadForwardSafeDistanceInBits() const {
return LAI->getDepChecker().getPowerOf2StoreLoadForwardSafeDistanceInBits();
}

/// Return safe non-power-of-2 number of elements, which do not prevent
/// store-load forwarding and are safe to operate on simultaneously.
uint64_t getNonPowerOf2MaxStoreLoadForwardSafeDistanceInBits() const {
return LAI->getDepChecker()
.getNonPowerOf2StoreLoadForwardSafeDistanceInBits();
}

/// Returns true if vector representation of the instruction \p I
Expand Down
33 changes: 17 additions & 16 deletions llvm/lib/Analysis/LoopAccessAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1756,7 +1756,7 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
// Maximum vector factor.
uint64_t MaxVFWithoutSLForwardIssuesPowerOf2 =
std::min(VectorizerParams::MaxVectorWidth * TypeByteSize,
MaxStoreLoadForwardSafeDistanceInBits);
MaxPowerOf2StoreLoadForwardSafeDistanceInBits);
uint64_t MaxVFWithoutSLForwardIssuesNonPowerOf2 = 0;

// Compute the smallest VF at which the store and load would be misaligned.
Expand All @@ -1775,18 +1775,18 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
// than 8 (NumItersForStoreLoadThroughMemory).
if (AllowNonPow2Deps) {
MaxVFWithoutSLForwardIssuesNonPowerOf2 =
std::min(VectorizerParams::MaxScalableVectorWidth * TypeByteSize,
MaxStoreLoadForwardSafeDistanceInBits);
std::min(VectorizerParams::MaxNonPowerOf2VectorWidth * TypeByteSize,
MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits);

for (uint64_t VF = MaxVFWithoutSLForwardIssuesNonPowerOf2,
E = 2 * TypeByteSize;
VF >= E; VF -= TypeByteSize) {
for (uint64_t VF = MaxVFWithoutSLForwardIssuesNonPowerOf2;
VF > MaxVFWithoutSLForwardIssuesPowerOf2; VF -= TypeByteSize) {
if (Distance % VF == 0 ||
Distance / VF >= NumItersForStoreLoadThroughMemory) {
uint64_t GCD =
isSafeForAnyStoreLoadForwardDistances()
? VF
: std::gcd(MaxStoreLoadForwardSafeDistanceInBits, VF);
: std::gcd(MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits,
VF);
MaxVFWithoutSLForwardIssuesNonPowerOf2 = GCD;
break;
}
Expand All @@ -1805,26 +1805,25 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
// be calculated.
if (AllowNonPow2Deps && CommonStride &&
MaxVFWithoutSLForwardIssuesNonPowerOf2 <
MaxStoreLoadForwardSafeDistanceInBits &&
MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits &&
MaxVFWithoutSLForwardIssuesNonPowerOf2 !=
VectorizerParams::MaxScalableVectorWidth * TypeByteSize) {
VectorizerParams::MaxNonPowerOf2VectorWidth * TypeByteSize) {
uint64_t MaxVF = MaxVFWithoutSLForwardIssuesNonPowerOf2 / CommonStride;
uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
MaxStoreLoadForwardSafeDistanceInBits =
std::min(MaxStoreLoadForwardSafeDistanceInBits, MaxVFInBits);
return false;
MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits =
std::min(MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits, MaxVFInBits);
}

if (CommonStride &&
MaxVFWithoutSLForwardIssuesPowerOf2 <
MaxStoreLoadForwardSafeDistanceInBits &&
MaxPowerOf2StoreLoadForwardSafeDistanceInBits &&
MaxVFWithoutSLForwardIssuesPowerOf2 !=
VectorizerParams::MaxVectorWidth * TypeByteSize) {
uint64_t MaxVF =
bit_floor(MaxVFWithoutSLForwardIssuesPowerOf2 / CommonStride);
uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
MaxStoreLoadForwardSafeDistanceInBits =
std::min(MaxStoreLoadForwardSafeDistanceInBits, MaxVFInBits);
MaxPowerOf2StoreLoadForwardSafeDistanceInBits =
std::min(MaxPowerOf2StoreLoadForwardSafeDistanceInBits, MaxVFInBits);
}
return false;
}
Expand Down Expand Up @@ -3040,7 +3039,9 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
OS << " with a maximum safe vector width of "
<< DC.getMaxSafeVectorWidthInBits() << " bits";
if (!DC.isSafeForAnyStoreLoadForwardDistances()) {
uint64_t SLDist = DC.getStoreLoadForwardSafeDistanceInBits();
uint64_t SLDist = DC.getNonPowerOf2StoreLoadForwardSafeDistanceInBits();
if (SLDist == std::numeric_limits<uint64_t>::max())
SLDist = DC.getPowerOf2StoreLoadForwardSafeDistanceInBits();
OS << ", with a maximum safe store-load forward width of " << SLDist
<< " bits";
}
Expand Down
25 changes: 16 additions & 9 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3805,16 +3805,23 @@ FixedScalableVFPair LoopVectorizationCostModel::computeFeasibleMaxVF(
Legal->getMaxSafeVectorWidthInBits() / WidestType;
unsigned MaxSafeElementsPowerOf2 = bit_floor(MaxSafeElementsNonPowerOf2);
if (!Legal->isSafeForAnyStoreLoadForwardDistances()) {
unsigned SLDist = Legal->getMaxStoreLoadForwardSafeDistanceInBits();
unsigned SLVF = SLDist / WidestType;
MaxSafeElementsPowerOf2 =
std::min(MaxSafeElementsPowerOf2, 1U << countr_zero(SLVF));
if (FoldTailByMasking && AllowNonPowerOf2SafeDist)
MaxSafeElements = Legal->isSafeForAnyVectorWidth()
? SLVF
: std::gcd(MaxSafeElementsNonPowerOf2, SLVF);
else
uint64_t SLDist = Legal->getPowerOf2MaxStoreLoadForwardSafeDistanceInBits();
if (SLDist != std::numeric_limits<uint64_t>::max()) {
unsigned SLVF = SLDist / WidestType;
MaxSafeElementsPowerOf2 = std::min(MaxSafeElementsPowerOf2, SLVF);
}
if (FoldTailByMasking && AllowNonPowerOf2SafeDist) {
uint64_t SLDist =
Legal->getNonPowerOf2MaxStoreLoadForwardSafeDistanceInBits();
if (SLDist != std::numeric_limits<uint64_t>::max()) {
unsigned SLVF = SLDist / WidestType;
MaxSafeElements = Legal->isSafeForAnyVectorWidth()
? SLVF
: std::gcd(MaxSafeElementsNonPowerOf2, SLVF);
}
} else {
MaxSafeElements = MaxSafeElementsPowerOf2;
}
}
auto MaxSafeFixedVF = ElementCount::getFixed(MaxSafeElementsPowerOf2);
auto MaxSafeScalableVF = getMaxLegalScalableVF(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ define void @safe_dep(ptr %p) {
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr ir<%p>, ir<[[OFFSET]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[V]]>, vp<[[EVL]]>
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[EVL_NEXT]]> = add nuw vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add nuw vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]>
Expand Down
Loading
You are viewing a condensed version of this merge commit. You can view the full changes here.