-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[LAA] Support assumptions in evaluatePtrAddRecAtMaxBTCWillNotWrap #147047
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,6 +23,8 @@ | |
#include "llvm/ADT/SmallVector.h" | ||
#include "llvm/Analysis/AliasAnalysis.h" | ||
#include "llvm/Analysis/AliasSetTracker.h" | ||
#include "llvm/Analysis/AssumeBundleQueries.h" | ||
#include "llvm/Analysis/AssumptionCache.h" | ||
#include "llvm/Analysis/LoopAnalysisManager.h" | ||
#include "llvm/Analysis/LoopInfo.h" | ||
#include "llvm/Analysis/LoopIterator.h" | ||
|
@@ -208,28 +210,52 @@ static const SCEV *mulSCEVOverflow(const SCEV *A, const SCEV *B, | |
|
||
/// Return true, if evaluating \p AR at \p MaxBTC cannot wrap, because \p AR at | ||
/// \p MaxBTC is guaranteed inbounds of the accessed object. | ||
static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR, | ||
const SCEV *MaxBTC, | ||
const SCEV *EltSize, | ||
ScalarEvolution &SE, | ||
const DataLayout &DL) { | ||
static bool | ||
evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR, | ||
const SCEV *MaxBTC, const SCEV *EltSize, | ||
ScalarEvolution &SE, const DataLayout &DL, | ||
AssumptionCache *AC, DominatorTree *DT) { | ||
auto *PointerBase = SE.getPointerBase(AR->getStart()); | ||
auto *StartPtr = dyn_cast<SCEVUnknown>(PointerBase); | ||
if (!StartPtr) | ||
return false; | ||
const Loop *L = AR->getLoop(); | ||
bool CheckForNonNull, CheckForFreed; | ||
uint64_t DerefBytes = StartPtr->getValue()->getPointerDereferenceableBytes( | ||
Value *StartPtrV = StartPtr->getValue(); | ||
uint64_t DerefBytes = StartPtrV->getPointerDereferenceableBytes( | ||
DL, CheckForNonNull, CheckForFreed); | ||
|
||
if (CheckForNonNull || CheckForFreed) | ||
if (DerefBytes && (CheckForNonNull || CheckForFreed)) | ||
return false; | ||
|
||
const SCEV *Step = AR->getStepRecurrence(SE); | ||
Type *WiderTy = SE.getWiderType(MaxBTC->getType(), Step->getType()); | ||
const SCEV *DerefBytesSCEV = SE.getConstant(WiderTy, DerefBytes); | ||
|
||
// Check if we have a suitable dereferenceable assumption we can use. | ||
RetainedKnowledge DerefRK; | ||
if (!StartPtrV->canBeFreed() && | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think this could be simplified by just doing:
|
||
getKnowledgeForValue( | ||
StartPtrV, {Attribute::Dereferenceable}, *AC, | ||
[&](RetainedKnowledge RK, Instruction *Assume, auto) { | ||
if (!isValidAssumeForContext( | ||
Assume, L->getLoopPredecessor()->getTerminator(), DT)) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why is it safe to assume that the loop predecessor is a good enough context for the assume? Couldn't the assumption be broken in the loop? I was expecting the context instruction here to be the actual pointer corresponding to the thing that could potentially wrap, i.e. %gep = getelementptr ... |
||
return false; | ||
if (RK.AttrKind == Attribute::Dereferenceable) { | ||
DerefRK = std::max(DerefRK, RK); | ||
return true; | ||
} | ||
return false; | ||
}) && | ||
DerefRK.ArgValue) { | ||
DerefBytesSCEV = SE.getUMaxExpr(DerefBytesSCEV, | ||
SE.getConstant(WiderTy, DerefRK.ArgValue)); | ||
} | ||
|
||
bool IsKnownNonNegative = SE.isKnownNonNegative(Step); | ||
if (!IsKnownNonNegative && !SE.isKnownNegative(Step)) | ||
return false; | ||
|
||
Type *WiderTy = SE.getWiderType(MaxBTC->getType(), Step->getType()); | ||
Step = SE.getNoopOrSignExtend(Step, WiderTy); | ||
MaxBTC = SE.getNoopOrZeroExtend(MaxBTC, WiderTy); | ||
|
||
|
@@ -256,24 +282,23 @@ static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR, | |
const SCEV *EndBytes = addSCEVNoOverflow(StartOffset, OffsetEndBytes, SE); | ||
if (!EndBytes) | ||
return false; | ||
return SE.isKnownPredicate(CmpInst::ICMP_ULE, EndBytes, | ||
SE.getConstant(WiderTy, DerefBytes)); | ||
return SE.isKnownPredicate(CmpInst::ICMP_ULE, EndBytes, DerefBytesSCEV); | ||
} | ||
|
||
// For negative steps check if | ||
// * StartOffset >= (MaxBTC * Step + EltSize) | ||
// * StartOffset <= DerefBytes. | ||
assert(SE.isKnownNegative(Step) && "must be known negative"); | ||
return SE.isKnownPredicate(CmpInst::ICMP_SGE, StartOffset, OffsetEndBytes) && | ||
SE.isKnownPredicate(CmpInst::ICMP_ULE, StartOffset, | ||
SE.getConstant(WiderTy, DerefBytes)); | ||
SE.isKnownPredicate(CmpInst::ICMP_ULE, StartOffset, DerefBytesSCEV); | ||
} | ||
|
||
std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess( | ||
const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC, | ||
const SCEV *MaxBTC, ScalarEvolution *SE, | ||
DenseMap<std::pair<const SCEV *, Type *>, | ||
std::pair<const SCEV *, const SCEV *>> *PointerBounds) { | ||
std::pair<const SCEV *, const SCEV *>> *PointerBounds, | ||
AssumptionCache *AC, DominatorTree *DT) { | ||
std::pair<const SCEV *, const SCEV *> *PtrBoundsPair; | ||
if (PointerBounds) { | ||
auto [Iter, Ins] = PointerBounds->insert( | ||
|
@@ -308,8 +333,8 @@ std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess( | |
// sets ScEnd to the maximum unsigned value for the type. Note that LAA | ||
// separately checks that accesses cannot wrap, so unsigned max | ||
// represents an upper bound. | ||
if (evaluatePtrAddRecAtMaxBTCWillNotWrap(AR, MaxBTC, EltSizeSCEV, *SE, | ||
DL)) { | ||
if (evaluatePtrAddRecAtMaxBTCWillNotWrap(AR, MaxBTC, EltSizeSCEV, *SE, DL, | ||
AC, DT)) { | ||
ScEnd = AR->evaluateAtIteration(MaxBTC, *SE); | ||
} else { | ||
ScEnd = SE->getAddExpr( | ||
|
@@ -356,9 +381,9 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr, | |
bool NeedsFreeze) { | ||
const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount(); | ||
const SCEV *BTC = PSE.getBackedgeTakenCount(); | ||
const auto &[ScStart, ScEnd] = | ||
getStartAndEndForAccess(Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC, | ||
PSE.getSE(), &DC.getPointerBounds()); | ||
const auto &[ScStart, ScEnd] = getStartAndEndForAccess( | ||
Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC, PSE.getSE(), | ||
&DC.getPointerBounds(), DC.getAC(), DC.getDT()); | ||
assert(!isa<SCEVCouldNotCompute>(ScStart) && | ||
!isa<SCEVCouldNotCompute>(ScEnd) && | ||
"must be able to compute both start and end expressions"); | ||
|
@@ -2011,10 +2036,10 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize( | |
const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount(); | ||
const auto &[SrcStart_, SrcEnd_] = | ||
getStartAndEndForAccess(InnermostLoop, Src, ATy, BTC, SymbolicMaxBTC, | ||
PSE.getSE(), &PointerBounds); | ||
PSE.getSE(), &PointerBounds, AC, DT); | ||
const auto &[SinkStart_, SinkEnd_] = | ||
getStartAndEndForAccess(InnermostLoop, Sink, BTy, BTC, SymbolicMaxBTC, | ||
PSE.getSE(), &PointerBounds); | ||
PSE.getSE(), &PointerBounds, AC, DT); | ||
if (!isa<SCEVCouldNotCompute>(SrcStart_) && | ||
!isa<SCEVCouldNotCompute>(SrcEnd_) && | ||
!isa<SCEVCouldNotCompute>(SinkStart_) && | ||
|
@@ -3015,7 +3040,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, | |
const TargetTransformInfo *TTI, | ||
const TargetLibraryInfo *TLI, AAResults *AA, | ||
DominatorTree *DT, LoopInfo *LI, | ||
bool AllowPartial) | ||
AssumptionCache *AC, bool AllowPartial) | ||
: PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)), | ||
PtrRtChecking(nullptr), TheLoop(L), AllowPartial(AllowPartial) { | ||
unsigned MaxTargetVectorWidthInBits = std::numeric_limits<unsigned>::max(); | ||
|
@@ -3025,8 +3050,8 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, | |
MaxTargetVectorWidthInBits = | ||
TTI->getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector) * 2; | ||
|
||
DepChecker = std::make_unique<MemoryDepChecker>(*PSE, L, SymbolicStrides, | ||
MaxTargetVectorWidthInBits); | ||
DepChecker = std::make_unique<MemoryDepChecker>( | ||
*PSE, AC, DT, L, SymbolicStrides, MaxTargetVectorWidthInBits); | ||
PtrRtChecking = std::make_unique<RuntimePointerChecking>(*DepChecker, SE); | ||
if (canAnalyzeLoop()) | ||
CanVecMem = analyzeLoop(AA, LI, TLI, DT); | ||
|
@@ -3095,7 +3120,7 @@ const LoopAccessInfo &LoopAccessInfoManager::getInfo(Loop &L, | |
// or if it was created with a different value of AllowPartial. | ||
if (Inserted || It->second->hasAllowPartial() != AllowPartial) | ||
It->second = std::make_unique<LoopAccessInfo>(&L, &SE, TTI, TLI, &AA, &DT, | ||
&LI, AllowPartial); | ||
&LI, AC, AllowPartial); | ||
|
||
return *It->second; | ||
} | ||
|
@@ -3138,7 +3163,8 @@ LoopAccessInfoManager LoopAccessAnalysis::run(Function &F, | |
auto &LI = FAM.getResult<LoopAnalysis>(F); | ||
auto &TTI = FAM.getResult<TargetIRAnalysis>(F); | ||
auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F); | ||
return LoopAccessInfoManager(SE, AA, DT, LI, &TTI, &TLI); | ||
auto &AC = FAM.getResult<AssumptionAnalysis>(F); | ||
return LoopAccessInfoManager(SE, AA, DT, LI, &TTI, &TLI, &AC); | ||
} | ||
|
||
AnalysisKey LoopAccessAnalysis::Key; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
An alternative would be to retrieve them directly from ScalarEvolution, which already holds them, but they are not accessible at the moment. Not sure if we should expose them in this patch so they can be used more conveniently here.