-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[LAA] Keep pointer checks on partial analysis #139719
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
babdff1
6dc3239
686b462
d78421a
be7173d
5da2cc9
a77c6f9
32dcdac
2b02a5b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -530,8 +530,10 @@ void RuntimePointerChecking::groupChecks( | |
// equivalence class, the iteration order is deterministic. | ||
for (auto M : DepCands.members(Access)) { | ||
auto PointerI = PositionMap.find(M.getPointer()); | ||
assert(PointerI != PositionMap.end() && | ||
"pointer in equivalence class not found in PositionMap"); | ||
// If we can't find the pointer in PositionMap that means we can't | ||
// generate a memcheck for it. | ||
if (PointerI == PositionMap.end()) | ||
continue; | ||
for (unsigned Pointer : PointerI->second) { | ||
bool Merged = false; | ||
// Mark this pointer as seen. | ||
|
@@ -693,10 +695,13 @@ class AccessAnalysis { | |
/// non-intersection. | ||
/// | ||
/// Returns true if we need no check or if we do and we can generate them | ||
/// (i.e. the pointers have computable bounds). | ||
/// (i.e. the pointers have computable bounds). A return value of false means | ||
/// we couldn't analyze and generate runtime checks for all pointers in the | ||
/// loop, but if \p AllowPartial is set then we will have checks for those | ||
/// pointers we could analyze. | ||
bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, Loop *TheLoop, | ||
const DenseMap<Value *, const SCEV *> &Strides, | ||
Value *&UncomputablePtr); | ||
Value *&UncomputablePtr, bool AllowPartial); | ||
|
||
/// Goes over all memory accesses, checks whether a RT check is needed | ||
/// and builds sets of dependent accesses. | ||
|
@@ -1181,8 +1186,8 @@ bool AccessAnalysis::createCheckForAccess( | |
|
||
bool AccessAnalysis::canCheckPtrAtRT( | ||
RuntimePointerChecking &RtCheck, Loop *TheLoop, | ||
const DenseMap<Value *, const SCEV *> &StridesMap, | ||
Value *&UncomputablePtr) { | ||
const DenseMap<Value *, const SCEV *> &StridesMap, Value *&UncomputablePtr, | ||
bool AllowPartial) { | ||
// Find pointers with computable bounds. We are going to use this information | ||
// to place a runtime bound check. | ||
bool CanDoRT = true; | ||
|
@@ -1275,7 +1280,8 @@ bool AccessAnalysis::canCheckPtrAtRT( | |
/*Assume=*/true)) { | ||
CanDoAliasSetRT = false; | ||
UncomputablePtr = Access.getPointer(); | ||
break; | ||
if (!AllowPartial) | ||
break; | ||
} | ||
} | ||
} | ||
|
@@ -1315,7 +1321,7 @@ bool AccessAnalysis::canCheckPtrAtRT( | |
} | ||
} | ||
|
||
if (MayNeedRTCheck && CanDoRT) | ||
if (MayNeedRTCheck && (CanDoRT || AllowPartial)) | ||
RtCheck.generateChecks(DepCands, IsDepCheckNeeded); | ||
|
||
LLVM_DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks() | ||
|
@@ -1329,7 +1335,7 @@ bool AccessAnalysis::canCheckPtrAtRT( | |
bool CanDoRTIfNeeded = !RtCheck.Need || CanDoRT; | ||
assert(CanDoRTIfNeeded == (CanDoRT || !MayNeedRTCheck) && | ||
"CanDoRTIfNeeded depends on RtCheck.Need"); | ||
if (!CanDoRTIfNeeded) | ||
if (!CanDoRTIfNeeded && !AllowPartial) | ||
RtCheck.reset(); | ||
return CanDoRTIfNeeded; | ||
} | ||
|
@@ -2599,9 +2605,9 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI, | |
// Find pointers with computable bounds. We are going to use this information | ||
// to place a runtime bound check. | ||
Value *UncomputablePtr = nullptr; | ||
bool CanDoRTIfNeeded = Accesses.canCheckPtrAtRT( | ||
*PtrRtChecking, TheLoop, SymbolicStrides, UncomputablePtr); | ||
if (!CanDoRTIfNeeded) { | ||
HasCompletePtrRtChecking = Accesses.canCheckPtrAtRT( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why does this need to be a member variable? Can we keep the name which still seems accurate? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is used in LoopAccessInfo::print to decide whether to print the "Generated run-time checks are incomplete" message. |
||
*PtrRtChecking, TheLoop, SymbolicStrides, UncomputablePtr, AllowPartial); | ||
if (!HasCompletePtrRtChecking) { | ||
const auto *I = dyn_cast_or_null<Instruction>(UncomputablePtr); | ||
recordAnalysis("CantIdentifyArrayBounds", I) | ||
<< "cannot identify array bounds"; | ||
|
@@ -2629,11 +2635,12 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI, | |
PtrRtChecking->Need = true; | ||
|
||
UncomputablePtr = nullptr; | ||
CanDoRTIfNeeded = Accesses.canCheckPtrAtRT( | ||
*PtrRtChecking, TheLoop, SymbolicStrides, UncomputablePtr); | ||
HasCompletePtrRtChecking = | ||
Accesses.canCheckPtrAtRT(*PtrRtChecking, TheLoop, SymbolicStrides, | ||
UncomputablePtr, AllowPartial); | ||
|
||
// Check that we found the bounds for the pointer. | ||
if (!CanDoRTIfNeeded) { | ||
if (!HasCompletePtrRtChecking) { | ||
auto *I = dyn_cast_or_null<Instruction>(UncomputablePtr); | ||
recordAnalysis("CantCheckMemDepsAtRunTime", I) | ||
<< "cannot check memory dependencies at runtime"; | ||
|
@@ -2908,9 +2915,10 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) { | |
LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, | ||
const TargetTransformInfo *TTI, | ||
const TargetLibraryInfo *TLI, AAResults *AA, | ||
DominatorTree *DT, LoopInfo *LI) | ||
DominatorTree *DT, LoopInfo *LI, | ||
bool AllowPartial) | ||
: PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)), | ||
PtrRtChecking(nullptr), TheLoop(L) { | ||
PtrRtChecking(nullptr), TheLoop(L), AllowPartial(AllowPartial) { | ||
unsigned MaxTargetVectorWidthInBits = std::numeric_limits<unsigned>::max(); | ||
if (TTI && !TTI->enableScalableVectorization()) | ||
// Scale the vector width by 2 as rough estimate to also consider | ||
|
@@ -2959,6 +2967,8 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const { | |
|
||
// List the pair of accesses need run-time checks to prove independence. | ||
PtrRtChecking->print(OS, Depth); | ||
if (PtrRtChecking->Need && !HasCompletePtrRtChecking) | ||
OS.indent(Depth) << "Generated run-time checks are incomplete\n"; | ||
OS << "\n"; | ||
|
||
OS.indent(Depth) | ||
|
@@ -2978,12 +2988,15 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const { | |
PSE->print(OS, Depth); | ||
} | ||
|
||
const LoopAccessInfo &LoopAccessInfoManager::getInfo(Loop &L) { | ||
const LoopAccessInfo &LoopAccessInfoManager::getInfo(Loop &L, | ||
bool AllowPartial) { | ||
const auto &[It, Inserted] = LoopAccessInfoMap.try_emplace(&L); | ||
|
||
if (Inserted) | ||
It->second = | ||
std::make_unique<LoopAccessInfo>(&L, &SE, TTI, TLI, &AA, &DT, &LI); | ||
// We need to create the LoopAccessInfo if either we don't already have one, | ||
// or if it was created with a different value of AllowPartial. | ||
Comment on lines
+2995
to
+2996
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could we keep the original one if it has AllowPartial = true and AllowPartial = false? It might be clearer if the flag is renamed to indicate that all runtime checks are collected, even if not valid. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I had thought about that, but I think having consistent output is more useful than any minor efficiency gain from not having to recompute the LoopAccessInfo. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ok would have been good to resolve this discussion before merging. What consistent output are you thinking about? The first only output change would be from the printing pass? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we consider test/Analysis/LoopAccessAnalysis/allow-partial.ll, then with this change if we compare the output of Potentially we could see the same thing if we has a transform pass that has AllowPartial=true followed by print<access-info>, but the only transform that has AllowPartial=true is LoopVersioningLICM which constructs LoopAccessInfoManager itself instead of using the LoopAccessAnalysis pass, so the analysis results won't be re-used anyway. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah I see, thanks. But that would only impact the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, though potentially there could be some transform pass added in the future that uses LoopAccessAnalysis without setting AllowPartial but which behaves differently when there's existing results results with it set. |
||
if (Inserted || It->second->hasAllowPartial() != AllowPartial) | ||
It->second = std::make_unique<LoopAccessInfo>(&L, &SE, TTI, TLI, &AA, &DT, | ||
&LI, AllowPartial); | ||
|
||
return *It->second; | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -441,7 +441,6 @@ FUNCTION_PASS("print-cfg-sccs", CFGSCCPrinterPass(errs())) | |
FUNCTION_PASS("print-memderefs", MemDerefPrinterPass(errs())) | ||
FUNCTION_PASS("print-mustexecute", MustExecutePrinterPass(errs())) | ||
FUNCTION_PASS("print-predicateinfo", PredicateInfoPrinterPass(errs())) | ||
FUNCTION_PASS("print<access-info>", LoopAccessInfoPrinterPass(errs())) | ||
FUNCTION_PASS("print<assumptions>", AssumptionPrinterPass(errs())) | ||
FUNCTION_PASS("print<block-freq>", BlockFrequencyPrinterPass(errs())) | ||
FUNCTION_PASS("print<branch-prob>", BranchProbabilityPrinterPass(errs())) | ||
|
@@ -583,6 +582,16 @@ FUNCTION_PASS_WITH_PARAMS( | |
return MergedLoadStoreMotionPass(Opts); | ||
}, | ||
parseMergedLoadStoreMotionOptions, "no-split-footer-bb;split-footer-bb") | ||
FUNCTION_PASS_WITH_PARAMS( | ||
"print<access-info>", "LoopAccessInfoPrinterPass", | ||
[](bool AllowPartial) { | ||
return LoopAccessInfoPrinterPass(errs(), AllowPartial); | ||
}, | ||
[](StringRef Params) { | ||
return PassBuilder::parseSinglePassOption(Params, "allow-partial", | ||
"LoopAccessInfoPrinterPass"); | ||
}, | ||
Comment on lines
+590
to
+593
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can lift this out into parseLoopAccessInfoPrinterOptions to match the rest of the code? |
||
"allow-partial") | ||
FUNCTION_PASS_WITH_PARAMS( | ||
"print<da>", "DependenceAnalysisPrinterPass", | ||
[](bool NormalizeResults) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
; RUN: opt -disable-output -passes='print<access-info><allow-partial>,print<access-info>' %s 2>&1 | FileCheck %s --check-prefixes=ALLOW-BEFORE | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think we have other print passes that take arguments already, but would it be more consistent to pare this as There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This unfortunately can't be currently done with how passes are defined in PassRegistry.def: "print<access-info>" is the name of the pass, and the options come after that. |
||
; RUN: opt -disable-output -passes='print<access-info>,print<access-info><allow-partial>' %s 2>&1 | FileCheck %s --check-prefixes=ALLOW-AFTER | ||
Comment on lines
+1
to
+2
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nice test! |
||
|
||
; Check that we get the right results when loop access analysis is run twice, | ||
; once without partial results and once with. | ||
|
||
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32" | ||
|
||
define void @gep_loaded_offset(ptr %p, ptr %q, ptr %r, i32 %n) { | ||
; ALLOW-BEFORE-LABEL: 'gep_loaded_offset' | ||
; ALLOW-BEFORE-NEXT: while.body: | ||
; ALLOW-BEFORE-NEXT: Report: cannot identify array bounds | ||
; ALLOW-BEFORE-NEXT: Dependences: | ||
; ALLOW-BEFORE-NEXT: Run-time memory checks: | ||
; ALLOW-BEFORE-NEXT: Check 0: | ||
; ALLOW-BEFORE-NEXT: Comparing group GRP0: | ||
; ALLOW-BEFORE-NEXT: %p.addr = phi ptr [ %incdec.ptr, %while.body ], [ %p, %entry ] | ||
; ALLOW-BEFORE-NEXT: Against group GRP1: | ||
; ALLOW-BEFORE-NEXT: ptr %r | ||
; ALLOW-BEFORE-NEXT: Grouped accesses: | ||
; ALLOW-BEFORE-NEXT: Group GRP0: | ||
; ALLOW-BEFORE-NEXT: (Low: %p High: (4 + (4 * (zext i32 (-1 + %n)<nsw> to i64))<nuw><nsw> + %p)) | ||
; ALLOW-BEFORE-NEXT: Member: {%p,+,4}<nuw><%while.body> | ||
; ALLOW-BEFORE-NEXT: Group GRP1: | ||
; ALLOW-BEFORE-NEXT: (Low: %r High: (8 + %r)) | ||
; ALLOW-BEFORE-NEXT: Member: %r | ||
; ALLOW-BEFORE-NEXT: Generated run-time checks are incomplete | ||
; ALLOW-BEFORE-EMPTY: | ||
; ALLOW-BEFORE-NEXT: Non vectorizable stores to invariant address were not found in loop. | ||
; ALLOW-BEFORE-NEXT: SCEV assumptions: | ||
; ALLOW-BEFORE-EMPTY: | ||
; ALLOW-BEFORE-NEXT: Expressions re-written: | ||
; | ||
; ALLOW-BEFORE-LABEL: 'gep_loaded_offset' | ||
; ALLOW-BEFORE-NEXT: while.body: | ||
; ALLOW-BEFORE-NEXT: Report: cannot identify array bounds | ||
; ALLOW-BEFORE-NEXT: Dependences: | ||
; ALLOW-BEFORE-NEXT: Run-time memory checks: | ||
; ALLOW-BEFORE-NEXT: Grouped accesses: | ||
; ALLOW-BEFORE-EMPTY: | ||
; ALLOW-BEFORE-NEXT: Non vectorizable stores to invariant address were not found in loop. | ||
; ALLOW-BEFORE-NEXT: SCEV assumptions: | ||
; ALLOW-BEFORE-EMPTY: | ||
; ALLOW-BEFORE-NEXT: Expressions re-written: | ||
; | ||
; ALLOW-AFTER-LABEL: 'gep_loaded_offset' | ||
; ALLOW-AFTER-NEXT: while.body: | ||
; ALLOW-AFTER-NEXT: Report: cannot identify array bounds | ||
; ALLOW-AFTER-NEXT: Dependences: | ||
; ALLOW-AFTER-NEXT: Run-time memory checks: | ||
; ALLOW-AFTER-NEXT: Grouped accesses: | ||
; ALLOW-AFTER-EMPTY: | ||
; ALLOW-AFTER-NEXT: Non vectorizable stores to invariant address were not found in loop. | ||
; ALLOW-AFTER-NEXT: SCEV assumptions: | ||
; ALLOW-AFTER-EMPTY: | ||
; ALLOW-AFTER-NEXT: Expressions re-written: | ||
; | ||
; ALLOW-AFTER-LABEL: 'gep_loaded_offset' | ||
; ALLOW-AFTER-NEXT: while.body: | ||
; ALLOW-AFTER-NEXT: Report: cannot identify array bounds | ||
; ALLOW-AFTER-NEXT: Dependences: | ||
; ALLOW-AFTER-NEXT: Run-time memory checks: | ||
; ALLOW-AFTER-NEXT: Check 0: | ||
; ALLOW-AFTER-NEXT: Comparing group GRP0: | ||
; ALLOW-AFTER-NEXT: %p.addr = phi ptr [ %incdec.ptr, %while.body ], [ %p, %entry ] | ||
; ALLOW-AFTER-NEXT: Against group GRP1: | ||
; ALLOW-AFTER-NEXT: ptr %r | ||
; ALLOW-AFTER-NEXT: Grouped accesses: | ||
; ALLOW-AFTER-NEXT: Group GRP0: | ||
; ALLOW-AFTER-NEXT: (Low: %p High: (4 + (4 * (zext i32 (-1 + %n)<nsw> to i64))<nuw><nsw> + %p)) | ||
; ALLOW-AFTER-NEXT: Member: {%p,+,4}<nuw><%while.body> | ||
; ALLOW-AFTER-NEXT: Group GRP1: | ||
; ALLOW-AFTER-NEXT: (Low: %r High: (8 + %r)) | ||
; ALLOW-AFTER-NEXT: Member: %r | ||
; ALLOW-AFTER-NEXT: Generated run-time checks are incomplete | ||
; ALLOW-AFTER-EMPTY: | ||
; ALLOW-AFTER-NEXT: Non vectorizable stores to invariant address were not found in loop. | ||
; ALLOW-AFTER-NEXT: SCEV assumptions: | ||
; ALLOW-AFTER-EMPTY: | ||
; ALLOW-AFTER-NEXT: Expressions re-written: | ||
; | ||
entry: | ||
br label %while.body | ||
|
||
while.body: | ||
%n.addr = phi i32 [ %dec, %while.body ], [ %n, %entry ] | ||
%p.addr = phi ptr [ %incdec.ptr, %while.body ], [ %p, %entry ] | ||
%dec = add nsw i32 %n.addr, -1 | ||
%rval = load i64, ptr %r, align 4 | ||
%arrayidx = getelementptr inbounds i32, ptr %q, i64 %rval | ||
%val = load i32, ptr %arrayidx, align 4 | ||
%incdec.ptr = getelementptr inbounds nuw i8, ptr %p.addr, i64 4 | ||
store i32 %val, ptr %p.addr, align 4 | ||
%tobool.not = icmp eq i32 %dec, 0 | ||
br i1 %tobool.not, label %while.end, label %while.body | ||
|
||
while.end: | ||
ret void | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Needs some documentation. As AllowPartial is the default, it might be better to have the flag indicate that all runtime that can be generated will be collected?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What about the naming?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure what you're asking here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It just seemed like the comment might have been missed?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The comment was added in 2b02a5b. It looks like github isn't showing the comment on the "conversations" tab but does on the "files changed" tab.