llvm · john-brawn-arm · Jun 4, 2025 · Mar 26, 2025 · May 15, 2025 · May 19, 2025
diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -194,7 +194,8 @@ class MemoryDepChecker {
   /// of a write access.
   LLVM_ABI void addAccess(LoadInst *LI);
 
-  /// Check whether the dependencies between the accesses are safe.
+  /// Check whether the dependencies between the accesses are safe, and record
+  /// the dependence information in Dependences if so.
   ///
   /// Only checks sets with elements in \p CheckDeps.
   LLVM_ABI bool areDepsSafe(const DepCandidates &AccessSets,
@@ -654,7 +655,8 @@ class RuntimePointerChecking {
 /// For memory dependences that cannot be determined at compile time, it
 /// generates run-time checks to prove independence.  This is done by
 /// AccessAnalysis::canCheckPtrAtRT and the checks are maintained by the
-/// RuntimePointerCheck class.
+/// RuntimePointerChecking class. \p AllowPartial determines whether partial
+/// checks are generated when not all pointers could be analyzed.
 ///
 /// If pointers can wrap or can't be expressed as affine AddRec expressions by
 /// ScalarEvolution, we will generate run-time checks by emitting a
@@ -667,7 +669,8 @@ class LoopAccessInfo {
   LLVM_ABI LoopAccessInfo(Loop *L, ScalarEvolution *SE,
                           const TargetTransformInfo *TTI,
                           const TargetLibraryInfo *TLI, AAResults *AA,
-                          DominatorTree *DT, LoopInfo *LI);
+                          DominatorTree *DT, LoopInfo *LI,
+                          bool AllowPartial = false);
 
   /// Return true we can analyze the memory accesses in the loop and there are
   /// no memory dependence cycles. Note that for dependences between loads &
@@ -682,6 +685,11 @@ class LoopAccessInfo {
   /// not legal to insert them.
   bool hasConvergentOp() const { return HasConvergentOp; }
 
+  /// Return true if, when runtime pointer checking does not have complete
+  /// results, it instead has partial results for those memory accesses that
+  /// could be analyzed.
+  bool hasAllowPartial() const { return AllowPartial; }
+
   const RuntimePointerChecking *getRuntimePointerChecking() const {
     return PtrRtChecking.get();
   }
@@ -784,20 +792,30 @@ class LoopAccessInfo {
 
   /// We need to check that all of the pointers in this list are disjoint
   /// at runtime. Using std::unique_ptr to make using move ctor simpler.
+  /// If AllowPartial is true then this list may contain only partial
+  /// information when we've failed to analyze all the memory accesses in the
+  /// loop, in which case HasCompletePtrRtChecking will be false.
   std::unique_ptr<RuntimePointerChecking> PtrRtChecking;
 
-  /// the Memory Dependence Checker which can determine the
+  /// The Memory Dependence Checker which can determine the
   /// loop-independent and loop-carried dependences between memory accesses.
+  /// This will be empty if we've failed to analyze all the memory accesses in
+  /// the loop (i.e. CanVecMem is false).
   std::unique_ptr<MemoryDepChecker> DepChecker;
 
   Loop *TheLoop;
 
+  /// Determines whether we should generate partial runtime checks when not all
+  /// memory accesses could be analyzed.
+  bool AllowPartial;
+
   unsigned NumLoads = 0;
   unsigned NumStores = 0;
 
   /// Cache the result of analyzeLoop.
   bool CanVecMem = false;
   bool HasConvergentOp = false;
+  bool HasCompletePtrRtChecking = false;
 
   /// Indicator that there are two non vectorizable stores to the same uniform
   /// address.
@@ -920,7 +938,7 @@ class LoopAccessInfoManager {
                         const TargetLibraryInfo *TLI)
       : SE(SE), AA(AA), DT(DT), LI(LI), TTI(TTI), TLI(TLI) {}
 
-  LLVM_ABI const LoopAccessInfo &getInfo(Loop &L);
+  LLVM_ABI const LoopAccessInfo &getInfo(Loop &L, bool AllowPartial = false);
 
   LLVM_ABI void clear();
 

diff --git a/llvm/include/llvm/Transforms/Scalar/LoopAccessAnalysisPrinter.h b/llvm/include/llvm/Transforms/Scalar/LoopAccessAnalysisPrinter.h
@@ -20,9 +20,11 @@ class raw_ostream;
 class LoopAccessInfoPrinterPass
     : public PassInfoMixin<LoopAccessInfoPrinterPass> {
   raw_ostream &OS;
+  bool AllowPartial;
 
 public:
-  explicit LoopAccessInfoPrinterPass(raw_ostream &OS) : OS(OS) {}
+  explicit LoopAccessInfoPrinterPass(raw_ostream &OS, bool AllowPartial)
+      : OS(OS), AllowPartial(AllowPartial) {}
   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
   static bool isRequired() { return true; }
 };

diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -530,8 +530,10 @@ void RuntimePointerChecking::groupChecks(
     // equivalence class, the iteration order is deterministic.
     for (auto M : DepCands.members(Access)) {
       auto PointerI = PositionMap.find(M.getPointer());
-      assert(PointerI != PositionMap.end() &&
-             "pointer in equivalence class not found in PositionMap");
+      // If we can't find the pointer in PositionMap that means we can't
+      // generate a memcheck for it.
+      if (PointerI == PositionMap.end())
+        continue;
       for (unsigned Pointer : PointerI->second) {
         bool Merged = false;
         // Mark this pointer as seen.
@@ -693,10 +695,13 @@ class AccessAnalysis {
   /// non-intersection.
   ///
   /// Returns true if we need no check or if we do and we can generate them
-  /// (i.e. the pointers have computable bounds).
+  /// (i.e. the pointers have computable bounds). A return value of false means
+  /// we couldn't analyze and generate runtime checks for all pointers in the
+  /// loop, but if \p AllowPartial is set then we will have checks for those
+  /// pointers we could analyze.
   bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, Loop *TheLoop,
                        const DenseMap<Value *, const SCEV *> &Strides,
-                       Value *&UncomputablePtr);
+                       Value *&UncomputablePtr, bool AllowPartial);
 
   /// Goes over all memory accesses, checks whether a RT check is needed
   /// and builds sets of dependent accesses.
@@ -1181,8 +1186,8 @@ bool AccessAnalysis::createCheckForAccess(
 
 bool AccessAnalysis::canCheckPtrAtRT(
     RuntimePointerChecking &RtCheck, Loop *TheLoop,
-    const DenseMap<Value *, const SCEV *> &StridesMap,
-    Value *&UncomputablePtr) {
+    const DenseMap<Value *, const SCEV *> &StridesMap, Value *&UncomputablePtr,
+    bool AllowPartial) {
   // Find pointers with computable bounds. We are going to use this information
   // to place a runtime bound check.
   bool CanDoRT = true;
@@ -1275,7 +1280,8 @@ bool AccessAnalysis::canCheckPtrAtRT(
                                   /*Assume=*/true)) {
           CanDoAliasSetRT = false;
           UncomputablePtr = Access.getPointer();
-          break;
+          if (!AllowPartial)
+            break;
         }
       }
     }
@@ -1315,7 +1321,7 @@ bool AccessAnalysis::canCheckPtrAtRT(
     }
   }
 
-  if (MayNeedRTCheck && CanDoRT)
+  if (MayNeedRTCheck && (CanDoRT || AllowPartial))
     RtCheck.generateChecks(DepCands, IsDepCheckNeeded);
 
   LLVM_DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks()
@@ -1329,7 +1335,7 @@ bool AccessAnalysis::canCheckPtrAtRT(
   bool CanDoRTIfNeeded = !RtCheck.Need || CanDoRT;
   assert(CanDoRTIfNeeded == (CanDoRT || !MayNeedRTCheck) &&
          "CanDoRTIfNeeded depends on RtCheck.Need");
-  if (!CanDoRTIfNeeded)
+  if (!CanDoRTIfNeeded && !AllowPartial)
     RtCheck.reset();
   return CanDoRTIfNeeded;
 }
@@ -2599,9 +2605,9 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
   // Find pointers with computable bounds. We are going to use this information
   // to place a runtime bound check.
   Value *UncomputablePtr = nullptr;
-  bool CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(
-      *PtrRtChecking, TheLoop, SymbolicStrides, UncomputablePtr);
-  if (!CanDoRTIfNeeded) {
+  HasCompletePtrRtChecking = Accesses.canCheckPtrAtRT(
+      *PtrRtChecking, TheLoop, SymbolicStrides, UncomputablePtr, AllowPartial);
+  if (!HasCompletePtrRtChecking) {
     const auto *I = dyn_cast_or_null<Instruction>(UncomputablePtr);
     recordAnalysis("CantIdentifyArrayBounds", I)
         << "cannot identify array bounds";
@@ -2629,11 +2635,12 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
       PtrRtChecking->Need = true;
 
       UncomputablePtr = nullptr;
-      CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(
-          *PtrRtChecking, TheLoop, SymbolicStrides, UncomputablePtr);
+      HasCompletePtrRtChecking =
+          Accesses.canCheckPtrAtRT(*PtrRtChecking, TheLoop, SymbolicStrides,
+                                   UncomputablePtr, AllowPartial);
 
       // Check that we found the bounds for the pointer.
-      if (!CanDoRTIfNeeded) {
+      if (!HasCompletePtrRtChecking) {
         auto *I = dyn_cast_or_null<Instruction>(UncomputablePtr);
         recordAnalysis("CantCheckMemDepsAtRunTime", I)
             << "cannot check memory dependencies at runtime";
@@ -2908,9 +2915,10 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
 LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
                                const TargetTransformInfo *TTI,
                                const TargetLibraryInfo *TLI, AAResults *AA,
-                               DominatorTree *DT, LoopInfo *LI)
+                               DominatorTree *DT, LoopInfo *LI,
+                               bool AllowPartial)
     : PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)),
-      PtrRtChecking(nullptr), TheLoop(L) {
+      PtrRtChecking(nullptr), TheLoop(L), AllowPartial(AllowPartial) {
   unsigned MaxTargetVectorWidthInBits = std::numeric_limits<unsigned>::max();
   if (TTI && !TTI->enableScalableVectorization())
     // Scale the vector width by 2 as rough estimate to also consider
@@ -2959,6 +2967,8 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
 
   // List the pair of accesses need run-time checks to prove independence.
   PtrRtChecking->print(OS, Depth);
+  if (PtrRtChecking->Need && !HasCompletePtrRtChecking)
+    OS.indent(Depth) << "Generated run-time checks are incomplete\n";
   OS << "\n";
 
   OS.indent(Depth)
@@ -2978,12 +2988,15 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
   PSE->print(OS, Depth);
 }
 
-const LoopAccessInfo &LoopAccessInfoManager::getInfo(Loop &L) {
+const LoopAccessInfo &LoopAccessInfoManager::getInfo(Loop &L,
+                                                     bool AllowPartial) {
   const auto &[It, Inserted] = LoopAccessInfoMap.try_emplace(&L);
 
-  if (Inserted)
-    It->second =
-        std::make_unique<LoopAccessInfo>(&L, &SE, TTI, TLI, &AA, &DT, &LI);
+  // Create the LoopAccessInfo if we don't have one yet, or recreate it if the
+  // cached one used a different AllowPartial (invalidating earlier references).
+  if (Inserted || It->second->hasAllowPartial() != AllowPartial)
+    It->second = std::make_unique<LoopAccessInfo>(&L, &SE, TTI, TLI, &AA, &DT,
+                                                  &LI, AllowPartial);
 
   return *It->second;
 }

diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
@@ -441,7 +441,6 @@ FUNCTION_PASS("print-cfg-sccs", CFGSCCPrinterPass(errs()))
 FUNCTION_PASS("print-memderefs", MemDerefPrinterPass(errs()))
 FUNCTION_PASS("print-mustexecute", MustExecutePrinterPass(errs()))
 FUNCTION_PASS("print-predicateinfo", PredicateInfoPrinterPass(errs()))
-FUNCTION_PASS("print<access-info>", LoopAccessInfoPrinterPass(errs()))
 FUNCTION_PASS("print<assumptions>", AssumptionPrinterPass(errs()))
 FUNCTION_PASS("print<block-freq>", BlockFrequencyPrinterPass(errs()))
 FUNCTION_PASS("print<branch-prob>", BranchProbabilityPrinterPass(errs()))
@@ -583,6 +582,16 @@ FUNCTION_PASS_WITH_PARAMS(
       return MergedLoadStoreMotionPass(Opts);
     },
     parseMergedLoadStoreMotionOptions, "no-split-footer-bb;split-footer-bb")
+FUNCTION_PASS_WITH_PARAMS(
+    "print<access-info>", "LoopAccessInfoPrinterPass",
+    [](bool AllowPartial) {
+      return LoopAccessInfoPrinterPass(errs(), AllowPartial);
+    },
+    [](StringRef Params) {
+      return PassBuilder::parseSinglePassOption(Params, "allow-partial",
+                                                "LoopAccessInfoPrinterPass");
+    },
+    "allow-partial")
 FUNCTION_PASS_WITH_PARAMS(
     "print<da>", "DependenceAnalysisPrinterPass",
     [](bool NormalizeResults) {

diff --git a/llvm/lib/Transforms/Scalar/LoopAccessAnalysisPrinter.cpp b/llvm/lib/Transforms/Scalar/LoopAccessAnalysisPrinter.cpp
@@ -28,7 +28,7 @@ PreservedAnalyses LoopAccessInfoPrinterPass::run(Function &F,
   while (!Worklist.empty()) {
     Loop *L = Worklist.pop_back_val();
     OS.indent(2) << L->getHeader()->getName() << ":\n";
-    LAIs.getInfo(*L).print(OS, 4);
+    LAIs.getInfo(*L, AllowPartial).print(OS, 4);
   }
   return PreservedAnalyses::all();
 }
diff --git a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
@@ -368,7 +368,7 @@ bool LoopVersioningLICM::legalLoopInstructions() {
   IsReadOnlyLoop = true;
   using namespace ore;
   // Get LoopAccessInfo from current loop via the proxy.
-  LAI = &LAIs.getInfo(*CurLoop);
+  LAI = &LAIs.getInfo(*CurLoop, /*AllowPartial=*/true);
   // Check LoopAccessInfo for need of runtime check.
   if (LAI->getRuntimePointerChecking()->getChecks().empty()) {
     LLVM_DEBUG(dbgs() << "    LAA: Runtime check not found !!\n");

diff --git a/llvm/test/Analysis/LoopAccessAnalysis/allow-partial.ll b/llvm/test/Analysis/LoopAccessAnalysis/allow-partial.ll
@@ -0,0 +1,99 @@
+; RUN: opt -disable-output -passes='print<access-info><allow-partial>,print<access-info>' %s 2>&1 | FileCheck %s --check-prefixes=ALLOW-BEFORE
+; RUN: opt -disable-output -passes='print<access-info>,print<access-info><allow-partial>' %s 2>&1 | FileCheck %s --check-prefixes=ALLOW-AFTER
+
+; Check that we get the right results when loop access analysis is run twice,
+; once without partial results and once with.
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+
+define void @gep_loaded_offset(ptr %p, ptr %q, ptr %r, i32 %n) {
+; ALLOW-BEFORE-LABEL: 'gep_loaded_offset'
+; ALLOW-BEFORE-NEXT:    while.body:
+; ALLOW-BEFORE-NEXT:      Report: cannot identify array bounds
+; ALLOW-BEFORE-NEXT:      Dependences:
+; ALLOW-BEFORE-NEXT:      Run-time memory checks:
+; ALLOW-BEFORE-NEXT:      Check 0:
+; ALLOW-BEFORE-NEXT:        Comparing group GRP0:
+; ALLOW-BEFORE-NEXT:          %p.addr = phi ptr [ %incdec.ptr, %while.body ], [ %p, %entry ]
+; ALLOW-BEFORE-NEXT:        Against group GRP1:
+; ALLOW-BEFORE-NEXT:        ptr %r
+; ALLOW-BEFORE-NEXT:      Grouped accesses:
+; ALLOW-BEFORE-NEXT:        Group GRP0:
+; ALLOW-BEFORE-NEXT:          (Low: %p High: (4 + (4 * (zext i32 (-1 + %n)<nsw> to i64))<nuw><nsw> + %p))
+; ALLOW-BEFORE-NEXT:            Member: {%p,+,4}<nuw><%while.body>
+; ALLOW-BEFORE-NEXT:        Group GRP1:
+; ALLOW-BEFORE-NEXT:          (Low: %r High: (8 + %r))
+; ALLOW-BEFORE-NEXT:            Member: %r
+; ALLOW-BEFORE-NEXT:      Generated run-time checks are incomplete
+; ALLOW-BEFORE-EMPTY:
+; ALLOW-BEFORE-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; ALLOW-BEFORE-NEXT:      SCEV assumptions:
+; ALLOW-BEFORE-EMPTY:
+; ALLOW-BEFORE-NEXT:      Expressions re-written:
+;
+; ALLOW-BEFORE-LABEL: 'gep_loaded_offset'
+; ALLOW-BEFORE-NEXT:    while.body:
+; ALLOW-BEFORE-NEXT:      Report: cannot identify array bounds
+; ALLOW-BEFORE-NEXT:      Dependences:
+; ALLOW-BEFORE-NEXT:      Run-time memory checks:
+; ALLOW-BEFORE-NEXT:      Grouped accesses:
+; ALLOW-BEFORE-EMPTY:
+; ALLOW-BEFORE-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; ALLOW-BEFORE-NEXT:      SCEV assumptions:
+; ALLOW-BEFORE-EMPTY:
+; ALLOW-BEFORE-NEXT:      Expressions re-written:
+;
+; ALLOW-AFTER-LABEL: 'gep_loaded_offset'
+; ALLOW-AFTER-NEXT:    while.body:
+; ALLOW-AFTER-NEXT:      Report: cannot identify array bounds
+; ALLOW-AFTER-NEXT:      Dependences:
+; ALLOW-AFTER-NEXT:      Run-time memory checks:
+; ALLOW-AFTER-NEXT:      Grouped accesses:
+; ALLOW-AFTER-EMPTY:
+; ALLOW-AFTER-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; ALLOW-AFTER-NEXT:      SCEV assumptions:
+; ALLOW-AFTER-EMPTY:
+; ALLOW-AFTER-NEXT:      Expressions re-written:
+;
+; ALLOW-AFTER-LABEL: 'gep_loaded_offset'
+; ALLOW-AFTER-NEXT:    while.body:
+; ALLOW-AFTER-NEXT:      Report: cannot identify array bounds
+; ALLOW-AFTER-NEXT:      Dependences:
+; ALLOW-AFTER-NEXT:      Run-time memory checks:
+; ALLOW-AFTER-NEXT:      Check 0:
+; ALLOW-AFTER-NEXT:        Comparing group GRP0:
+; ALLOW-AFTER-NEXT:          %p.addr = phi ptr [ %incdec.ptr, %while.body ], [ %p, %entry ]
+; ALLOW-AFTER-NEXT:        Against group GRP1:
+; ALLOW-AFTER-NEXT:        ptr %r
+; ALLOW-AFTER-NEXT:      Grouped accesses:
+; ALLOW-AFTER-NEXT:        Group GRP0:
+; ALLOW-AFTER-NEXT:          (Low: %p High: (4 + (4 * (zext i32 (-1 + %n)<nsw> to i64))<nuw><nsw> + %p))
+; ALLOW-AFTER-NEXT:            Member: {%p,+,4}<nuw><%while.body>
+; ALLOW-AFTER-NEXT:        Group GRP1:
+; ALLOW-AFTER-NEXT:          (Low: %r High: (8 + %r))
+; ALLOW-AFTER-NEXT:            Member: %r
+; ALLOW-AFTER-NEXT:      Generated run-time checks are incomplete
+; ALLOW-AFTER-EMPTY:
+; ALLOW-AFTER-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; ALLOW-AFTER-NEXT:      SCEV assumptions:
+; ALLOW-AFTER-EMPTY:
+; ALLOW-AFTER-NEXT:      Expressions re-written:
+;
+entry:
+  br label %while.body
+
+while.body:
+  %n.addr = phi i32 [ %dec, %while.body ], [ %n, %entry ]
+  %p.addr = phi ptr [ %incdec.ptr, %while.body ], [ %p, %entry ]
+  %dec = add nsw i32 %n.addr, -1
+  %rval = load i64, ptr %r, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %q, i64 %rval
+  %val = load i32, ptr %arrayidx, align 4
+  %incdec.ptr = getelementptr inbounds nuw i8, ptr %p.addr, i64 4
+  store i32 %val, ptr %p.addr, align 4
+  %tobool.not = icmp eq i32 %dec, 0
+  br i1 %tobool.not, label %while.end, label %while.body
+
+while.end:
+  ret void
+}