
Conversation

llvmbot (Member) commented Mar 6, 2025

Backport 86cf4ed

Requested by: @davemgreen

llvmbot (Member, Author) commented Mar 6, 2025

@DanielKristofKiss What do you think about merging this PR to the release branch?

llvmbot (Member, Author) commented Mar 6, 2025

@llvm/pr-subscribers-backend-arm

Author: None (llvmbot)

Changes

Backport 86cf4ed

Requested by: @davemgreen


Full diff: https://github.com/llvm/llvm-project/pull/130035.diff

1 file affected:

  • (modified) llvm/lib/Target/ARM/ARMISelLowering.cpp (+38-29)
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index bd8d6079e1ba8..d20115c84ea89 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -149,6 +149,11 @@ MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
   cl::desc("Maximum interleave factor for MVE VLDn to generate."),
   cl::init(2));
 
+cl::opt<unsigned> ArmMaxBaseUpdatesToCheck(
+    "arm-max-base-updates-to-check", cl::Hidden,
+    cl::desc("Maximum number of base-updates to check generating postindex."),
+    cl::init(64));
+
 /// Value type used for "flags" operands / results (either CPSR or FPSCR_NZCV).
 constexpr MVT FlagsVT = MVT::i32;
 
@@ -15842,6 +15847,22 @@ struct BaseUpdateUser {
   unsigned ConstInc;
 };
 
+static bool isValidBaseUpdate(SDNode *N, SDNode *User) {
+  // Check that the add is independent of the load/store.
+  // Otherwise, folding it would create a cycle. Search through Addr
+  // as well, since the User may not be a direct user of Addr and
+  // only share a base pointer.
+  SmallPtrSet<const SDNode *, 32> Visited;
+  SmallVector<const SDNode *, 16> Worklist;
+  Worklist.push_back(N);
+  Worklist.push_back(User);
+  const unsigned MaxSteps = 1024;
+  if (SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) ||
+      SDNode::hasPredecessorHelper(User, Visited, Worklist, MaxSteps))
+    return false;
+  return true;
+}
+
 static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target,
                                  struct BaseUpdateUser &User,
                                  bool SimpleConstIncOnly,
@@ -16043,6 +16064,9 @@ static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target,
   if (SimpleConstIncOnly && User.ConstInc != NumBytes)
     return false;
 
+  if (!isValidBaseUpdate(N, User.N))
+    return false;
+
   // OK, we found an ADD we can fold into the base update.
   // Now, create a _UPD node, taking care of not breaking alignment.
 
@@ -16191,21 +16215,6 @@ static bool findPointerConstIncrement(SDNode *N, SDValue *Ptr, SDValue *CInc) {
   }
 }
 
-static bool isValidBaseUpdate(SDNode *N, SDNode *User) {
-  // Check that the add is independent of the load/store.
-  // Otherwise, folding it would create a cycle. Search through Addr
-  // as well, since the User may not be a direct user of Addr and
-  // only share a base pointer.
-  SmallPtrSet<const SDNode *, 32> Visited;
-  SmallVector<const SDNode *, 16> Worklist;
-  Worklist.push_back(N);
-  Worklist.push_back(User);
-  if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
-      SDNode::hasPredecessorHelper(User, Visited, Worklist))
-    return false;
-  return true;
-}
-
 /// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
 /// NEON load/store intrinsics, and generic vector load/stores, to merge
 /// base address updates.
@@ -16219,6 +16228,10 @@ static SDValue CombineBaseUpdate(SDNode *N,
   const unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1);
   BaseUpdateTarget Target = {N, isIntrinsic, isStore, AddrOpIdx};
 
+  // Limit the number of possible base-updates we look at to prevent degenerate
+  // cases.
+  unsigned MaxBaseUpdates = ArmMaxBaseUpdatesToCheck;
+
   SDValue Addr = N->getOperand(AddrOpIdx);
 
   SmallVector<BaseUpdateUser, 8> BaseUpdates;
@@ -16233,8 +16246,11 @@ static SDValue CombineBaseUpdate(SDNode *N,
     unsigned ConstInc =
         getPointerConstIncrement(User->getOpcode(), Addr, Inc, DCI.DAG);
 
-    if (ConstInc || User->getOpcode() == ISD::ADD)
+    if (ConstInc || User->getOpcode() == ISD::ADD) {
       BaseUpdates.push_back({User, Inc, ConstInc});
+      if (BaseUpdates.size() >= MaxBaseUpdates)
+        break;
+    }
   }
 
   // If the address is a constant pointer increment itself, find
@@ -16261,27 +16277,19 @@ static SDValue CombineBaseUpdate(SDNode *N,
       unsigned NewConstInc = UserOffset - Offset;
       SDValue NewInc = DCI.DAG.getConstant(NewConstInc, SDLoc(N), MVT::i32);
       BaseUpdates.push_back({User, NewInc, NewConstInc});
+      if (BaseUpdates.size() >= MaxBaseUpdates)
+        break;
     }
   }
 
   // Try to fold the load/store with an update that matches memory
   // access size. This should work well for sequential loads.
-  //
-  // Filter out invalid updates as well.
   unsigned NumValidUpd = BaseUpdates.size();
-  for (unsigned I = 0; I < NumValidUpd;) {
+  for (unsigned I = 0; I < NumValidUpd; I++) {
     BaseUpdateUser &User = BaseUpdates[I];
-    if (!isValidBaseUpdate(N, User.N)) {
-      --NumValidUpd;
-      std::swap(BaseUpdates[I], BaseUpdates[NumValidUpd]);
-      continue;
-    }
-
     if (TryCombineBaseUpdate(Target, User, /*SimpleConstIncOnly=*/true, DCI))
       return SDValue();
-    ++I;
   }
-  BaseUpdates.resize(NumValidUpd);
 
   // Try to fold with other users. Non-constant updates are considered
   // first, and constant updates are sorted to not break a sequence of
@@ -16337,8 +16345,9 @@ static SDValue PerformMVEVLDCombine(SDNode *N,
     Visited.insert(Addr.getNode());
     Worklist.push_back(N);
     Worklist.push_back(User);
-    if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
-        SDNode::hasPredecessorHelper(User, Visited, Worklist))
+    const unsigned MaxSteps = 1024;
+    if (SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) ||
+        SDNode::hasPredecessorHelper(User, Visited, Worklist, MaxSteps))
       continue;
 
     // Find the new opcode for the updating load/store.

@tstellar tstellar moved this from Needs Triage to Needs Review in LLVM Release Status Mar 11, 2025
tstellar (Collaborator) commented

@DanielKristofKiss ping

DanielKristofKiss (Member) left a comment

LGTM

@github-project-automation github-project-automation bot moved this from Needs Review to Needs Merge in LLVM Release Status Mar 31, 2025
This attempts to put limits on CombineBaseUpdate for degenerate cases
like llvm#127477. The biggest change is a cap on the number of base
updates that CombineBaseUpdate will check. 64 is hopefully high enough
for most runtime-unrolled loops to still generate post-increment forms
where they are beneficial.
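
Because the cap is exposed as a hidden backend option, it can also be adjusted
from the command line when investigating missed post-increment opportunities;
an invocation along these lines (the input file name here is only illustrative)
should work:

  llc -mtriple=armv7a-none-eabi -arm-max-base-updates-to-check=256 degenerate.ll -o -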

It also moves the isValidBaseUpdate check later, so that it only runs
once we know we would otherwise generate a valid instruction. The
1024-step limit passed to hasPredecessorHelper comes from the X86
backend, which uses the same value.
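
For readers less familiar with that helper, the following is a minimal
standalone C++ sketch of the bounded walk (it is not LLVM's
hasPredecessorHelper; the Node type and mightDependOn name are made up for
illustration): operand edges are followed backwards from the candidate base
update, and once the step budget is exhausted the search gives up and
conservatively reports a possible dependence, so isValidBaseUpdate rejects
the fold.

#include <unordered_set>
#include <vector>

// Each node only records its operands, i.e. edges towards its inputs.
struct Node {
  std::vector<const Node *> Operands;
};

// Returns true if Target is (or might be) reachable from Start through
// operand edges. The walk is capped at MaxSteps visited nodes; when the
// cap is hit, a dependence is conservatively assumed.
bool mightDependOn(const Node *Start, const Node *Target, unsigned MaxSteps) {
  std::unordered_set<const Node *> Visited;
  std::vector<const Node *> Worklist{Start};
  while (!Worklist.empty()) {
    const Node *N = Worklist.back();
    Worklist.pop_back();
    if (N == Target)
      return true;                   // genuine dependence found
    if (Visited.size() >= MaxSteps)
      return true;                   // budget exhausted: assume a dependence
    for (const Node *Op : N->Operands)
      if (Visited.insert(Op).second) // enqueue each operand only once
        Worklist.push_back(Op);
  }
  return false;                      // exhaustively proved independent
}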

I haven't added a test case as it would need to be very big and my
attempts at generating a smaller version did not show anything useful.

Fixes llvm#127477.

(cherry picked from commit 86cf4ed)
@tstellar tstellar merged commit d6d1dbf into llvm:release/20.x Mar 31, 2025
6 of 9 checks passed
@github-project-automation github-project-automation bot moved this from Needs Merge to Done in LLVM Release Status Mar 31, 2025

@davemgreen (or anyone else). If you would like to add a note about this fix in the release notes (completely optional). Please reply to this comment with a one or two sentence description of the fix. When you are done, please add the release:note label to this PR.
