[clangd] Elide even more checks in SelectionTree.

sam-mccall · sam-mccall · commit 07f9fb8b5141 · 2022-01-13T22:27:51.000+01:00
During pop() we convert nodes into spans of expanded syntax::Tokens. If we precompute a range of plausible (expanded) tokens, then we can do an extremely cheap approximate hit-test against it, because syntax::Tokens are ordered by pointer. This would seem not to buy anything (we don't enter nodes unless they overlap the selection), but in fact the spans we have are for *newly* claimed ranges (i.e. those unclaimed by any child node). So if you have: { { [[2+2]]; } } then all of the CompoundStmts pass the hit test and are pushed, but we skip full hit-testing of the brackets during pop() as they lie outside the range. This is ~10x average speedup for selectiontree on a bad case I've seen (large gtest file). Differential Revision: https://reviews.llvm.org/D117107
diff --git a/clang-tools-extra/clangd/Selection.cpp b/clang-tools-extra/clangd/Selection.cpp
@@ -274,22 +274,37 @@ class SelectionTester {
     for (unsigned I = 0; I < Sel.size(); ++I) {
       if (shouldIgnore(Sel[I]) || PPIgnored[I])
         continue;
-      SpelledTokens.emplace_back();
-      Tok &S = SpelledTokens.back();
+      SelectedSpelled.emplace_back();
+      Tok &S = SelectedSpelled.back();
       S.Offset = SM.getFileOffset(Sel[I].location());
       if (S.Offset >= SelBegin && S.Offset + Sel[I].length() <= SelEnd)
         S.Selected = SelectionTree::Complete;
       else
         S.Selected = SelectionTree::Partial;
     }
+    MaybeSelectedExpanded = computeMaybeSelectedExpandedTokens(Buf);
   }
 
   // Test whether a consecutive range of tokens is selected.
   // The tokens are taken from the expanded token stream.
   SelectionTree::Selection
   test(llvm::ArrayRef<syntax::Token> ExpandedTokens) const {
-    if (SpelledTokens.empty())
+    if (ExpandedTokens.empty())
       return NoTokens;
+    if (SelectedSpelled.empty())
+      return SelectionTree::Unselected;
+    // Cheap (pointer) check whether any of the tokens could touch selection.
+    // In most cases, the node's overall source range touches ExpandedTokens,
+    // or we would have failed mayHit(). However now we're only considering
+    // the *unclaimed* spans of expanded tokens.
+    // This is a significant performance improvement when a lot of nodes
+    // surround the selection, including when generated by macros.
+    if (MaybeSelectedExpanded.empty() ||
+        &ExpandedTokens.front() > &MaybeSelectedExpanded.back() ||
+        &ExpandedTokens.back() < &MaybeSelectedExpanded.front()) {
+      return SelectionTree::Unselected;
+    }
+
     SelectionTree::Selection Result = NoTokens;
     while (!ExpandedTokens.empty()) {
       // Take consecutive tokens from the same context together for efficiency.
@@ -312,14 +327,14 @@ class SelectionTester {
   // If it returns false, test() will return NoTokens or Unselected.
   // If it returns true, test() may return any value.
   bool mayHit(SourceRange R) const {
-    if (SpelledTokens.empty())
+    if (SelectedSpelled.empty() || MaybeSelectedExpanded.empty())
       return false;
     // If the node starts after the selection ends, it is not selected.
     // Tokens a macro location might claim are >= its expansion start.
     // So if the expansion start > last selected token, we can prune it.
     // (This is particularly helpful for GTest's TEST macro).
     if (auto B = offsetInSelFile(getExpansionStart(R.getBegin())))
-      if (*B > SpelledTokens.back().Offset)
+      if (*B > SelectedSpelled.back().Offset)
         return false;
     // If the node ends before the selection begins, it is not selected.
     SourceLocation EndLoc = R.getEnd();
@@ -328,12 +343,72 @@ class SelectionTester {
     // In the rare case that the expansion range is a char range, EndLoc is
     // ~one token too far to the right. We may fail to prune, that's OK.
     if (auto E = offsetInSelFile(EndLoc))
-      if (*E < SpelledTokens.front().Offset)
+      if (*E < SelectedSpelled.front().Offset)
         return false;
     return true;
   }
 
 private:
+  // Plausible expanded tokens that might be affected by the selection.
+  // This is an overestimate, it may contain tokens that are not selected.
+  // The point is to allow cheap pruning in test()
+  llvm::ArrayRef<syntax::Token>
+  computeMaybeSelectedExpandedTokens(const syntax::TokenBuffer &Toks) {
+    if (SelectedSpelled.empty())
+      return {};
+
+    bool StartInvalid = false;
+    const syntax::Token *Start = llvm::partition_point(
+        Toks.expandedTokens(),
+        [&, First = SelectedSpelled.front().Offset](const syntax::Token &Tok) {
+          if (Tok.kind() == tok::eof)
+            return false;
+          // Implausible if upperbound(Tok) < First.
+          SourceLocation Loc = Tok.location();
+          auto Offset = offsetInSelFile(Loc);
+          while (Loc.isValid() && !Offset) {
+            Loc = Loc.isMacroID() ? SM.getImmediateExpansionRange(Loc).getEnd()
+                                  : SM.getIncludeLoc(SM.getFileID(Loc));
+            Offset = offsetInSelFile(Loc);
+          }
+          if (Offset)
+            return *Offset < First;
+          StartInvalid = true;
+          return false; // conservatively assume this token can overlap
+        });
+    if (StartInvalid) {
+      assert(false && "Expanded tokens could not be resolved to main file!");
+      Start = Toks.expandedTokens().begin();
+    }
+
+    bool EndInvalid = false;
+    const syntax::Token *End = llvm::partition_point(
+        Toks.expandedTokens(),
+        [&, Last = SelectedSpelled.back().Offset](const syntax::Token &Tok) {
+          if (Tok.kind() == tok::eof)
+            return false;
+          // Plausible if lowerbound(Tok) <= Last.
+          SourceLocation Loc = Tok.location();
+          auto Offset = offsetInSelFile(Loc);
+          while (Loc.isValid() && !Offset) {
+            Loc = Loc.isMacroID()
+                      ? SM.getImmediateExpansionRange(Loc).getBegin()
+                      : SM.getIncludeLoc(SM.getFileID(Loc));
+            Offset = offsetInSelFile(Loc);
+          }
+          if (Offset)
+            return *Offset <= Last;
+          EndInvalid = true;
+          return true; // conservatively assume this token can overlap
+        });
+    if (EndInvalid) {
+      assert(false && "Expanded tokens could not be resolved to main file!");
+      End = Toks.expandedTokens().end();
+    }
+
+    return llvm::makeArrayRef(Start, End);
+  }
+
   // Hit-test a consecutive range of tokens from a single file ID.
   SelectionTree::Selection
   testChunk(FileID FID, llvm::ArrayRef<syntax::Token> Batch) const {
@@ -389,19 +464,20 @@ class SelectionTester {
   SelectionTree::Selection testTokenRange(unsigned Begin, unsigned End) const {
     assert(Begin <= End);
     // Outside the selection entirely?
-    if (End < SpelledTokens.front().Offset ||
-        Begin > SpelledTokens.back().Offset)
+    if (End < SelectedSpelled.front().Offset ||
+        Begin > SelectedSpelled.back().Offset)
       return SelectionTree::Unselected;
 
     // Compute range of tokens.
     auto B = llvm::partition_point(
-        SpelledTokens, [&](const Tok &T) { return T.Offset < Begin; });
-    auto E = std::partition_point(
-        B, SpelledTokens.end(), [&](const Tok &T) { return T.Offset <= End; });
+        SelectedSpelled, [&](const Tok &T) { return T.Offset < Begin; });
+    auto E = std::partition_point(B, SelectedSpelled.end(), [&](const Tok &T) {
+      return T.Offset <= End;
+    });
 
     // Aggregate selectedness of tokens in range.
-    bool ExtendsOutsideSelection = Begin < SpelledTokens.front().Offset ||
-                                   End > SpelledTokens.back().Offset;
+    bool ExtendsOutsideSelection = Begin < SelectedSpelled.front().Offset ||
+                                   End > SelectedSpelled.back().Offset;
     SelectionTree::Selection Result =
         ExtendsOutsideSelection ? SelectionTree::Unselected : NoTokens;
     for (auto It = B; It != E; ++It)
@@ -412,13 +488,13 @@ class SelectionTester {
   // Is the token at `Offset` selected?
   SelectionTree::Selection testToken(unsigned Offset) const {
     // Outside the selection entirely?
-    if (Offset < SpelledTokens.front().Offset ||
-        Offset > SpelledTokens.back().Offset)
+    if (Offset < SelectedSpelled.front().Offset ||
+        Offset > SelectedSpelled.back().Offset)
       return SelectionTree::Unselected;
     // Find the token, if it exists.
     auto It = llvm::partition_point(
-        SpelledTokens, [&](const Tok &T) { return T.Offset < Offset; });
-    if (It != SpelledTokens.end() && It->Offset == Offset)
+        SelectedSpelled, [&](const Tok &T) { return T.Offset < Offset; });
+    if (It != SelectedSpelled.end() && It->Offset == Offset)
       return It->Selected;
     return NoTokens;
   }
@@ -444,7 +520,8 @@ class SelectionTester {
     unsigned Offset;
     SelectionTree::Selection Selected;
   };
-  std::vector<Tok> SpelledTokens;
+  std::vector<Tok> SelectedSpelled;
+  llvm::ArrayRef<syntax::Token> MaybeSelectedExpanded;
   FileID SelFile;
   SourceRange SelFileBounds;
   const SourceManager &SM;
diff --git a/clang-tools-extra/clangd/unittests/SelectionTests.cpp b/clang-tools-extra/clangd/unittests/SelectionTests.cpp
@@ -201,6 +201,13 @@ TEST(SelectionTest, CommonAncestor) {
           )cpp",
           nullptr,
       },
+      {
+          R"cpp(
+            #define TARGET void foo()
+            [[TAR^GET{ return; }]]
+          )cpp",
+          "FunctionDecl",
+      },
       {
           R"cpp(
             struct S { S(const char*); };