Skip to content

[clangd] Drop the optimization where only shards for modified files are updated in the background index #140651

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 6 additions & 40 deletions clang-tools-extra/clangd/index/Background.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,13 +180,10 @@ void BackgroundIndex::boostRelated(llvm::StringRef Path) {
Queue.boost(filenameWithoutExtension(Path), IndexBoostedFile);
}

/// Given index results from a TU, only update symbols coming from files that
/// are different or missing from than \p ShardVersionsSnapshot. Also stores new
/// index information on IndexStorage.
void BackgroundIndex::update(
llvm::StringRef MainFile, IndexFileIn Index,
const llvm::StringMap<ShardVersion> &ShardVersionsSnapshot,
bool HadErrors) {
/// Given index results from a TU, update symbols coming from all files. Also
/// stores new index information on IndexStorage.
void BackgroundIndex::update(llvm::StringRef MainFile, IndexFileIn Index,
bool HadErrors) {
// Keys are URIs.
llvm::StringMap<std::pair<Path, FileDigest>> FilesToUpdate;
// Note that sources do not contain any information regarding missing headers,
Expand All @@ -198,12 +195,7 @@ void BackgroundIndex::update(
elog("Failed to resolve URI: {0}", AbsPath.takeError());
continue;
}
const auto DigestIt = ShardVersionsSnapshot.find(*AbsPath);
// File has different contents, or indexing was successful this time.
if (DigestIt == ShardVersionsSnapshot.end() ||
DigestIt->getValue().Digest != IGN.Digest ||
(DigestIt->getValue().HadErrors && !HadErrors))
FilesToUpdate[IGN.URI] = {std::move(*AbsPath), IGN.Digest};
FilesToUpdate[IGN.URI] = {std::move(*AbsPath), IGN.Digest};
}

// Shard slabs into files.
Expand Down Expand Up @@ -263,13 +255,6 @@ llvm::Error BackgroundIndex::index(tooling::CompileCommand Cmd) {
return llvm::errorCodeToError(Buf.getError());
auto Hash = digest(Buf->get()->getBuffer());

// Take a snapshot of the versions to avoid locking for each file in the TU.
llvm::StringMap<ShardVersion> ShardVersionsSnapshot;
{
std::lock_guard<std::mutex> Lock(ShardVersionsMu);
ShardVersionsSnapshot = ShardVersions;
}

vlog("Indexing {0} (digest:={1})", Cmd.Filename, llvm::toHex(Hash));
ParseInputs Inputs;
Inputs.TFS = &TFS;
Expand All @@ -286,25 +271,6 @@ llvm::Error BackgroundIndex::index(tooling::CompileCommand Cmd) {
return error("Couldn't build compiler instance");

SymbolCollector::Options IndexOpts;
// Creates a filter to not collect index results from files with unchanged
// digests.
IndexOpts.FileFilter = [&ShardVersionsSnapshot](const SourceManager &SM,
FileID FID) {
const auto F = SM.getFileEntryRefForID(FID);
if (!F)
return false; // Skip invalid files.
auto AbsPath = getCanonicalPath(*F, SM.getFileManager());
if (!AbsPath)
return false; // Skip files without absolute path.
auto Digest = digestFile(SM, FID);
if (!Digest)
return false;
auto D = ShardVersionsSnapshot.find(*AbsPath);
if (D != ShardVersionsSnapshot.end() && D->second.Digest == Digest &&
!D->second.HadErrors)
return false; // Skip files that haven't changed, without errors.
return true;
};
IndexOpts.CollectMainFileRefs = true;

IndexFileIn Index;
Expand Down Expand Up @@ -345,7 +311,7 @@ llvm::Error BackgroundIndex::index(tooling::CompileCommand Cmd) {
for (auto &It : *Index.Sources)
It.second.Flags |= IncludeGraphNode::SourceFlag::HadErrors;
}
update(AbsolutePath, std::move(Index), ShardVersionsSnapshot, HadErrors);
update(AbsolutePath, std::move(Index), HadErrors);

Rebuilder.indexedTU();
return llvm::Error::success();
Expand Down
9 changes: 3 additions & 6 deletions clang-tools-extra/clangd/index/Background.h
Original file line number Diff line number Diff line change
Expand Up @@ -190,12 +190,9 @@ class BackgroundIndex : public SwapIndex {
bool HadErrors = false;
};

/// Given index results from a TU, only update symbols coming from files with
/// different digests than \p ShardVersionsSnapshot. Also stores new index
/// information on IndexStorage.
void update(llvm::StringRef MainFile, IndexFileIn Index,
const llvm::StringMap<ShardVersion> &ShardVersionsSnapshot,
bool HadErrors);
/// Given index results from a TU, update symbols coming from all files. Also
/// stores new index information on IndexStorage.
void update(llvm::StringRef MainFile, IndexFileIn Index, bool HadErrors);

// configuration
const ThreadsafeFS &TFS;
Expand Down
158 changes: 156 additions & 2 deletions clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include <deque>
#include <optional>
#include <thread>

using ::testing::_;
Expand Down Expand Up @@ -213,10 +214,11 @@ TEST_F(BackgroundIndexTest, IndexTwoFiles) {
CDB.setCompileCommand(testPath("root/B.cc"), Cmd);

ASSERT_TRUE(Idx.blockUntilIdleForTest());
// B_CC is dropped as we don't collect symbols from A.h in this compilation.
// A_CC is dropped as A.h was now most recently indexed in the context of
// B.cc.
EXPECT_THAT(runFuzzyFind(Idx, ""),
UnorderedElementsAre(AllOf(named("common"), numReferences(5U)),
AllOf(named("A_CC"), numReferences(0U)),
AllOf(named("B_CC"), numReferences(0U)),
AllOf(named("g"), numReferences(1U)),
AllOf(named("f_b"), declared(), defined(),
numReferences(1U))));
Expand Down Expand Up @@ -682,6 +684,158 @@ TEST_F(BackgroundIndexTest, Reindex) {
EXPECT_EQ(OldShard, Storage.lookup(testPath("A.cc")));
}

// Test that restarting clangd properly updates the index with changes
// to files since the last time the index was built.
TEST_F(BackgroundIndexTest, UpdateAfterRestart) {
MockFS FS;
llvm::StringMap<std::string> Storage;
size_t CacheHits = 0;
MemoryShardStorage MSS(Storage, CacheHits);
auto CDB = std::make_unique<OverlayCDB>(/*Base=*/nullptr);
auto Idx = std::make_unique<BackgroundIndex>(
FS, *CDB, [&](llvm::StringRef) { return &MSS; },
BackgroundIndex::Options{});

// Create a header file containing a function declaration, and a source file
// containing a call to the function.
FS.Files[testPath("test.h")] = R"cpp(
#ifndef TEST_H
#define TEST_H
void waldo(int);
#endif
)cpp";
FS.Files[testPath("test.cc")] = R"cpp(
#include "test.h"
int main() {
waldo(42);
}
)cpp";

// Index the files in this state.
tooling::CompileCommand Cmd;
Cmd.Filename = "../test.cc";
Cmd.Directory = testPath("build");
Cmd.CommandLine = {"clang++", "../test.cc", "-fsyntax-only"};
CDB->setCompileCommand(testPath("test.cc"), Cmd);
ASSERT_TRUE(Idx->blockUntilIdleForTest());

// Verify that the function 'waldo' has two references in the index
// (the declaration, and the call site).
auto CheckRefCount = [&](std::string SymbolName) {
auto Syms = runFuzzyFind(*Idx, SymbolName);
EXPECT_THAT(Syms, UnorderedElementsAre(named(SymbolName)));
auto Sym = *Syms.begin();
return getRefs(*Idx, Sym.ID).numRefs();
};
EXPECT_EQ(CheckRefCount("waldo"), 2u);

// Modify the declaration of 'waldo' in a way that changes its SymbolID
// without changing how existing call sites are written. Here, we add
// a new parameter with a default argument.
FS.Files[testPath("test.h")] = R"cpp(
#ifndef TEST_H
#define TEST_H
void waldo(int, int = 0);
#endif
)cpp";

// Simulate clangd shutting down and restarting, and the background index
// being rebuilt after restart.
Idx = nullptr;
CDB = std::make_unique<OverlayCDB>(/*Base=*/nullptr);
Idx = std::make_unique<BackgroundIndex>(
FS, *CDB, [&](llvm::StringRef) { return &MSS; },
BackgroundIndex::Options{});
CDB->setCompileCommand(testPath("test.cc"), Cmd);
ASSERT_TRUE(Idx->blockUntilIdleForTest());

// The rebuild should have updated things so that 'waldo' now again has
// two references in the index.
EXPECT_EQ(CheckRefCount("waldo"), 2u);
}

// Test that updating a header that's included by multiple source files
// correctly updates all including source files, even if the source files
// themselves did not change.
TEST_F(BackgroundIndexTest, HeaderWithMultipleIncluders) {
MockFS FS;
llvm::StringMap<std::string> Storage;
size_t CacheHits = 0;
MemoryShardStorage MSS(Storage, CacheHits);
auto CDB = std::make_unique<OverlayCDB>(/*Base=*/nullptr);
auto Idx = std::make_unique<BackgroundIndex>(
FS, *CDB, [&](llvm::StringRef) { return &MSS; },
BackgroundIndex::Options{});

// Create a header file containing a function declaration, and two source
// files each containing a call to the function.
FS.Files[testPath("header.h")] = R"cpp(
#ifndef TEST_H
#define TEST_H
void waldo(int);
#endif
)cpp";
FS.Files[testPath("a.cc")] = R"cpp(
#include "header.h"
void f() {
waldo(41);
}
)cpp";
FS.Files[testPath("b.cc")] = R"cpp(
#include "header.h"
void f() {
waldo(42);
}
)cpp";

// Index the files in this state.
tooling::CompileCommand Cmd1;
Cmd1.Filename = "../a.cc";
Cmd1.Directory = testPath("build");
Cmd1.CommandLine = {"clang++", "../a.cc", "-fsyntax-only"};
tooling::CompileCommand Cmd2;
Cmd2.Filename = "../b.cc";
Cmd2.Directory = testPath("build");
Cmd2.CommandLine = {"clang++", "../b.cc", "-fsyntax-only"};
CDB->setCompileCommand(testPath("b.cc"), Cmd2);
ASSERT_TRUE(Idx->blockUntilIdleForTest());

// Verify that the function 'waldo' has three references in the index
// (the declaration, and the two call sites).
auto CheckRefCount = [&](std::string SymbolName) {
auto Syms = runFuzzyFind(*Idx, SymbolName);
EXPECT_THAT(Syms, UnorderedElementsAre(named(SymbolName)));
auto Sym = *Syms.begin();
return getRefs(*Idx, Sym.ID).numRefs();
};
EXPECT_EQ(CheckRefCount("waldo"), 3u);

// Modify the declaration of 'waldo' in a way that changes its SymbolID
// without changing how existing call sites are written. Here, we add
// a new parameter with a default argument.
FS.Files[testPath("test.h")] = R"cpp(
#ifndef TEST_H
#define TEST_H
void waldo(int, int = 0);
#endif
)cpp";

// Simulate clangd shutting down and restarting, and the background index
// being rebuilt after restart.
Idx = nullptr;
CDB = std::make_unique<OverlayCDB>(/*Base=*/nullptr);
Idx = std::make_unique<BackgroundIndex>(
FS, *CDB, [&](llvm::StringRef) { return &MSS; },
BackgroundIndex::Options{});
CDB->setCompileCommand(testPath("a.cc"), Cmd1);
CDB->setCompileCommand(testPath("b.cc"), Cmd2);
ASSERT_TRUE(Idx->blockUntilIdleForTest());

// The rebuild should have updated things so that 'waldo' now again has
// three references in the index.
EXPECT_EQ(CheckRefCount("waldo"), 3u);
}

class BackgroundIndexRebuilderTest : public testing::Test {
protected:
BackgroundIndexRebuilderTest()
Expand Down
Loading