Skip to content

Commit b452efa

Browse files
committed
[clangd] [C++20] [Modules] Add scanning cache
Previously, every time we wanted to get the source file declaring a specific module, we had to scan the whole project again and again, which performed poorly. This patch improves the situation by introducing a simple cache.
1 parent 8c222c1 commit b452efa

File tree

6 files changed

+160
-16
lines changed

6 files changed

+160
-16
lines changed

clang-tools-extra/clangd/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ add_clang_library(clangDaemon STATIC
9999
ModulesBuilder.cpp
100100
PathMapping.cpp
101101
Protocol.cpp
102+
ProjectModulesCache.cpp
102103
Quality.cpp
103104
ParsedAST.cpp
104105
Preamble.cpp

clang-tools-extra/clangd/ModulesBuilder.cpp

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -361,15 +361,16 @@ void ModuleFileCache::remove(StringRef ModuleName) {
361361
/// ModuleName in topological order. The \param ModuleName is guaranteed to
362362
/// be the last element in \param ModuleNames.
363363
llvm::SmallVector<StringRef> getAllRequiredModules(ProjectModules &MDB,
364-
StringRef ModuleName) {
364+
StringRef ModuleName,
365+
ProjectModulesCache &Cache) {
365366
llvm::SmallVector<llvm::StringRef> ModuleNames;
366367
llvm::StringSet<> ModuleNamesSet;
367368

368369
auto VisitDeps = [&](StringRef ModuleName, auto Visitor) -> void {
369370
ModuleNamesSet.insert(ModuleName);
370371

371372
for (StringRef RequiredModuleName :
372-
MDB.getRequiredModules(MDB.getSourceForModuleName(ModuleName)))
373+
MDB.getRequiredModules(MDB.getSourceForModuleName(Cache, ModuleName)))
373374
if (ModuleNamesSet.insert(RequiredModuleName).second)
374375
Visitor(RequiredModuleName, Visitor);
375376

@@ -384,7 +385,9 @@ llvm::SmallVector<StringRef> getAllRequiredModules(ProjectModules &MDB,
384385

385386
class ModulesBuilder::ModulesBuilderImpl {
386387
public:
387-
ModulesBuilderImpl(const GlobalCompilationDatabase &CDB) : Cache(CDB) {}
388+
ModulesBuilderImpl(const GlobalCompilationDatabase &CDB) : Cache(CDB) {
389+
MDBCache = createProjectModulesCache();
390+
}
388391

389392
const GlobalCompilationDatabase &getCDB() const { return Cache.getCDB(); }
390393

@@ -395,6 +398,7 @@ class ModulesBuilder::ModulesBuilderImpl {
395398

396399
private:
397400
ModuleFileCache Cache;
401+
std::unique_ptr<ProjectModulesCache> MDBCache;
398402
};
399403

400404
llvm::Error ModulesBuilder::ModulesBuilderImpl::getOrBuildModuleFile(
@@ -403,7 +407,8 @@ llvm::Error ModulesBuilder::ModulesBuilderImpl::getOrBuildModuleFile(
403407
if (BuiltModuleFiles.isModuleUnitBuilt(ModuleName))
404408
return llvm::Error::success();
405409

406-
PathRef ModuleUnitFileName = MDB.getSourceForModuleName(ModuleName);
410+
PathRef ModuleUnitFileName =
411+
MDB.getSourceForModuleName(*MDBCache, ModuleName);
407412
/// It is possible that we're meeting third party modules (modules whose
408413
/// source are not in the project. e.g, the std module may be a third-party
409414
/// module for most project) or something wrong with the implementation of
@@ -416,7 +421,7 @@ llvm::Error ModulesBuilder::ModulesBuilderImpl::getOrBuildModuleFile(
416421
llvm::formatv("Don't get the module unit for module {0}", ModuleName));
417422

418423
// Get Required modules in topological order.
419-
auto ReqModuleNames = getAllRequiredModules(MDB, ModuleName);
424+
auto ReqModuleNames = getAllRequiredModules(MDB, ModuleName, *MDBCache);
420425
for (llvm::StringRef ReqModuleName : ReqModuleNames) {
421426
if (BuiltModuleFiles.isModuleUnitBuilt(ModuleName))
422427
continue;

clang-tools-extra/clangd/ProjectModules.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_PROJECTMODULES_H
1010
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_PROJECTMODULES_H
1111

12+
#include "ProjectModulesCache.h"
1213
#include "support/Function.h"
1314
#include "support/Path.h"
1415
#include "support/ThreadsafeFS.h"
@@ -43,7 +44,7 @@ class ProjectModules {
4344

4445
virtual std::vector<std::string> getRequiredModules(PathRef File) = 0;
4546
virtual PathRef
46-
getSourceForModuleName(llvm::StringRef ModuleName,
47+
getSourceForModuleName(ProjectModulesCache &Cache, llvm::StringRef ModuleName,
4748
PathRef RequiredSrcFile = PathRef()) = 0;
4849

4950
virtual void setCommandMangler(CommandMangler Mangler) {}
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
//===------------------ ProjectModulesCache.cpp ------------------*- C++-*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "ProjectModulesCache.h"
10+
#include "llvm/ADT/StringMap.h"
11+
#include <mutex>
12+
13+
namespace clang::clangd {
14+
namespace {
15+
/// ProjectModulesCache implementation that keeps the module-name to
/// source-file map in an in-memory llvm::StringMap. Every member function
/// holds Mutex for the whole duration of its body.
class SharedProjectModulesCache : public ProjectModulesCache {
16+
public:
17+
/// Looks up the source file recorded for \p ModuleName.
/// Returns a copy of the stored path, or std::nullopt when no entry exists.
/// \p RequiredSrcFile is not read by this implementation.
std::optional<std::string>
18+
getSourceForModuleName(llvm::StringRef ModuleName,
19+
PathRef RequiredSrcFile = PathRef()) override {
20+
std::lock_guard<std::mutex> Lock(Mutex);
21+
22+
auto Iter = ModuleNameToSource.find(ModuleName);
23+
if (Iter == ModuleNameToSource.end())
24+
return std::nullopt;
25+
26+
return Iter->second;
27+
}
28+
29+
/// Removes the entry recorded for \p ModuleName; does nothing when there is
/// no such entry. \p RequiredSrcFile is not read by this implementation.
void clearEntry(llvm::StringRef ModuleName,
30+
PathRef RequiredSrcFile = PathRef()) override {
31+
std::lock_guard<std::mutex> Lock(Mutex);
32+
33+
auto Iter = ModuleNameToSource.find(ModuleName);
34+
if (Iter == ModuleNameToSource.end())
35+
return;
36+
37+
ModuleNameToSource.erase(Iter);
38+
}
39+
40+
/// Records \p FilePath as the source file for \p ModuleName, overwriting any
/// path previously stored under the same module name.
void setEntry(PathRef FilePath, llvm::StringRef ModuleName) override {
41+
std::lock_guard<std::mutex> Lock(Mutex);
42+
43+
ModuleNameToSource[ModuleName] = FilePath;
44+
}
45+
46+
private:
47+
// Guards ModuleNameToSource; locked by every member function above.
std::mutex Mutex;
48+
// Maps a module name to the path of the source file recorded for it. The map
// stores its own std::string copy of each path.
llvm::StringMap<std::string> ModuleNameToSource;
49+
};
50+
} // namespace
51+
52+
/// Creates a new SharedProjectModulesCache and hands it out through the
/// ProjectModulesCache interface. The concrete type lives in the anonymous
/// namespace above, so this factory is the only way to obtain one from
/// outside this file.
std::unique_ptr<ProjectModulesCache> createProjectModulesCache() {
53+
return std::make_unique<SharedProjectModulesCache>();
54+
}
55+
56+
} // namespace clang::clangd
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
//===------------------ ProjectModulesCache.h --------------------*- C++-*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_PROJECTMODULESCACHE_H
10+
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_PROJECTMODULESCACHE_H
11+
12+
#include "support/Path.h"
13+
#include <memory>
14+
15+
namespace clang {
16+
namespace clangd {
17+
18+
/// A cache for the module-name to source-file map of the project.
19+
/// The rationale is:
20+
/// (1) It is fast to get the module name declared by a given file.
21+
/// (2) It may be slow to find the file declaring a given module name.
22+
/// (3) The module names of files are unlikely to change drastically or
/// frequently.
23+
///
24+
/// The cache itself is not responsible for the validity of cached results.
25+
/// Users of the cache should check a result after getting it and update
26+
/// the cache if the result is invalid.
27+
class ProjectModulesCache {
28+
public:
29+
virtual ~ProjectModulesCache() = default;
30+
31+
/// Returns the source file path cached for \p ModuleName, or std::nullopt if
/// nothing is cached. The result may be stale; callers must validate it.
/// NOTE(review): the only implementation visible alongside this interface
/// ignores \p RequiredSrcFile — confirm the intended meaning before relying
/// on it.
virtual std::optional<std::string>
32+
getSourceForModuleName(llvm::StringRef ModuleName,
33+
PathRef RequiredSrcFile = PathRef()) = 0;
34+
35+
/// Drops the cached entry for \p ModuleName, e.g. after a caller found the
/// cached path to be invalid.
virtual void clearEntry(llvm::StringRef ModuleName,
36+
PathRef RequiredSrcFile = PathRef()) = 0;
37+
38+
/// Records that \p FilePath is the source file declaring \p ModuleName.
/// Note the argument order: the path comes first, then the module name.
virtual void setEntry(PathRef FilePath, llvm::StringRef ModuleName) = 0;
39+
};
40+
41+
std::unique_ptr<ProjectModulesCache> createProjectModulesCache();
42+
43+
} // namespace clangd
44+
} // namespace clang
45+
46+
#endif

clang-tools-extra/clangd/ScanningProjectModules.cpp

Lines changed: 45 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,8 @@ class ModuleDependencyScanner {
4949

5050
/// Scanning the single file specified by \param FilePath.
5151
std::optional<ModuleDependencyInfo>
52-
scan(PathRef FilePath, const ProjectModules::CommandMangler &Mangler);
52+
scan(PathRef FilePath, const ProjectModules::CommandMangler &Mangler,
53+
ProjectModulesCache *Cache);
5354

5455
/// Scanning every source file in the current project to get the
5556
/// <module-name> to <module-unit-source> map.
@@ -58,7 +59,8 @@ class ModuleDependencyScanner {
5859
/// a global module dependency scanner to monitor every file. Or we
5960
/// can simply require the build systems (or even the end users)
6061
/// to provide the map.
61-
void globalScan(const ProjectModules::CommandMangler &Mangler);
62+
void globalScan(const ProjectModules::CommandMangler &Mangler,
63+
ProjectModulesCache &Cache);
6264

6365
/// Get the source file from the module name. Note that the language
6466
/// guarantees all the module names are unique in a valid program.
@@ -68,6 +70,12 @@ class ModuleDependencyScanner {
6870
/// declaring the same module.
6971
PathRef getSourceForModuleName(llvm::StringRef ModuleName) const;
7072

73+
/// Validate whether source file \c Source declares a module named \c ModuleName.
74+
/// If yes, return the source file path. Otherwise, return std::nullopt.
75+
std::optional<PathRef>
76+
validateSourceForModuleName(PathRef Source, llvm::StringRef ModuleName,
77+
const ProjectModules::CommandMangler &Mangler);
78+
7179
/// Return the direct required modules. Indirect required modules are not
7280
/// included.
7381
std::vector<std::string>
@@ -83,15 +91,14 @@ class ModuleDependencyScanner {
8391

8492
clang::tooling::dependencies::DependencyScanningService Service;
8593

86-
// TODO: Add a scanning cache.
87-
8894
// Map module name to source file path.
8995
llvm::StringMap<std::string> ModuleNameToSource;
9096
};
9197

9298
std::optional<ModuleDependencyScanner::ModuleDependencyInfo>
9399
ModuleDependencyScanner::scan(PathRef FilePath,
94-
const ProjectModules::CommandMangler &Mangler) {
100+
const ProjectModules::CommandMangler &Mangler,
101+
ProjectModulesCache *Cache) {
95102
auto Candidates = CDB->getCompileCommands(FilePath);
96103
if (Candidates.empty())
97104
return std::nullopt;
@@ -124,6 +131,9 @@ ModuleDependencyScanner::scan(PathRef FilePath,
124131
if (ScanningResult->Provides) {
125132
ModuleNameToSource[ScanningResult->Provides->ModuleName] = FilePath;
126133
Result.ModuleName = ScanningResult->Provides->ModuleName;
134+
135+
if (Cache)
136+
Cache->setEntry(FilePath, ScanningResult->Provides->ModuleName);
127137
}
128138

129139
for (auto &Required : ScanningResult->Requires)
@@ -133,9 +143,9 @@ ModuleDependencyScanner::scan(PathRef FilePath,
133143
}
134144

135145
void ModuleDependencyScanner::globalScan(
136-
const ProjectModules::CommandMangler &Mangler) {
146+
const ProjectModules::CommandMangler &Mangler, ProjectModulesCache &Cache) {
137147
for (auto &File : CDB->getAllFiles())
138-
scan(File, Mangler);
148+
scan(File, Mangler, &Cache);
139149

140150
GlobalScanned = true;
141151
}
@@ -155,12 +165,28 @@ PathRef ModuleDependencyScanner::getSourceForModuleName(
155165

156166
std::vector<std::string> ModuleDependencyScanner::getRequiredModules(
157167
PathRef File, const ProjectModules::CommandMangler &Mangler) {
158-
auto ScanningResult = scan(File, Mangler);
168+
auto ScanningResult = scan(File, Mangler, /*Cache=*/nullptr);
159169
if (!ScanningResult)
160170
return {};
161171

162172
return ScanningResult->RequiredModules;
163173
}
174+
175+
/// Re-scans the single file \p Source and checks whether it declares the
/// module \p ModuleName.
///
/// \returns the path stored in ModuleNameToSource for \p ModuleName when the
/// scanned module name matches; std::nullopt when the scan fails or the file
/// declares a different module (or none).
///
/// The scan is run with a null ProjectModulesCache, so it does not write to
/// the shared cache.
std::optional<PathRef> ModuleDependencyScanner::validateSourceForModuleName(
176+
PathRef Source, llvm::StringRef ModuleName,
177+
const ProjectModules::CommandMangler &Mangler) {
178+
auto ScanningResult = scan(Source, Mangler, /*Cache=*/nullptr);
179+
if (!ScanningResult)
180+
return std::nullopt;
181+
182+
// If the name matches, return the source file path stored in the
183+
// ModuleNameToSource map instead of the input, because the input may not
184+
// live long enough.
// NOTE(review): operator[] would default-insert an empty entry if
// ModuleName were missing; this relies on scan() having just recorded the
// provided module name in ModuleNameToSource — confirm.
185+
if (ScanningResult->ModuleName == ModuleName)
186+
return ModuleNameToSource[ModuleName];
187+
188+
return std::nullopt;
189+
}
164190
} // namespace
165191

166192
/// TODO: The existing `ScanningAllProjectModules` is not efficient. See the
@@ -190,9 +216,18 @@ class ScanningAllProjectModules : public ProjectModules {
190216
/// RequiredSourceFile is not used intentionally. See the comments of
191217
/// ModuleDependencyScanner for detail.
192218
PathRef
193-
getSourceForModuleName(llvm::StringRef ModuleName,
219+
getSourceForModuleName(ProjectModulesCache &Cache, llvm::StringRef ModuleName,
194220
PathRef RequiredSourceFile = PathRef()) override {
195-
Scanner.globalScan(Mangler);
221+
if (auto Source =
222+
Cache.getSourceForModuleName(ModuleName, RequiredSourceFile)) {
223+
if (std::optional<PathRef> Path =
224+
Scanner.validateSourceForModuleName(*Source, ModuleName, Mangler))
225+
return *Path;
226+
227+
Cache.clearEntry(ModuleName, RequiredSourceFile);
228+
}
229+
230+
Scanner.globalScan(Mangler, Cache);
196231
return Scanner.getSourceForModuleName(ModuleName);
197232
}
198233

0 commit comments

Comments
 (0)