Skip to content

Commit 5156da2

Browse files
committed
[clang][modules] Fix filesystem races in ModuleManager
The `ModuleManager` uses `FileEntry` objects to uniquely identify module files. This requires first consulting the `FileManager` (and therefore the file system) when loading PCM files. This is problematic, as this may load a different PCM file to what's already in the `InMemoryModuleCache` and fail the size and mtime checks. This PR changes things so that module files are identified by their file system path. This removes the need of knowing the file entry at the start and allows us to fix the race. The downside is that we no longer get the `FileManager` inode-based uniquing, meaning symlinks in the module cache no longer work. I think this is fine, since Clang itself never creates symlinks in that directory. Moreover, we already had to work around filesystems recycling inode numbers, so this actually seems like a win to me (and resolves a long-standing FIXME-like comment). Note that this change also requires that all places calling into `ModuleManager` from within a single `CompilerInstance` use consistent paths to refer to module files. This might be problematic if there are concurrent Clang processes operating on the same module cache directory but with different spellings - the `IMPORT` records in PCM files will be valid and consistent within single process, but may break uniquing in another process. We might need to do some canonicalization of the module cache paths to avoid this issue.
1 parent da3ee97 commit 5156da2

File tree

2 files changed

+60
-129
lines changed

2 files changed

+60
-129
lines changed

clang/include/clang/Serialization/ModuleManager.h

Lines changed: 3 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "llvm/ADT/STLExtras.h"
2323
#include "llvm/ADT/SmallPtrSet.h"
2424
#include "llvm/ADT/SmallVector.h"
25+
#include "llvm/ADT/StringMap.h"
2526
#include "llvm/ADT/StringRef.h"
2627
#include "llvm/ADT/iterator.h"
2728
#include "llvm/ADT/iterator_range.h"
@@ -58,7 +59,7 @@ class ModuleManager {
5859
SmallVector<ModuleFile *, 2> Roots;
5960

6061
/// All loaded modules, indexed by name.
61-
llvm::DenseMap<const FileEntry *, ModuleFile *> Modules;
62+
llvm::StringMap<ModuleFile *> Modules;
6263

6364
/// FileManager that handles translating between filenames and
6465
/// FileEntry *.
@@ -179,7 +180,7 @@ class ModuleManager {
179180
ModuleFile *lookupByModuleName(StringRef ModName) const;
180181

181182
/// Returns the module associated with the given module file.
182-
ModuleFile *lookup(const FileEntry *File) const;
183+
ModuleFile *lookup(FileEntryRef File) const;
183184

184185
/// Returns the in-memory (virtual file) buffer with the given name
185186
std::unique_ptr<llvm::MemoryBuffer> lookupBuffer(StringRef Name);
@@ -283,26 +284,6 @@ class ModuleManager {
283284
void visit(llvm::function_ref<bool(ModuleFile &M)> Visitor,
284285
llvm::SmallPtrSetImpl<ModuleFile *> *ModuleFilesHit = nullptr);
285286

286-
/// Attempt to resolve the given module file name to a file entry.
287-
///
288-
/// \param FileName The name of the module file.
289-
///
290-
/// \param ExpectedSize The size that the module file is expected to have.
291-
/// If the actual size differs, the resolver should return \c true.
292-
///
293-
/// \param ExpectedModTime The modification time that the module file is
294-
/// expected to have. If the actual modification time differs, the resolver
295-
/// should return \c true.
296-
///
297-
/// \param File Will be set to the file if there is one, or null
298-
/// otherwise.
299-
///
300-
/// \returns True if a file exists but does not meet the size/
301-
/// modification time criteria, false if the file is either available and
302-
/// suitable, or is missing.
303-
bool lookupModuleFile(StringRef FileName, off_t ExpectedSize,
304-
time_t ExpectedModTime, OptionalFileEntryRef &File);
305-
306287
/// View the graphviz representation of the module graph.
307288
void viewGraph();
308289

clang/lib/Serialization/ModuleManager.cpp

Lines changed: 57 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,8 @@ ModuleFile *ModuleManager::lookupByModuleName(StringRef Name) const {
5858
return nullptr;
5959
}
6060

61-
ModuleFile *ModuleManager::lookup(const FileEntry *File) const {
62-
return Modules.lookup(File);
61+
ModuleFile *ModuleManager::lookup(FileEntryRef File) const {
62+
return Modules.lookup(File.getName());
6363
}
6464

6565
std::unique_ptr<llvm::MemoryBuffer>
@@ -106,105 +106,48 @@ ModuleManager::addModule(StringRef FileName, ModuleKind Type,
106106
std::string &ErrorStr) {
107107
Module = nullptr;
108108

109-
// Look for the file entry. This only fails if the expected size or
110-
// modification time differ.
111-
OptionalFileEntryRef Entry;
112-
if (Type == MK_ExplicitModule || Type == MK_PrebuiltModule) {
113-
// If we're not expecting to pull this file out of the module cache, it
114-
// might have a different mtime due to being moved across filesystems in
115-
// a distributed build. The size must still match, though. (As must the
116-
// contents, but we can't check that.)
117-
ExpectedModTime = 0;
118-
}
119-
// Note: ExpectedSize and ExpectedModTime will be 0 for MK_ImplicitModule
120-
// when using an ASTFileSignature.
121-
if (lookupModuleFile(FileName, ExpectedSize, ExpectedModTime, Entry)) {
122-
ErrorStr = "module file has a different size or mtime than expected";
123-
return OutOfDate;
124-
}
125-
126-
if (!Entry) {
127-
ErrorStr = "module file not found";
128-
return Missing;
129-
}
130-
131-
// The ModuleManager's use of FileEntry nodes as the keys for its map of
132-
// loaded modules is less than ideal. Uniqueness for FileEntry nodes is
133-
// maintained by FileManager, which in turn uses inode numbers on hosts
134-
// that support that. When coupled with the module cache's proclivity for
135-
// turning over and deleting stale PCMs, this means entries for different
136-
// module files can wind up reusing the same underlying inode. When this
137-
// happens, subsequent accesses to the Modules map will disagree on the
138-
// ModuleFile associated with a given file. In general, it is not sufficient
139-
// to resolve this conundrum with a type like FileEntryRef that stores the
140-
// name of the FileEntry node on first access because of path canonicalization
141-
// issues. However, the paths constructed for implicit module builds are
142-
// fully under Clang's control. We *can*, therefore, rely on their structure
143-
// being consistent across operating systems and across subsequent accesses
144-
// to the Modules map.
145-
auto implicitModuleNamesMatch = [](ModuleKind Kind, const ModuleFile *MF,
146-
FileEntryRef Entry) -> bool {
147-
if (Kind != MK_ImplicitModule)
148-
return true;
149-
return Entry.getName() == MF->FileName;
150-
};
151-
152109
// Check whether we already loaded this module, before
153-
if (ModuleFile *ModuleEntry = Modules.lookup(*Entry)) {
154-
if (implicitModuleNamesMatch(Type, ModuleEntry, *Entry)) {
155-
// Check the stored signature.
156-
if (checkSignature(ModuleEntry->Signature, ExpectedSignature, ErrorStr))
157-
return OutOfDate;
158-
159-
Module = ModuleEntry;
160-
updateModuleImports(*ModuleEntry, ImportedBy, ImportLoc);
161-
return AlreadyLoaded;
162-
}
110+
if (ModuleFile *ModuleEntry = Modules.lookup(FileName)) {
111+
// Check the stored signature.
112+
if (checkSignature(ModuleEntry->Signature, ExpectedSignature, ErrorStr))
113+
return OutOfDate;
114+
115+
Module = ModuleEntry;
116+
updateModuleImports(*ModuleEntry, ImportedBy, ImportLoc);
117+
return AlreadyLoaded;
163118
}
164119

165-
// Allocate a new module.
166-
auto NewModule = std::make_unique<ModuleFile>(Type, *Entry, Generation);
167-
NewModule->Index = Chain.size();
168-
NewModule->FileName = FileName.str();
169-
NewModule->ImportLoc = ImportLoc;
170-
NewModule->InputFilesValidationTimestamp = 0;
171-
172-
if (NewModule->Kind == MK_ImplicitModule) {
173-
std::string TimestampFilename =
174-
ModuleFile::getTimestampFilename(NewModule->FileName);
175-
llvm::vfs::Status Status;
176-
// A cached stat value would be fine as well.
177-
if (!FileMgr.getNoncachedStatValue(TimestampFilename, Status))
178-
NewModule->InputFilesValidationTimestamp =
179-
llvm::sys::toTimeT(Status.getLastModificationTime());
180-
}
120+
OptionalFileEntryRef Entry;
121+
llvm::MemoryBuffer *ModuleBuffer = nullptr;
181122

182123
// Load the contents of the module
183124
if (std::unique_ptr<llvm::MemoryBuffer> Buffer = lookupBuffer(FileName)) {
184125
// The buffer was already provided for us.
185-
NewModule->Buffer = &ModuleCache->addBuiltPCM(FileName, std::move(Buffer));
186-
// Since the cached buffer is reused, it is safe to close the file
187-
// descriptor that was opened while stat()ing the PCM in
188-
// lookupModuleFile() above, it won't be needed any longer.
189-
Entry->closeFile();
126+
ModuleBuffer = &ModuleCache->addBuiltPCM(FileName, std::move(Buffer));
190127
} else if (llvm::MemoryBuffer *Buffer =
191128
getModuleCache().lookupPCM(FileName)) {
192-
NewModule->Buffer = Buffer;
193-
// As above, the file descriptor is no longer needed.
194-
Entry->closeFile();
129+
ModuleBuffer = Buffer;
195130
} else if (getModuleCache().shouldBuildPCM(FileName)) {
196131
// Report that the module is out of date, since we tried (and failed) to
197132
// import it earlier.
198-
Entry->closeFile();
199133
return OutOfDate;
200134
} else {
135+
Entry = FileName == "-"
136+
? expectedToOptional(FileMgr.getSTDIN())
137+
: FileMgr.getOptionalFileRef(FileName, /*OpenFile=*/true,
138+
/*CacheFailure=*/false);
139+
if (!Entry) {
140+
ErrorStr = "module file not found";
141+
return Missing;
142+
}
143+
201144
// Get a buffer of the file and close the file descriptor when done.
202145
// The file is volatile because in a parallel build we expect multiple
203146
// compiler processes to use the same module file rebuilding it if needed.
204147
//
205148
// RequiresNullTerminator is false because module files don't need it, and
206149
// this allows the file to still be mmapped.
207-
auto Buf = FileMgr.getBufferForFile(NewModule->File,
150+
auto Buf = FileMgr.getBufferForFile(*Entry,
208151
/*IsVolatile=*/true,
209152
/*RequiresNullTerminator=*/false);
210153

@@ -213,9 +156,39 @@ ModuleManager::addModule(StringRef FileName, ModuleKind Type,
213156
return Missing;
214157
}
215158

216-
NewModule->Buffer = &getModuleCache().addPCM(FileName, std::move(*Buf));
159+
if ((ExpectedSize && ExpectedSize != Entry->getSize()) ||
160+
(ExpectedModTime && ExpectedModTime != Entry->getModificationTime())) {
161+
ErrorStr = "module file has a different size or mtime than expected";
162+
return OutOfDate;
163+
}
164+
165+
ModuleBuffer = &getModuleCache().addPCM(FileName, std::move(*Buf));
166+
}
167+
168+
// TODO: Make it so that ModuleFile is not tied to a FileEntry.
169+
if (!Entry && !((Entry = FileMgr.getVirtualFileRef(FileName, ExpectedSize,
170+
ExpectedModTime))))
171+
return Missing;
172+
173+
// Allocate a new module.
174+
auto NewModule = std::make_unique<ModuleFile>(Type, *Entry, Generation);
175+
NewModule->Index = Chain.size();
176+
NewModule->FileName = FileName.str();
177+
NewModule->ImportLoc = ImportLoc;
178+
NewModule->InputFilesValidationTimestamp = 0;
179+
180+
if (NewModule->Kind == MK_ImplicitModule) {
181+
std::string TimestampFilename =
182+
ModuleFile::getTimestampFilename(NewModule->FileName);
183+
llvm::vfs::Status Status;
184+
// A cached stat value would be fine as well.
185+
if (!FileMgr.getNoncachedStatValue(TimestampFilename, Status))
186+
NewModule->InputFilesValidationTimestamp =
187+
llvm::sys::toTimeT(Status.getLastModificationTime());
217188
}
218189

190+
NewModule->Buffer = ModuleBuffer;
191+
219192
// Initialize the stream.
220193
NewModule->Data = PCHContainerRdr.ExtractPCH(*NewModule->Buffer);
221194

@@ -226,7 +199,7 @@ ModuleManager::addModule(StringRef FileName, ModuleKind Type,
226199
return OutOfDate;
227200

228201
// We're keeping this module. Store it everywhere.
229-
Module = Modules[*Entry] = NewModule.get();
202+
Module = Modules[FileName] = NewModule.get();
230203

231204
updateModuleImports(*NewModule, ImportedBy, ImportLoc);
232205

@@ -272,7 +245,7 @@ void ModuleManager::removeModules(ModuleIterator First) {
272245

273246
// Delete the modules.
274247
for (ModuleIterator victim = First; victim != Last; ++victim)
275-
Modules.erase(victim->File);
248+
Modules.erase(victim->File.getName());
276249

277250
Chain.erase(Chain.begin() + (First - begin()), Chain.end());
278251
}
@@ -432,29 +405,6 @@ void ModuleManager::visit(llvm::function_ref<bool(ModuleFile &M)> Visitor,
432405
returnVisitState(std::move(State));
433406
}
434407

435-
bool ModuleManager::lookupModuleFile(StringRef FileName, off_t ExpectedSize,
436-
time_t ExpectedModTime,
437-
OptionalFileEntryRef &File) {
438-
if (FileName == "-") {
439-
File = expectedToOptional(FileMgr.getSTDIN());
440-
return false;
441-
}
442-
443-
// Open the file immediately to ensure there is no race between stat'ing and
444-
// opening the file.
445-
File = FileMgr.getOptionalFileRef(FileName, /*OpenFile=*/true,
446-
/*CacheFailure=*/false);
447-
448-
if (File &&
449-
((ExpectedSize && ExpectedSize != File->getSize()) ||
450-
(ExpectedModTime && ExpectedModTime != File->getModificationTime())))
451-
// Do not destroy File, as it may be referenced. If we need to rebuild it,
452-
// it will be destroyed by removeModules.
453-
return true;
454-
455-
return false;
456-
}
457-
458408
#ifndef NDEBUG
459409
namespace llvm {
460410

0 commit comments

Comments
 (0)