Skip to content

[LTO] Reduce memory usage for import lists #106772

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion clang/lib/CodeGen/BackendUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1206,7 +1206,8 @@ static void runThinLTOBackend(
// We can simply import the values mentioned in the combined index, since
// we should only invoke this using the individual indexes written out
// via a WriteIndexesThinBackend.
FunctionImporter::ImportMapTy ImportList;
FunctionImporter::ImportIDTable ImportIDs;
FunctionImporter::ImportMapTy ImportList(ImportIDs);
if (!lto::initImportList(*M, *CombinedIndex, ImportList))
return;

Expand Down
71 changes: 54 additions & 17 deletions llvm/include/llvm/Transforms/IPO/FunctionImport.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@
#include <memory>
#include <string>
#include <system_error>
#include <unordered_map>
#include <unordered_set>
#include <utility>

namespace llvm {
Expand All @@ -33,14 +31,6 @@ class Module;
/// based on the provided summary informations.
class FunctionImporter {
public:
/// The functions to import from a source module and their import type.
/// Note we choose unordered_map over (Small)DenseMap. The number of imports
/// from a source module could be small but DenseMap size grows to 64 quickly
/// and not memory efficient (see
/// https://llvm.org/docs/ProgrammersManual.html#llvm-adt-densemap-h)
using FunctionsToImportTy =
std::unordered_map<GlobalValue::GUID, GlobalValueSummary::ImportKind>;

/// The different reasons selectCallee will chose not to import a
/// candidate.
enum class ImportFailureReason {
Expand Down Expand Up @@ -156,6 +146,12 @@ class FunctionImporter {
return std::make_tuple(FromModule, GUID, Kind);
}

// The same as lookup above. Useful for map_iterator.
std::tuple<StringRef, GlobalValue::GUID, GlobalValueSummary::ImportKind>
operator()(ImportIDTable::ImportIDTy ImportID) const {
return lookup(ImportID);
}

private:
// Make a pair of import IDs [Def, Decl] from an index into TheTable.
static std::pair<ImportIDTy, ImportIDTy> makeIDPair(ImportIDTy Index) {
Expand All @@ -167,6 +163,9 @@ class FunctionImporter {
MapVector<std::pair<StringRef, GlobalValue::GUID>, ImportIDTy> TheTable;
};

// Forward-declare SortedImportList for ImportMapTy.
class SortedImportList;

/// The map maintains the list of imports. Conceptually, it is a collection
/// of tuples of the form:
///
Expand All @@ -179,8 +178,6 @@ class FunctionImporter {
/// path string table).
class ImportMapTy {
public:
using ImportMapTyImpl = DenseMap<StringRef, FunctionsToImportTy>;

enum class AddDefinitionStatus {
// No change was made to the list of imports or whether each import should
// be imported as a declaration or definition.
Expand All @@ -192,6 +189,9 @@ class FunctionImporter {
ChangedToDefinition,
};

ImportMapTy() = delete;
ImportMapTy(ImportIDTable &IDs) : IDs(IDs) {}

// Add the given GUID to ImportList as a definition. If the same GUID has
// been added as a declaration previously, that entry is overridden.
AddDefinitionStatus addDefinition(StringRef FromModule,
Expand All @@ -215,13 +215,49 @@ class FunctionImporter {
SmallVector<StringRef, 0> getSourceModules() const;

std::optional<GlobalValueSummary::ImportKind>
getImportType(const FunctionsToImportTy &GUIDToImportType,
GlobalValue::GUID GUID) const;
getImportType(StringRef FromModule, GlobalValue::GUID GUID) const;

// Iterate over the import list. The caller gets tuples of FromModule,
// GUID, and ImportKind instead of import IDs.
auto begin() const { return map_iterator(Imports.begin(), IDs); }
auto end() const { return map_iterator(Imports.end(), IDs); }

friend class SortedImportList;

private:
ImportIDTable &IDs;
DenseSet<ImportIDTable::ImportIDTy> Imports;
};

// A read-only copy of ImportMapTy with its contents sorted according to the
// given comparison function.
class SortedImportList {
public:
SortedImportList(const ImportMapTy &ImportMap,
llvm::function_ref<
bool(const std::pair<StringRef, GlobalValue::GUID> &,
const std::pair<StringRef, GlobalValue::GUID> &)>
Comp)
: IDs(ImportMap.IDs), Imports(iterator_range(ImportMap.Imports)) {
llvm::sort(Imports, [&](ImportIDTable::ImportIDTy L,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suggest using llvm::stable_sort to make it explicit it's a stable sort (i.e., even if the existing implementation is stable already)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

May I ask why? llvm::stable_sort implies that the original order matters in some way, but the original order here is the iteration order of DenseSet. There is nothing worth preserving there.

ImportIDTable::ImportIDTy R) {
auto Lookup = [&](ImportIDTable::ImportIDTy Id)
-> std::pair<StringRef, GlobalValue::GUID> {
auto Tuple = IDs.lookup(Id);
return std::make_pair(std::get<0>(Tuple), std::get<1>(Tuple));
};
return Comp(Lookup(L), Lookup(R));
});
}

const ImportMapTyImpl &getImportMap() const { return ImportMap; }
// Iterate over the import list. The caller gets tuples of FromModule,
// GUID, and ImportKind instead of import IDs.
auto begin() const { return map_iterator(Imports.begin(), IDs); }
auto end() const { return map_iterator(Imports.end(), IDs); }

private:
ImportMapTyImpl ImportMap;
const ImportIDTable &IDs;
SmallVector<ImportIDTable::ImportIDTy, 0> Imports;
};

// A map from destination modules to lists of imports.
Expand All @@ -231,7 +267,7 @@ class FunctionImporter {
ImportListsTy(size_t Size) : ListsImpl(Size) {}

ImportMapTy &operator[](StringRef DestMod) {
return ListsImpl.try_emplace(DestMod).first->second;
return ListsImpl.try_emplace(DestMod, ImportIDs).first->second;
}

size_t size() const { return ListsImpl.size(); }
Expand All @@ -242,6 +278,7 @@ class FunctionImporter {

private:
DenseMap<StringRef, ImportMapTy> ListsImpl;
ImportIDTable ImportIDs;
};

/// The set contains an entry for every global value that the module exports.
Expand Down
84 changes: 32 additions & 52 deletions llvm/lib/LTO/LTO.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,51 +174,33 @@ std::string llvm::computeLTOCacheKey(
for (auto GUID : ExportsGUID)
Hasher.update(ArrayRef<uint8_t>((uint8_t *)&GUID, sizeof(GUID)));

// Include the hash for every module we import functions from. The set of
// imported symbols for each module may affect code generation and is
// sensitive to link order, so include that as well.
using ImportMapIteratorTy =
FunctionImporter::ImportMapTy::ImportMapTyImpl::const_iterator;
struct ImportModule {
ImportMapIteratorTy ModIt;
const ModuleSummaryIndex::ModuleInfo *ModInfo;

StringRef getIdentifier() const { return ModIt->getFirst(); }
const FunctionImporter::FunctionsToImportTy &getFunctions() const {
return ModIt->second;
}

const ModuleHash &getHash() const { return ModInfo->second; }
};

std::vector<ImportModule> ImportModulesVector;
ImportModulesVector.reserve(ImportList.getImportMap().size());

for (ImportMapIteratorTy It = ImportList.getImportMap().begin();
It != ImportList.getImportMap().end(); ++It) {
ImportModulesVector.push_back({It, Index.getModule(It->getFirst())});
}
// Order using module hash, to be both independent of module name and
// module order.
llvm::sort(ImportModulesVector,
[](const ImportModule &Lhs, const ImportModule &Rhs) -> bool {
return Lhs.getHash() < Rhs.getHash();
});
std::vector<std::pair<uint64_t, uint8_t>> ImportedGUIDs;
for (const ImportModule &Entry : ImportModulesVector) {
auto ModHash = Entry.getHash();
Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));

AddUint64(Entry.getFunctions().size());

ImportedGUIDs.clear();
for (auto &[Fn, ImportType] : Entry.getFunctions())
ImportedGUIDs.push_back(std::make_pair(Fn, ImportType));
llvm::sort(ImportedGUIDs);
for (auto &[GUID, Type] : ImportedGUIDs) {
AddUint64(GUID);
AddUint8(Type);
auto Comp = [&](const std::pair<StringRef, GlobalValue::GUID> &L,
const std::pair<StringRef, GlobalValue::GUID> &R) {
return std::make_pair(Index.getModule(L.first)->second, L.second) <
std::make_pair(Index.getModule(R.first)->second, R.second);
};
FunctionImporter::SortedImportList SortedImportList(ImportList, Comp);

// Count the number of imports for each source module.
DenseMap<StringRef, unsigned> ModuleToNumImports;
for (const auto &[FromModule, GUID, Type] : SortedImportList)
++ModuleToNumImports[FromModule];

std::optional<StringRef> LastModule;
for (const auto &[FromModule, GUID, Type] : SortedImportList) {
if (LastModule != FromModule) {
// Include the hash for every module we import functions from. The set of
// imported symbols for each module may affect code generation and is
// sensitive to link order, so include that as well.
LastModule = FromModule;
auto ModHash = Index.getModule(FromModule)->second;
Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
AddUint64(ModuleToNumImports[FromModule]);
}
AddUint64(GUID);
AddUint8(Type);
}

// Include the hash for the resolved ODR.
Expand Down Expand Up @@ -287,16 +269,14 @@ std::string llvm::computeLTOCacheKey(

// Imported functions may introduce new uses of type identifier resolutions,
// so we need to collect their used resolutions as well.
for (const ImportModule &ImpM : ImportModulesVector)
for (auto &[GUID, UnusedImportType] : ImpM.getFunctions()) {
GlobalValueSummary *S =
Index.findSummaryInModule(GUID, ImpM.getIdentifier());
AddUsedThings(S);
// If this is an alias, we also care about any types/etc. that the aliasee
// may reference.
if (auto *AS = dyn_cast_or_null<AliasSummary>(S))
AddUsedThings(AS->getBaseObject());
}
for (const auto &[FromModule, GUID, Type] : SortedImportList) {
GlobalValueSummary *S = Index.findSummaryInModule(GUID, FromModule);
AddUsedThings(S);
// If this is an alias, we also care about any types/etc. that the aliasee
// may reference.
if (auto *AS = dyn_cast_or_null<AliasSummary>(S))
AddUsedThings(AS->getBaseObject());
}

auto AddTypeIdSummary = [&](StringRef TId, const TypeIdSummary &S) {
AddString(TId);
Expand Down
Loading
Loading