Skip to content

[lld][macho] Support order cstrings with -order_file_cstring #140307

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions lld/MachO/Config.h
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@ struct Configuration {
bool callGraphProfileSort = false;
llvm::StringRef printSymbolOrder;

llvm::StringRef cStringOrderFilePath;
llvm::StringRef irpgoProfilePath;
bool bpStartupFunctionSort = false;
bool bpCompressionSortStartupFunctions = false;
Expand Down
3 changes: 3 additions & 0 deletions lld/MachO/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2178,6 +2178,9 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
StringRef orderFile = args.getLastArgValue(OPT_order_file);
if (!orderFile.empty())
priorityBuilder.parseOrderFile(orderFile);
config->cStringOrderFilePath = args.getLastArgValue(OPT_order_file_cstring);
if (!config->cStringOrderFilePath.empty())
priorityBuilder.parseOrderFileCString(config->cStringOrderFilePath);

referenceStubBinder();

Expand Down
4 changes: 4 additions & 0 deletions lld/MachO/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,10 @@ def order_file : Separate<["-"], "order_file">,
MetaVarName<"<file>">,
HelpText<"Layout functions and data according to specification in <file>">,
Group<grp_opts>;
// -order_file_cstring <file>: takes the path of a file as a separate
// argument; per the help text, cstrings are laid out according to that file.
def order_file_cstring : Separate<["-"], "order_file_cstring">,
MetaVarName<"<file>">,
HelpText<"Layout cstrings according to specification in <file>">,
Group<grp_opts>;
def no_order_inits : Flag<["-"], "no_order_inits">,
HelpText<"Disable default reordering of initializer and terminator functions">,
Flags<[HelpHidden]>,
Expand Down
57 changes: 57 additions & 0 deletions lld/MachO/SectionPriorities.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -388,3 +388,60 @@ macho::PriorityBuilder::buildInputSectionPriorities() {

return sectionPriorities;
}

// Reads the cstring order file at `path` and records a priority in
// cStringPriorities for every hash it lists. If the file cannot be read, an
// error is reported and nothing is recorded.
void macho::PriorityBuilder::parseOrderFileCString(StringRef path) {
std::optional<MemoryBufferRef> buffer = readFile(path);
if (!buffer) {
error("Could not read cstring order file at " + path);
return;
}
MemoryBufferRef mbref = *buffer;
// Priorities start at the smallest int and grow by one per newly recorded
// hash, so hashes listed earlier in the file get smaller priority values.
int priority = std::numeric_limits<int>::min();
for (StringRef line : args::getLines(mbref)) {
if (line.empty())
continue;
uint32_t hash = 0;
// Lines that do not parse as an integer are skipped without a diagnostic.
// NOTE(review): no base is passed to to_integer, so presumably the radix is
// auto-detected and a hex hash needs a "0x" prefix to parse -- confirm.
if (!to_integer(line, hash))
continue;
Comment on lines +404 to +405
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we want to just silently ignore these errors? Is it valid to just continue parsing once we've encountered such a scenario?
Could this ever be a hex value?

Copy link
Contributor Author

@SharonXSharon SharonXSharon May 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yea I guess we don't want to exit just because we encounter an invalid line?
The hash is indeed a hex, the to_integer should be true for parsing a hex number. The hash we are using is the existing hash lld use for cstring literal dedup,
in

uint32_t hash = xxh3_64bits(str) & 0x7fffffff;

// A hash that is already present keeps its existing entry; the priority
// counter only advances when a new entry was actually inserted.
auto [it, wasInserted] = cStringPriorities.try_emplace(hash, priority);
if (wasInserted)
++priority;
}
}

// Computes the layout order of all live cstring pieces found in `inputs`.
//
// Pieces whose hash has an entry in cStringPriorities (the "hot" set, i.e.
// hashes listed via -order_file_cstring) come first, sorted by ascending
// priority. Every other live piece (the "cold" set) follows, in the order in
// which it was encountered while walking `inputs`.
//
// Returns one (section, piece index) pair per live piece. Pieces that are not
// live are omitted.
std::vector<StringPiecePair> macho::PriorityBuilder::buildCStringPriorities(
    ArrayRef<CStringInputSection *> inputs) {
  // Split the input strings into hot and cold sets.
  // Order hot set based on -order_file_cstring for performance improvement;
  // TODO: Order cold set of cstrings for compression via BP.
  std::vector<std::pair<int, StringPiecePair>>
      hotStringPrioritiesAndStringPieces;
  std::vector<StringPiecePair> coldStringPieces;

  for (CStringInputSection *isec : inputs) {
    for (const auto &[stringPieceIdx, piece] : llvm::enumerate(isec->pieces)) {
      if (!piece.live)
        continue;

      auto it = cStringPriorities.find(piece.hash);
      if (it != cStringPriorities.end())
        hotStringPrioritiesAndStringPieces.emplace_back(
            it->second, std::make_pair(isec, stringPieceIdx));
      else
        coldStringPieces.emplace_back(isec, stringPieceIdx);
    }
  }

  // Order hot set for perf. Compare on the priority only: the default pair
  // comparison would break priority ties by comparing section pointers, which
  // is unspecified for unrelated objects and would make the layout depend on
  // allocation addresses. With a priority-only comparator the stable sort
  // keeps tied pieces in input order.
  llvm::stable_sort(hotStringPrioritiesAndStringPieces,
                    [](const auto &lhs, const auto &rhs) {
                      return lhs.first < rhs.first;
                    });

  std::vector<StringPiecePair> orderedStringPieces;
  orderedStringPieces.reserve(hotStringPrioritiesAndStringPieces.size() +
                              coldStringPieces.size());
  for (const auto &hotEntry : hotStringPrioritiesAndStringPieces)
    orderedStringPieces.push_back(hotEntry.second);

  // TODO: Order cold set for compression

  orderedStringPieces.insert(orderedStringPieces.end(),
                             coldStringPieces.begin(), coldStringPieces.end());

  return orderedStringPieces;
}
20 changes: 20 additions & 0 deletions lld/MachO/SectionPriorities.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
namespace lld::macho {

using SectionPair = std::pair<const InputSection *, const InputSection *>;
using StringPiecePair = std::pair<CStringInputSection *, size_t>;

class PriorityBuilder {
public:
Expand Down Expand Up @@ -55,6 +56,23 @@ class PriorityBuilder {
// contains.
llvm::DenseMap<const InputSection *, int> buildInputSectionPriorities();

// Reads the cstring order file at `path` into cStringPriorities.
// A cstring order file has one entry per line, in the following format:
//
// <hash of cstring literal content>
//
// Cstring literals are not symbolized, so we can't identify them by name.
// However, cstrings are deduplicated, hence unique, so we use the hash of
// the content of cstring literals to identify them and prioritize them.
// We use the same hash as used in StringPiece, i.e. 31 bit:
// xxh3_64bits(string) & 0x7fffffff
//
// Additionally, given they are deduplicated and unique, we don't need to know
// which object file they are from.
void parseOrderFileCString(StringRef path);
std::vector<StringPiecePair>
buildCStringPriorities(ArrayRef<CStringInputSection *>);

private:
// The symbol with the smallest priority should be ordered first in the output
// section (modulo input section contiguity constraints).
Expand All @@ -68,6 +86,8 @@ class PriorityBuilder {

std::optional<int> getSymbolPriority(const Defined *sym);
llvm::DenseMap<llvm::StringRef, SymbolPriorityEntry> priorities;
/// A map from cstring literal hashes to priorities
llvm::DenseMap<uint32_t, int> cStringPriorities;
llvm::MapVector<SectionPair, uint64_t> callGraphProfile;
};

Expand Down
34 changes: 17 additions & 17 deletions lld/MachO/SyntheticSections.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "MachOStructs.h"
#include "ObjC.h"
#include "OutputSegment.h"
#include "SectionPriorities.h"
#include "SymbolTable.h"
#include "Symbols.h"

Expand Down Expand Up @@ -1766,26 +1767,25 @@ void DeduplicatedCStringSection::finalizeContents() {
}
}

// Assign an offset for each string and save it to the corresponding
// Sort the strings for performance and compression size win, and then
// assign an offset for each string and save it to the corresponding
// StringPieces for easy access.
for (CStringInputSection *isec : inputs) {
for (const auto &[i, piece] : llvm::enumerate(isec->pieces)) {
if (!piece.live)
continue;
auto s = isec->getCachedHashStringRef(i);
auto it = stringOffsetMap.find(s);
assert(it != stringOffsetMap.end());
StringOffset &offsetInfo = it->second;
if (offsetInfo.outSecOff == UINT64_MAX) {
offsetInfo.outSecOff =
alignToPowerOf2(size, 1ULL << offsetInfo.trailingZeros);
size =
offsetInfo.outSecOff + s.size() + 1; // account for null terminator
}
piece.outSecOff = offsetInfo.outSecOff;
for (auto &[isec, i] : priorityBuilder.buildCStringPriorities(inputs)) {
auto &piece = isec->pieces[i];
auto s = isec->getCachedHashStringRef(i);
auto it = stringOffsetMap.find(s);
assert(it != stringOffsetMap.end());
lld::macho::DeduplicatedCStringSection::StringOffset &offsetInfo =
it->second;
if (offsetInfo.outSecOff == UINT64_MAX) {
offsetInfo.outSecOff =
alignToPowerOf2(size, 1ULL << offsetInfo.trailingZeros);
size = offsetInfo.outSecOff + s.size() + 1; // account for null terminator
}
isec->isFinal = true;
piece.outSecOff = offsetInfo.outSecOff;
}
for (CStringInputSection *isec : inputs)
isec->isFinal = true;
}

void DeduplicatedCStringSection::writeTo(uint8_t *buf) const {
Expand Down
Loading