Skip to content

[lld][macho] Strip .__uniq. and .llvm. hashes in -order_file #140670

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions lld/Common/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ add_lld_library(lldCommon
Strings.cpp
TargetOptionsCommandFlags.cpp
Timer.cpp
Utils.cpp
VCSVersion.inc
Version.cpp

Expand Down
20 changes: 20 additions & 0 deletions lld/Common/Utils.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
//===- Utils.cpp ------------------------------------------------*- C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// This file defines utility functions that can be shared across architectures.
//===----------------------------------------------------------------------===//

#include "lld/Common/Utils.h"

using namespace llvm;
using namespace lld;

/// Returns \p name with any trailing "(.__uniq.xxxx)?(.llvm.yyyy)?(.Tgm)?"
/// suffix removed, so that symbols whose hash suffixes differ between builds
/// map to the same root name.
StringRef lld::utils::getRootSymbol(StringRef name) {
  // Drop the global merge function suffix first, if it is present.
  name.consume_back(".Tgm");
  // Keep the text before the last ".llvm.", then the text before the last
  // ".__uniq." within that remainder.
  StringRef withoutLlvmHash = name.rsplit(".llvm.").first;
  return withoutLlvmHash.rsplit(".__uniq.").first;
}
3 changes: 2 additions & 1 deletion lld/ELF/BPSectionOrderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,8 @@ DenseMap<const InputSectionBase *, int> elf::runBalancedPartitioning(
if (!sec || sec->size == 0 || !sec->isLive() || sec->repl != sec ||
!orderer.secToSym.try_emplace(sec, d).second)
return;
rootSymbolToSectionIdxs[CachedHashStringRef(getRootSymbol(sym.getName()))]
rootSymbolToSectionIdxs[CachedHashStringRef(
lld::utils::getRootSymbol(sym.getName()))]
.insert(sections.size());
sections.emplace_back(sec);
};
Expand Down
2 changes: 1 addition & 1 deletion lld/MachO/BPSectionOrderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ DenseMap<const InputSection *, int> lld::macho::runBalancedPartitioning(
size_t idx = sections.size();
sections.emplace_back(isec);
for (auto *sym : BPOrdererMachO::getSymbols(*isec)) {
auto rootName = getRootSymbol(sym->getName());
auto rootName = lld::utils::getRootSymbol(sym->getName());
rootSymbolToSectionIdxs[CachedHashStringRef(rootName)].insert(idx);
if (auto linkageName =
BPOrdererMachO::getResolvedLinkageName(rootName))
Expand Down
5 changes: 3 additions & 2 deletions lld/MachO/SectionPriorities.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "lld/Common/Args.h"
#include "lld/Common/CommonLinkerContext.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Utils.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/Support/Path.h"
Expand Down Expand Up @@ -250,7 +251,7 @@ macho::PriorityBuilder::getSymbolPriority(const Defined *sym) {
if (sym->isAbsolute())
return std::nullopt;

auto it = priorities.find(sym->getName());
auto it = priorities.find(utils::getRootSymbol(sym->getName()));
if (it == priorities.end())
return std::nullopt;
const SymbolPriorityEntry &entry = it->second;
Expand Down Expand Up @@ -330,7 +331,7 @@ void macho::PriorityBuilder::parseOrderFile(StringRef path) {
break;
}
}
symbol = line.trim();
symbol = utils::getRootSymbol(line.trim());

if (!symbol.empty()) {
SymbolPriorityEntry &entry = priorities[symbol];
Expand Down
20 changes: 4 additions & 16 deletions lld/include/lld/Common/BPSectionOrdererBase.inc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
//===----------------------------------------------------------------------===//

#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Utils.h"
#include "llvm/ADT/CachedHashString.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
Expand Down Expand Up @@ -147,19 +148,6 @@ static SmallVector<std::pair<unsigned, UtilityNodes>> getUnsForCompression(
return sectionUns;
}

/// Strips the build-dependent suffix from a symbol name.
///
/// A symbol may end in "(.__uniq.xxxx)?(.llvm.yyyy)?(.Tgm)?", where "xxxx" and
/// "yyyy" are numbers that could change between builds and ".Tgm" is the
/// global merge functions suffix (see GlobalMergeFunc::MergingInstanceSuffix).
/// The root name in front of that suffix is returned so that symbols can be
/// matched with profiles which may carry different suffixes.
inline StringRef getRootSymbol(StringRef name) {
  name.consume_back(".Tgm");
  StringRef root = name.rsplit(".llvm.").first;
  root = root.rsplit(".__uniq.").first;
  return root;
}

template <class D>
auto BPOrderer<D>::computeOrder(
StringRef profilePath, bool forFunctionCompression, bool forDataCompression,
Expand Down Expand Up @@ -197,7 +185,7 @@ auto BPOrderer<D>::computeOrder(
for (size_t timestamp = 0; timestamp < trace.size(); timestamp++) {
auto [_, parsedFuncName] = getParsedIRPGOName(
reader->getSymtab().getFuncOrVarName(trace[timestamp]));
parsedFuncName = getRootSymbol(parsedFuncName);
parsedFuncName = lld::utils::getRootSymbol(parsedFuncName);

auto sectionIdxsIt =
rootSymbolToSectionIdxs.find(CachedHashStringRef(parsedFuncName));
Expand Down Expand Up @@ -375,7 +363,7 @@ auto BPOrderer<D>::computeOrder(
// 4?
uint64_t lastPage = endAddress / pageSize;
StringRef rootSymbol = D::getSymName(*sym);
rootSymbol = getRootSymbol(rootSymbol);
rootSymbol = lld::utils::getRootSymbol(rootSymbol);
symbolToPageNumbers.try_emplace(rootSymbol, firstPage, lastPage);
if (auto resolvedLinkageName = D::getResolvedLinkageName(rootSymbol))
symbolToPageNumbers.try_emplace(resolvedLinkageName.value(),
Expand All @@ -393,7 +381,7 @@ auto BPOrderer<D>::computeOrder(
auto traceId = trace.FunctionNameRefs[step];
auto [Filename, ParsedFuncName] =
getParsedIRPGOName(reader->getSymtab().getFuncOrVarName(traceId));
ParsedFuncName = getRootSymbol(ParsedFuncName);
ParsedFuncName = lld::utils::getRootSymbol(ParsedFuncName);
auto it = symbolToPageNumbers.find(ParsedFuncName);
if (it != symbolToPageNumbers.end()) {
auto &[firstPage, lastPage] = it->getValue();
Expand Down
28 changes: 28 additions & 0 deletions lld/include/lld/Common/Utils.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
//===- Utils.h ------------------------------------------------*- C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// This file declares utility functions that can be shared across architectures.
//===----------------------------------------------------------------------===//

#ifndef LLD_UTILS_H
#define LLD_UTILS_H

#include "llvm/ADT/StringRef.h"

namespace lld::utils {

/// Returns the root name of a symbol, i.e. the name without its trailing
/// "(.__uniq.xxxx)?(.llvm.yyyy)?(.Tgm)?" suffix.
///
/// "xxxx" and "yyyy" are numbers that could change between builds, and ".Tgm"
/// is the global merge functions suffix
/// (see GlobalMergeFunc::MergingInstanceSuffix). Using the root name lets
/// these symbols be matched with profiles which may have different suffixes.
llvm::StringRef getRootSymbol(llvm::StringRef Name);

} // namespace lld::utils

#endif
94 changes: 94 additions & 0 deletions lld/test/MachO/order-file-strip-hashes.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# RUN: rm -rf %t && split-file %s %t
# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/a.s -o %t/a.o

# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o -order_file %t/ord-1
# RUN: llvm-nm --numeric-sort --format=just-symbols %t/a.out | FileCheck %s

#--- a.s
.text
.globl _main, A, _B, C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222

_main:
ret
A:
ret
F:
add w0, w0, #3
bl C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222
ret
C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222:
add w0, w0, #2
bl A
ret
D:
add w0, w0, #2
bl B
ret
B:
add w0, w0, #1
bl A
ret
E:
add w0, w0, #2
bl C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222
ret

.section __DATA,__objc_const
# Test multiple symbols at the same address, which will be sorted alphabetically by symbol name.
_OBJC_$_CATEGORY_CLASS_METHODS_Foo_$_Cat2:
.quad 789

_OBJC_$_CATEGORY_SOME_$_FOLDED:
_OBJC_$_CATEGORY_Foo_$_Cat1:
_ALPHABETIC_SORT_FIRST:
.quad 123

_OBJC_$_CATEGORY_Foo_$_Cat2:
.quad 222

_OBJC_$_CATEGORY_INSTANCE_METHODS_Foo_$_Cat1:
.quad 456

.section __DATA,__objc_data
_OBJC_CLASS_$_Foo:
.quad 123

_OBJC_CLASS_$_Bar.llvm.1234:
.quad 456

_OBJC_CLASS_$_Baz:
.quad 789

_OBJC_CLASS_$_Baz2:
.quad 999

.section __DATA,__objc_classrefs
.quad _OBJC_CLASS_$_Foo
.quad _OBJC_CLASS_$_Bar.llvm.1234
.quad _OBJC_CLASS_$_Baz

.subsections_via_symbols


#--- ord-1
# Change order, partially covered.
A
B
C.__uniq.555555555555555555555555555555555555555.llvm.6666666666666666666
_OBJC_CLASS_$_Baz
_OBJC_CLASS_$_Bar.__uniq.12345
_OBJC_CLASS_$_Foo.__uniq.123.llvm.123456789
_OBJC_$_CATEGORY_INSTANCE_METHODS_Foo_$_Cat1
_OBJC_$_CATEGORY_Foo_$_Cat1.llvm.1234567

# .text
# CHECK: A
# CHECK: B
# CHECK: C
# .section __DATA,__objc_const
# CHECK: _OBJC_$_CATEGORY_INSTANCE_METHODS_Foo_$_Cat1
# CHECK: _OBJC_$_CATEGORY_Foo_$_Cat1
# .section __DATA,__objc_data
# CHECK: _OBJC_CLASS_$_Baz
# CHECK: _OBJC_CLASS_$_Bar
# CHECK: _OBJC_CLASS_$_Foo
Loading