Skip to content

Commit b39c290

Browse files
elizabethandrews authored and tahonermann committed
[Clang] Fix linker error for function multiversioning (llvm#71706)
Currently, the target_clones attribute results in a linker error when there are no multi-versioned function declarations in the calling TU. In the calling TU, the call is generated with the ‘normal’ assembly name. This does not match any of the versions or the ifunc, since version mangling includes a .versionstring suffix, and the ifunc includes an .ifunc suffix. The linker error is not seen with GCC, since the mangling for the ifunc symbol in GCC is the ‘normal’ assembly name for the function, i.e., no .ifunc suffix. This PR removes the .ifunc suffix to match GCC. It also adds an alias with the .ifunc suffix to ensure backward compatibility. The changes exclude the aarch64 target because the mangling for default versions on aarch64 does not include a .default suffix and is the 'normal' assembly name, unlike other targets. It is not clear to me what the correct behavior for this target is. Old Phabricator review - https://reviews.llvm.org/D158666 --------- Co-authored-by: Tom Honermann <tom@honermann.net>
1 parent 67295eb commit b39c290

File tree

5 files changed

+77
-30
lines changed

5 files changed

+77
-30
lines changed

clang/docs/ReleaseNotes.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -746,6 +746,9 @@ Bug Fixes in This Version
746746
- Fixes crash when trying to obtain the common sugared type of
747747
`decltype(instantiation-dependent-expr)`.
748748
Fixes (`#67603 <https://github.com/llvm/llvm-project/issues/67603>`_)
749+
- Fix the name of the ifunc symbol emitted for multiversion functions declared with the
750+
``target_clones`` attribute. This addresses a linker error that would otherwise occur
751+
when these functions are referenced from other TUs.
749752

750753
Bug Fixes to Compiler Builtins
751754
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

clang/include/clang/Basic/AttrDocs.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2667,6 +2667,13 @@ example, the following will emit 4 versions of the function:
26672667
__attribute__((target_clones("arch=atom,avx2","arch=ivybridge","default")))
26682668
void foo() {}
26692669

2670+
For targets that support the GNU indirect function (IFUNC) feature, dispatch
2671+
is performed by emitting an indirect function that is resolved to the appropriate
2672+
target clone at load time. The indirect function is given the name the
2673+
multiversioned function would have if it had been declared without the attribute.
2674+
For backward compatibility with earlier Clang releases, a function alias with an
2675+
``.ifunc`` suffix is also emitted. The ``.ifunc`` suffixed symbol is a deprecated
2676+
feature and support for it may be removed in the future.
26702677
}];
26712678
}
26722679

clang/lib/CodeGen/CodeGenModule.cpp

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4047,8 +4047,29 @@ void CodeGenModule::emitMultiVersionFunctions() {
40474047
}
40484048

40494049
llvm::Constant *ResolverConstant = GetOrCreateMultiVersionResolver(GD);
4050-
if (auto *IFunc = dyn_cast<llvm::GlobalIFunc>(ResolverConstant))
4050+
if (auto *IFunc = dyn_cast<llvm::GlobalIFunc>(ResolverConstant)) {
40514051
ResolverConstant = IFunc->getResolver();
4052+
// In Aarch64, default versions of multiversioned functions are mangled to
4053+
// their 'normal' assembly name. This deviates from other targets which
4054+
// append a '.default' string. As a result we need to continue appending
4055+
// .ifunc in Aarch64.
4056+
// FIXME: Should Aarch64 mangling for 'default' multiversion function and
4057+
// in turn ifunc function match that of other targets?
4058+
if (FD->isTargetClonesMultiVersion() &&
4059+
!getTarget().getTriple().isAArch64()) {
4060+
const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(GD);
4061+
llvm::FunctionType *DeclTy = getTypes().GetFunctionType(FI);
4062+
std::string MangledName = getMangledNameImpl(
4063+
*this, GD, FD, /*OmitMultiVersionMangling=*/true);
4064+
// In prior versions of Clang, the mangling for ifuncs incorrectly
4065+
// included an .ifunc suffix. This alias is generated for backward
4066+
// compatibility. It is deprecated, and may be removed in the future.
4067+
auto *Alias = llvm::GlobalAlias::create(
4068+
DeclTy, 0, getMultiversionLinkage(*this, GD),
4069+
MangledName + ".ifunc", IFunc, &getModule());
4070+
SetCommonAttributes(FD, Alias);
4071+
}
4072+
}
40524073
llvm::Function *ResolverFunc = cast<llvm::Function>(ResolverConstant);
40534074

40544075
ResolverFunc->setLinkage(getMultiversionLinkage(*this, GD));
@@ -4214,10 +4235,19 @@ llvm::Constant *CodeGenModule::GetOrCreateMultiVersionResolver(GlobalDecl GD) {
42144235
// Holds the name of the resolver, in ifunc mode this is the ifunc (which has
42154236
// a separate resolver).
42164237
std::string ResolverName = MangledName;
4217-
if (getTarget().supportsIFunc())
4218-
ResolverName += ".ifunc";
4219-
else if (FD->isTargetMultiVersion())
4238+
if (getTarget().supportsIFunc()) {
4239+
// In Aarch64, default versions of multiversioned functions are mangled to
4240+
// their 'normal' assembly name. This deviates from other targets which
4241+
// append a '.default' string. As a result we need to continue appending
4242+
// .ifunc in Aarch64.
4243+
// FIXME: Should Aarch64 mangling for 'default' multiversion function and
4244+
// in turn ifunc function match that of other targets?
4245+
if (!FD->isTargetClonesMultiVersion() ||
4246+
getTarget().getTriple().isAArch64())
4247+
ResolverName += ".ifunc";
4248+
} else if (FD->isTargetMultiVersion()) {
42204249
ResolverName += ".resolver";
4250+
}
42214251

42224252
// If the resolver has already been created, just return it.
42234253
if (llvm::GlobalValue *ResolverGV = GetGlobalValue(ResolverName))

clang/test/CodeGen/attr-target-clones.c

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,12 @@
1616
// WINDOWS: $foo_inline = comdat any
1717
// WINDOWS: $foo_inline2 = comdat any
1818

19-
// LINUX: @foo.ifunc = weak_odr ifunc i32 (), ptr @foo.resolver
20-
// LINUX: @foo_dupes.ifunc = weak_odr ifunc void (), ptr @foo_dupes.resolver
21-
// LINUX: @unused.ifunc = weak_odr ifunc void (), ptr @unused.resolver
22-
// LINUX: @foo_inline.ifunc = weak_odr ifunc i32 (), ptr @foo_inline.resolver
23-
// LINUX: @foo_inline2.ifunc = weak_odr ifunc i32 (), ptr @foo_inline2.resolver
24-
// LINUX: @foo_used_no_defn.ifunc = weak_odr ifunc i32 (), ptr @foo_used_no_defn.resolver
19+
// LINUX: @foo = weak_odr ifunc i32 (), ptr @foo.resolver
20+
// LINUX: @foo_dupes = weak_odr ifunc void (), ptr @foo_dupes.resolver
21+
// LINUX: @unused = weak_odr ifunc void (), ptr @unused.resolver
22+
// LINUX: @foo_inline = weak_odr ifunc i32 (), ptr @foo_inline.resolver
23+
// LINUX: @foo_inline2 = weak_odr ifunc i32 (), ptr @foo_inline2.resolver
24+
// LINUX: @foo_used_no_defn = weak_odr ifunc i32 (), ptr @foo_used_no_defn.resolver
2525

2626
int __attribute__((target_clones("sse4.2, default"))) foo(void) { return 0; }
2727
// LINUX: define {{.*}}i32 @foo.sse4.2.0()
@@ -66,8 +66,8 @@ void bar2(void) {
6666
// DARWIN: define {{.*}}void @bar2()
6767
// WINDOWS: define dso_local void @bar2()
6868
foo_dupes();
69-
// LINUX: call void @foo_dupes.ifunc()
70-
// DARWIN: call void @foo_dupes.ifunc()
69+
// LINUX: call void @foo_dupes()
70+
// DARWIN: call void @foo_dupes()
7171
// WINDOWS: call void @foo_dupes()
7272
}
7373

@@ -76,8 +76,8 @@ int bar(void) {
7676
// DARWIN: define {{.*}}i32 @bar() #[[DEF:[0-9]+]]
7777
// WINDOWS: define dso_local i32 @bar() #[[DEF:[0-9]+]]
7878
return foo();
79-
// LINUX: call i32 @foo.ifunc()
80-
// DARWIN: call i32 @foo.ifunc()
79+
// LINUX: call i32 @foo()
80+
// DARWIN: call i32 @foo()
8181
// WINDOWS: call i32 @foo()
8282
}
8383

@@ -111,10 +111,10 @@ int bar3(void) {
111111
// DARWIN: define {{.*}}i32 @bar3()
112112
// WINDOWS: define dso_local i32 @bar3()
113113
return foo_inline() + foo_inline2();
114-
// LINUX: call i32 @foo_inline.ifunc()
115-
// LINUX: call i32 @foo_inline2.ifunc()
116-
// DARWIN: call i32 @foo_inline.ifunc()
117-
// DARWIN: call i32 @foo_inline2.ifunc()
114+
// LINUX: call i32 @foo_inline()
115+
// LINUX: call i32 @foo_inline2()
116+
// DARWIN: call i32 @foo_inline()
117+
// DARWIN: call i32 @foo_inline2()
118118
// WINDOWS: call i32 @foo_inline()
119119
// WINDOWS: call i32 @foo_inline2()
120120
}
@@ -163,8 +163,8 @@ int test_foo_used_no_defn(void) {
163163
// DARWIN: define {{.*}}i32 @test_foo_used_no_defn()
164164
// WINDOWS: define dso_local i32 @test_foo_used_no_defn()
165165
return foo_used_no_defn();
166-
// LINUX: call i32 @foo_used_no_defn.ifunc()
167-
// DARWIN: call i32 @foo_used_no_defn.ifunc()
166+
// LINUX: call i32 @foo_used_no_defn()
167+
// DARWIN: call i32 @foo_used_no_defn()
168168
// WINDOWS: call i32 @foo_used_no_defn()
169169
}
170170

clang/test/CodeGenCXX/attr-target-clones.cpp

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,20 @@
44

55
// DARWIN-NOT: comdat
66

7+
// Aliases for ifuncs
8+
// ITANIUM: @_Z10overloadedi.ifunc = weak_odr alias i32 (i32), ptr @_Z10overloadedi
9+
// ITANIUM: @_Z10overloadedPKc.ifunc = weak_odr alias i32 (ptr), ptr @_Z10overloadedPKc
10+
// ITANIUM: @_ZN1CIssE3fooEv.ifunc = weak_odr alias i32 (ptr), ptr @_ZN1CIssE3fooEv
11+
// ITANIUM: @_ZN1CIisE3fooEv.ifunc = weak_odr alias i32 (ptr), ptr @_ZN1CIisE3fooEv
12+
// ITANIUM: @_ZN1CIdfE3fooEv.ifunc = weak_odr alias i32 (ptr), ptr @_ZN1CIdfE3fooEv
13+
714
// Overloaded ifuncs
8-
// ITANIUM: @_Z10overloadedi.ifunc = weak_odr ifunc i32 (i32), ptr @_Z10overloadedi.resolver
9-
// ITANIUM: @_Z10overloadedPKc.ifunc = weak_odr ifunc i32 (ptr), ptr @_Z10overloadedPKc.resolver
15+
// ITANIUM: @_Z10overloadedi = weak_odr ifunc i32 (i32), ptr @_Z10overloadedi.resolver
16+
// ITANIUM: @_Z10overloadedPKc = weak_odr ifunc i32 (ptr), ptr @_Z10overloadedPKc.resolver
1017
// struct 'C' ifuncs, note the 'float, U' one doesn't get one.
11-
// ITANIUM: @_ZN1CIssE3fooEv.ifunc = weak_odr ifunc i32 (ptr), ptr @_ZN1CIssE3fooEv.resolver
12-
// ITANIUM: @_ZN1CIisE3fooEv.ifunc = weak_odr ifunc i32 (ptr), ptr @_ZN1CIisE3fooEv.resolver
13-
// ITANIUM: @_ZN1CIdfE3fooEv.ifunc = weak_odr ifunc i32 (ptr), ptr @_ZN1CIdfE3fooEv.resolver
18+
// ITANIUM: @_ZN1CIssE3fooEv = weak_odr ifunc i32 (ptr), ptr @_ZN1CIssE3fooEv.resolver
19+
// ITANIUM: @_ZN1CIisE3fooEv = weak_odr ifunc i32 (ptr), ptr @_ZN1CIisE3fooEv.resolver
20+
// ITANIUM: @_ZN1CIdfE3fooEv = weak_odr ifunc i32 (ptr), ptr @_ZN1CIdfE3fooEv.resolver
1421

1522
int __attribute__((target_clones("sse4.2", "default"))) overloaded(int) { return 1; }
1623
// ITANIUM: define {{.*}}i32 @_Z10overloadedi.sse4.2.0(i32{{.+}})
@@ -42,10 +49,10 @@ int __attribute__((target_clones("arch=ivybridge", "default"))) overloaded(const
4249

4350
void use_overloaded() {
4451
overloaded(1);
45-
// ITANIUM: call noundef i32 @_Z10overloadedi.ifunc
52+
// ITANIUM: call noundef i32 @_Z10overloadedi
4653
// WINDOWS: call noundef i32 @"?overloaded@@YAHH@Z"
4754
overloaded(nullptr);
48-
// ITANIUM: call noundef i32 @_Z10overloadedPKc.ifunc
55+
// ITANIUM: call noundef i32 @_Z10overloadedPKc
4956
// WINDOWS: call noundef i32 @"?overloaded@@YAHPEBD@Z"
5057
}
5158

@@ -69,11 +76,11 @@ int __attribute__((target_clones("sse4.2", "default"))) foo(){ return 3;}
6976
void uses_specialized() {
7077
C<short, short> c;
7178
c.foo();
72-
// ITANIUM: call noundef i32 @_ZN1CIssE3fooEv.ifunc(ptr
79+
// ITANIUM: call noundef i32 @_ZN1CIssE3fooEv(ptr
7380
// WINDOWS: call noundef i32 @"?foo@?$C@FF@@QEAAHXZ"(ptr
7481
C<int, short> c2;
7582
c2.foo();
76-
// ITANIUM: call noundef i32 @_ZN1CIisE3fooEv.ifunc(ptr
83+
// ITANIUM: call noundef i32 @_ZN1CIisE3fooEv(ptr
7784
// WINDOWS: call noundef i32 @"?foo@?$C@HF@@QEAAHXZ"(ptr
7885
C<float, short> c3;
7986
c3.foo();
@@ -82,7 +89,7 @@ void uses_specialized() {
8289
// WINDOWS: call noundef i32 @"?foo@?$C@MF@@QEAAHXZ"(ptr
8390
C<double, float> c4;
8491
c4.foo();
85-
// ITANIUM: call noundef i32 @_ZN1CIdfE3fooEv.ifunc(ptr
92+
// ITANIUM: call noundef i32 @_ZN1CIdfE3fooEv(ptr
8693
// WINDOWS: call noundef i32 @"?foo@?$C@NM@@QEAAHXZ"(ptr
8794
}
8895

0 commit comments

Comments
 (0)