Skip to content

Commit cee5b87

Browse files
[Clang] Fix linker error for function multiversioning (#71706)
Currently, the target_clones attribute results in a linker error when there are no multiversioned function declarations in the calling TU. In the calling TU, the call is generated with the 'normal' assembly name. This does not match any of the versions or the ifunc, since the version mangling includes a .versionstring suffix and the ifunc includes an .ifunc suffix. The linker error is not seen with GCC, since GCC's mangling for the ifunc symbol is the 'normal' assembly name of the function, i.e. with no .ifunc suffix. This PR removes the .ifunc suffix to match GCC. It also adds an alias with the .ifunc suffix to ensure backward compatibility. The changes exclude the AArch64 target because the mangling for default versions on AArch64 does not include a .default suffix and is the 'normal' assembly name, unlike other targets. It is not clear to me what the correct behavior for this target is. Old Phabricator review: https://reviews.llvm.org/D158666 --------- Co-authored-by: Tom Honermann <tom@honermann.net>
1 parent 651a49c commit cee5b87

File tree

5 files changed

+83
-26
lines changed

5 files changed

+83
-26
lines changed

clang/docs/ReleaseNotes.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -651,6 +651,9 @@ Bug Fixes in This Version
651651
- Fixed false positive error emitted by clang when performing qualified name
652652
lookup and the current class instantiation has dependent bases.
653653
Fixes (`#13826 <https://github.com/llvm/llvm-project/issues/13826>`_)
654+
- Fix the name of the ifunc symbol emitted for multiversion functions declared with the
655+
``target_clones`` attribute. This addresses a linker error that would otherwise occur
656+
when these functions are referenced from other TUs.
654657
- Fixes compile error that double colon operator cannot resolve macro with parentheses.
655658
Fixes (`#64467 <https://github.com/llvm/llvm-project/issues/64467>`_)
656659
- Clang's ``-Wchar-subscripts`` no longer warns on chars whose values are known non-negative constants.

clang/include/clang/Basic/AttrDocs.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2552,6 +2552,13 @@ example, the following will emit 4 versions of the function:
25522552
__attribute__((target_clones("arch=atom,avx2","arch=ivybridge","default")))
25532553
void foo() {}
25542554

2555+
For targets that support the GNU indirect function (IFUNC) feature, dispatch
2556+
is performed by emitting an indirect function that is resolved to the appropriate
2557+
target clone at load time. The indirect function is given the name the
2558+
multiversioned function would have if it had been declared without the attribute.
2559+
For backward compatibility with earlier Clang releases, a function alias with an
2560+
``.ifunc`` suffix is also emitted. The ``.ifunc`` suffixed symbol is a deprecated
2561+
feature and support for it may be removed in the future.
25552562
}];
25562563
}
25572564

clang/lib/CodeGen/CodeGenModule.cpp

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4178,8 +4178,29 @@ void CodeGenModule::emitMultiVersionFunctions() {
41784178
}
41794179

41804180
llvm::Constant *ResolverConstant = GetOrCreateMultiVersionResolver(GD);
4181-
if (auto *IFunc = dyn_cast<llvm::GlobalIFunc>(ResolverConstant))
4181+
if (auto *IFunc = dyn_cast<llvm::GlobalIFunc>(ResolverConstant)) {
41824182
ResolverConstant = IFunc->getResolver();
4183+
// In Aarch64, default versions of multiversioned functions are mangled to
4184+
// their 'normal' assembly name. This deviates from other targets which
4185+
// append a '.default' string. As a result we need to continue appending
4186+
// .ifunc in Aarch64.
4187+
// FIXME: Should Aarch64 mangling for 'default' multiversion function and
4188+
// in turn ifunc function match that of other targets?
4189+
if (FD->isTargetClonesMultiVersion() &&
4190+
!getTarget().getTriple().isAArch64()) {
4191+
const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(GD);
4192+
llvm::FunctionType *DeclTy = getTypes().GetFunctionType(FI);
4193+
std::string MangledName = getMangledNameImpl(
4194+
*this, GD, FD, /*OmitMultiVersionMangling=*/true);
4195+
// In prior versions of Clang, the mangling for ifuncs incorrectly
4196+
// included an .ifunc suffix. This alias is generated for backward
4197+
// compatibility. It is deprecated, and may be removed in the future.
4198+
auto *Alias = llvm::GlobalAlias::create(
4199+
DeclTy, 0, getMultiversionLinkage(*this, GD),
4200+
MangledName + ".ifunc", IFunc, &getModule());
4201+
SetCommonAttributes(FD, Alias);
4202+
}
4203+
}
41834204
llvm::Function *ResolverFunc = cast<llvm::Function>(ResolverConstant);
41844205

41854206
ResolverFunc->setLinkage(getMultiversionLinkage(*this, GD));
@@ -4346,10 +4367,19 @@ llvm::Constant *CodeGenModule::GetOrCreateMultiVersionResolver(GlobalDecl GD) {
43464367
// Holds the name of the resolver, in ifunc mode this is the ifunc (which has
43474368
// a separate resolver).
43484369
std::string ResolverName = MangledName;
4349-
if (getTarget().supportsIFunc())
4350-
ResolverName += ".ifunc";
4351-
else if (FD->isTargetMultiVersion())
4370+
if (getTarget().supportsIFunc()) {
4371+
// In Aarch64, default versions of multiversioned functions are mangled to
4372+
// their 'normal' assembly name. This deviates from other targets which
4373+
// append a '.default' string. As a result we need to continue appending
4374+
// .ifunc in Aarch64.
4375+
// FIXME: Should Aarch64 mangling for 'default' multiversion function and
4376+
// in turn ifunc function match that of other targets?
4377+
if (!FD->isTargetClonesMultiVersion() ||
4378+
getTarget().getTriple().isAArch64())
4379+
ResolverName += ".ifunc";
4380+
} else if (FD->isTargetMultiVersion()) {
43524381
ResolverName += ".resolver";
4382+
}
43534383

43544384
// If the resolver has already been created, just return it.
43554385
if (llvm::GlobalValue *ResolverGV = GetGlobalValue(ResolverName))

clang/test/CodeGen/attr-target-clones.c

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,23 @@
1616
// LINUX: @__cpu_model = external dso_local global { i32, i32, i32, [1 x i32] }
1717
// LINUX: @__cpu_features2 = external dso_local global [3 x i32]
1818

19-
// LINUX: @internal.ifunc = internal ifunc i32 (), ptr @internal.resolver
20-
// LINUX: @foo.ifunc = weak_odr ifunc i32 (), ptr @foo.resolver
21-
// LINUX: @foo_dupes.ifunc = weak_odr ifunc void (), ptr @foo_dupes.resolver
22-
// LINUX: @unused.ifunc = weak_odr ifunc void (), ptr @unused.resolver
23-
// LINUX: @foo_inline.ifunc = weak_odr ifunc i32 (), ptr @foo_inline.resolver
24-
// LINUX: @foo_inline2.ifunc = weak_odr ifunc i32 (), ptr @foo_inline2.resolver
25-
// LINUX: @foo_used_no_defn.ifunc = weak_odr ifunc i32 (), ptr @foo_used_no_defn.resolver
19+
// LINUX: @internal.ifunc = internal alias i32 (), ptr @internal
20+
// LINUX: @foo.ifunc = weak_odr alias i32 (), ptr @foo
21+
// LINUX: @foo_dupes.ifunc = weak_odr alias void (), ptr @foo_dupes
22+
// LINUX: @unused.ifunc = weak_odr alias void (), ptr @unused
23+
// LINUX: @foo_inline.ifunc = weak_odr alias i32 (), ptr @foo_inline
24+
// LINUX: @foo_inline2.ifunc = weak_odr alias i32 (), ptr @foo_inline2
25+
// LINUX: @foo_used_no_defn.ifunc = weak_odr alias i32 (), ptr @foo_used_no_defn
26+
// LINUX: @isa_level.ifunc = weak_odr alias i32 (i32), ptr @isa_level
27+
28+
// LINUX: @internal = internal ifunc i32 (), ptr @internal.resolver
29+
// LINUX: @foo = weak_odr ifunc i32 (), ptr @foo.resolver
30+
// LINUX: @foo_dupes = weak_odr ifunc void (), ptr @foo_dupes.resolver
31+
// LINUX: @unused = weak_odr ifunc void (), ptr @unused.resolver
32+
// LINUX: @foo_inline = weak_odr ifunc i32 (), ptr @foo_inline.resolver
33+
// LINUX: @foo_inline2 = weak_odr ifunc i32 (), ptr @foo_inline2.resolver
34+
// LINUX: @foo_used_no_defn = weak_odr ifunc i32 (), ptr @foo_used_no_defn.resolver
35+
// LINUX: @isa_level = weak_odr ifunc i32 (i32), ptr @isa_level.resolver
2636

2737
static int __attribute__((target_clones("sse4.2, default"))) internal(void) { return 0; }
2838
int use(void) { return internal(); }
@@ -60,15 +70,15 @@ void bar2(void) {
6070
// LINUX: define {{.*}}void @bar2()
6171
// WINDOWS: define dso_local void @bar2()
6272
foo_dupes();
63-
// LINUX: call void @foo_dupes.ifunc()
73+
// LINUX: call void @foo_dupes()
6474
// WINDOWS: call void @foo_dupes()
6575
}
6676

6777
int bar(void) {
6878
// LINUX: define {{.*}}i32 @bar() #[[DEF:[0-9]+]]
6979
// WINDOWS: define dso_local i32 @bar() #[[DEF:[0-9]+]]
7080
return foo();
71-
// LINUX: call i32 @foo.ifunc()
81+
// LINUX: call i32 @foo()
7282
// WINDOWS: call i32 @foo()
7383
}
7484

@@ -95,8 +105,8 @@ int bar3(void) {
95105
// LINUX: define {{.*}}i32 @bar3()
96106
// WINDOWS: define dso_local i32 @bar3()
97107
return foo_inline() + foo_inline2();
98-
// LINUX: call i32 @foo_inline.ifunc()
99-
// LINUX: call i32 @foo_inline2.ifunc()
108+
// LINUX: call i32 @foo_inline()
109+
// LINUX: call i32 @foo_inline2()
100110
// WINDOWS: call i32 @foo_inline()
101111
// WINDOWS: call i32 @foo_inline2()
102112
}
@@ -134,7 +144,7 @@ int test_foo_used_no_defn(void) {
134144
// LINUX: define {{.*}}i32 @test_foo_used_no_defn()
135145
// WINDOWS: define dso_local i32 @test_foo_used_no_defn()
136146
return foo_used_no_defn();
137-
// LINUX: call i32 @foo_used_no_defn.ifunc()
147+
// LINUX: call i32 @foo_used_no_defn()
138148
// WINDOWS: call i32 @foo_used_no_defn()
139149
}
140150

clang/test/CodeGenCXX/attr-target-clones.cpp

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,20 @@
11
// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=LINUX
22
// RUN: %clang_cc1 -std=c++11 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS
33

4+
// Aliases for ifuncs
5+
// LINUX: @_Z10overloadedi.ifunc = weak_odr alias i32 (i32), ptr @_Z10overloadedi
6+
// LINUX: @_Z10overloadedPKc.ifunc = weak_odr alias i32 (ptr), ptr @_Z10overloadedPKc
7+
// LINUX: @_ZN1CIssE3fooEv.ifunc = weak_odr alias i32 (ptr), ptr @_ZN1CIssE3fooEv
8+
// LINUX: @_ZN1CIisE3fooEv.ifunc = weak_odr alias i32 (ptr), ptr @_ZN1CIisE3fooEv
9+
// LINUX: @_ZN1CIdfE3fooEv.ifunc = weak_odr alias i32 (ptr), ptr @_ZN1CIdfE3fooEv
10+
411
// Overloaded ifuncs
5-
// LINUX: @_Z10overloadedi.ifunc = weak_odr ifunc i32 (i32), ptr @_Z10overloadedi.resolver
6-
// LINUX: @_Z10overloadedPKc.ifunc = weak_odr ifunc i32 (ptr), ptr @_Z10overloadedPKc.resolver
12+
// LINUX: @_Z10overloadedi = weak_odr ifunc i32 (i32), ptr @_Z10overloadedi.resolver
13+
// LINUX: @_Z10overloadedPKc = weak_odr ifunc i32 (ptr), ptr @_Z10overloadedPKc.resolver
714
// struct 'C' ifuncs, note the 'float, U' one doesn't get one.
8-
// LINUX: @_ZN1CIssE3fooEv.ifunc = weak_odr ifunc i32 (ptr), ptr @_ZN1CIssE3fooEv.resolver
9-
// LINUX: @_ZN1CIisE3fooEv.ifunc = weak_odr ifunc i32 (ptr), ptr @_ZN1CIisE3fooEv.resolver
10-
// LINUX: @_ZN1CIdfE3fooEv.ifunc = weak_odr ifunc i32 (ptr), ptr @_ZN1CIdfE3fooEv.resolver
15+
// LINUX: @_ZN1CIssE3fooEv = weak_odr ifunc i32 (ptr), ptr @_ZN1CIssE3fooEv.resolver
16+
// LINUX: @_ZN1CIisE3fooEv = weak_odr ifunc i32 (ptr), ptr @_ZN1CIisE3fooEv.resolver
17+
// LINUX: @_ZN1CIdfE3fooEv = weak_odr ifunc i32 (ptr), ptr @_ZN1CIdfE3fooEv.resolver
1118

1219
int __attribute__((target_clones("sse4.2", "default"))) overloaded(int) { return 1; }
1320
// LINUX: define {{.*}}i32 @_Z10overloadedi.sse4.2.0(i32{{.+}})
@@ -37,10 +44,10 @@ int __attribute__((target_clones("arch=ivybridge", "default"))) overloaded(const
3744

3845
void use_overloaded() {
3946
overloaded(1);
40-
// LINUX: call noundef i32 @_Z10overloadedi.ifunc
47+
// LINUX: call noundef i32 @_Z10overloadedi
4148
// WINDOWS: call noundef i32 @"?overloaded@@YAHH@Z"
4249
overloaded(nullptr);
43-
// LINUX: call noundef i32 @_Z10overloadedPKc.ifunc
50+
// LINUX: call noundef i32 @_Z10overloadedPKc
4451
// WINDOWS: call noundef i32 @"?overloaded@@YAHPEBD@Z"
4552
}
4653

@@ -64,11 +71,11 @@ int __attribute__((target_clones("sse4.2", "default"))) foo(){ return 3;}
6471
void uses_specialized() {
6572
C<short, short> c;
6673
c.foo();
67-
// LINUX: call noundef i32 @_ZN1CIssE3fooEv.ifunc(ptr
74+
// LINUX: call noundef i32 @_ZN1CIssE3fooEv(ptr
6875
// WINDOWS: call noundef i32 @"?foo@?$C@FF@@QEAAHXZ"(ptr
6976
C<int, short> c2;
7077
c2.foo();
71-
// LINUX: call noundef i32 @_ZN1CIisE3fooEv.ifunc(ptr
78+
// LINUX: call noundef i32 @_ZN1CIisE3fooEv(ptr
7279
// WINDOWS: call noundef i32 @"?foo@?$C@HF@@QEAAHXZ"(ptr
7380
C<float, short> c3;
7481
c3.foo();
@@ -77,7 +84,7 @@ void uses_specialized() {
7784
// WINDOWS: call noundef i32 @"?foo@?$C@MF@@QEAAHXZ"(ptr
7885
C<double, float> c4;
7986
c4.foo();
80-
// LINUX: call noundef i32 @_ZN1CIdfE3fooEv.ifunc(ptr
87+
// LINUX: call noundef i32 @_ZN1CIdfE3fooEv(ptr
8188
// WINDOWS: call noundef i32 @"?foo@?$C@NM@@QEAAHXZ"(ptr
8289
}
8390

0 commit comments

Comments
 (0)