Skip to content

Commit b3fd2ea

Browse files
authored
[Clang][FMV] Stop emitting implicit default version using target_clones. (#141808)
With the current behavior the following example yields a linker error: "multiple definition of `foo.default'" // Translation Unit 1 __attribute__((target_clones("dotprod, sve"))) int foo(void) { return 1; } // Translation Unit 2 int foo(void) { return 0; } __attribute__((target_version("dotprod"))) int foo(void); __attribute__((target_version("sve"))) int foo(void); int bar(void) { return foo(); } That is because foo.default is generated twice. As a user I don't find this particularly intuitive. If I wanted the default to be generated in TU1 I'd rather write target_clones("dotprod, sve", "default") explicitly. When changing the code I noticed that the RISC-V target defers the resolver emission when encountering a target_version definition. This seems accidental since it only makes sense for AArch64, where we only emit a resolver once we've processed the entire TU, and only if the default version is present. I've changed this so that RISC-V immediately emmits the resolver. I adjusted the codegen tests since the functions now appear in a different order. Implements ARM-software/acle#377
1 parent 68fd6f4 commit b3fd2ea

13 files changed

+1112
-1238
lines changed

clang/lib/CodeGen/CodeGenModule.cpp

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4237,19 +4237,19 @@ void CodeGenModule::EmitMultiVersionFunctionDefinition(GlobalDecl GD,
42374237
EmitGlobalFunctionDefinition(GD.getWithMultiVersionIndex(I), nullptr);
42384238
} else if (auto *TC = FD->getAttr<TargetClonesAttr>()) {
42394239
for (unsigned I = 0; I < TC->featuresStrs_size(); ++I)
4240-
// AArch64 favors the default target version over the clone if any.
4241-
if ((!TC->isDefaultVersion(I) || !getTarget().getTriple().isAArch64()) &&
4242-
TC->isFirstOfVersion(I))
4240+
if (TC->isFirstOfVersion(I))
42434241
EmitGlobalFunctionDefinition(GD.getWithMultiVersionIndex(I), nullptr);
4244-
// Ensure that the resolver function is also emitted.
4245-
GetOrCreateMultiVersionResolver(GD);
42464242
} else
42474243
EmitGlobalFunctionDefinition(GD, GV);
42484244

4249-
// Defer the resolver emission until we can reason whether the TU
4250-
// contains a default target version implementation.
4251-
if (FD->isTargetVersionMultiVersion())
4252-
AddDeferredMultiVersionResolverToEmit(GD);
4245+
// Ensure that the resolver function is also emitted.
4246+
if (FD->isTargetVersionMultiVersion() || FD->isTargetClonesMultiVersion()) {
4247+
// On AArch64 defer the resolver emission until the entire TU is processed.
4248+
if (getTarget().getTriple().isAArch64())
4249+
AddDeferredMultiVersionResolverToEmit(GD);
4250+
else
4251+
GetOrCreateMultiVersionResolver(GD);
4252+
}
42534253
}
42544254

42554255
void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) {
@@ -4351,7 +4351,7 @@ void CodeGenModule::emitMultiVersionFunctions() {
43514351
};
43524352

43534353
// For AArch64, a resolver is only emitted if a function marked with
4354-
// target_version("default")) or target_clones() is present and defined
4354+
// target_version("default")) or target_clones("default") is defined
43554355
// in this TU. For other architectures it is always emitted.
43564356
bool ShouldEmitResolver = !getTarget().getTriple().isAArch64();
43574357
SmallVector<CodeGenFunction::FMVResolverOption, 10> Options;
@@ -4374,12 +4374,11 @@ void CodeGenModule::emitMultiVersionFunctions() {
43744374
TVA->getFeatures(Feats, Delim);
43754375
Options.emplace_back(Func, Feats);
43764376
} else if (const auto *TC = CurFD->getAttr<TargetClonesAttr>()) {
4377-
if (IsDefined)
4378-
ShouldEmitResolver = true;
43794377
for (unsigned I = 0; I < TC->featuresStrs_size(); ++I) {
43804378
if (!TC->isFirstOfVersion(I))
43814379
continue;
4382-
4380+
if (TC->isDefaultVersion(I) && IsDefined)
4381+
ShouldEmitResolver = true;
43834382
llvm::Function *Func = createFunction(CurFD, I);
43844383
Feats.clear();
43854384
if (getTarget().getTriple().isX86()) {

clang/lib/Sema/SemaDeclAttr.cpp

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3495,13 +3495,7 @@ static void handleTargetClonesAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
34953495
if (HasCommas && AL.getNumArgs() > 1)
34963496
S.Diag(AL.getLoc(), diag::warn_target_clone_mixed_values);
34973497

3498-
if (S.Context.getTargetInfo().getTriple().isAArch64() && !HasDefault) {
3499-
// Add default attribute if there is no one
3500-
HasDefault = true;
3501-
Strings.push_back("default");
3502-
}
3503-
3504-
if (!HasDefault) {
3498+
if (!HasDefault && !S.Context.getTargetInfo().getTriple().isAArch64()) {
35053499
S.Diag(AL.getLoc(), diag::err_target_clone_must_have_default);
35063500
return;
35073501
}

clang/test/AST/attr-target-version.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// RUN: %clang_cc1 -triple aarch64-linux-gnu -ast-dump %s | FileCheck %s
22

33
int __attribute__((target_version("sve2-bitperm + sha2"))) foov(void) { return 1; }
4-
int __attribute__((target_clones(" lse + fp + sha3 "))) fooc(void) { return 2; }
4+
int __attribute__((target_clones(" lse + fp + sha3 ", "default"))) fooc(void) { return 2; }
55
// CHECK: TargetVersionAttr
66
// CHECK: sve2-bitperm + sha2
77
// CHECK: TargetClonesAttr

clang/test/CodeGen/AArch64/fmv-detection.c

Lines changed: 388 additions & 388 deletions
Large diffs are not rendered by default.
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// RUN: %clang_cc1 -triple aarch64-linux-gnu -verify -emit-llvm-only %s -DCHECK_IMPLICIT_DEFAULT
2+
// RUN: %clang_cc1 -triple aarch64-linux-gnu -verify -emit-llvm-only %s -DCHECK_EXPLICIT_DEFAULT
3+
4+
#if defined(CHECK_IMPLICIT_DEFAULT)
5+
6+
int implicit_default_ok(void) { return 0; }
7+
__attribute__((target_clones("aes", "lse"))) int implicit_default_ok(void) { return 1; }
8+
9+
int implicit_default_bad(void) { return 0; }
10+
// expected-error@+2 {{definition with same mangled name 'implicit_default_bad.default' as another definition}}
11+
// expected-note@-2 {{previous definition is here}}
12+
__attribute__((target_clones("aes", "lse", "default"))) int implicit_default_bad(void) { return 1; }
13+
14+
#elif defined(CHECK_EXPLICIT_DEFAULT)
15+
16+
__attribute__((target_version("default"))) int explicit_default_ok(void) { return 0; }
17+
__attribute__((target_clones("aes", "lse"))) int explicit_default_ok(void) { return 1; }
18+
19+
__attribute__((target_version("default"))) int explicit_default_bad(void) { return 0; }
20+
// expected-error@+2 {{definition with same mangled name 'explicit_default_bad.default' as another definition}}
21+
// expected-note@-2 {{previous definition is here}}
22+
__attribute__((target_clones("aes", "lse", "default"))) int explicit_default_bad(void) { return 1; }
23+
24+
#endif

clang/test/CodeGen/AArch64/fmv-features.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ __attribute__((target_version("crc+bti+bti+bti+aes+aes+bf16"))) int fmv(void) {
140140
__attribute__((target_version("non_existent_extension"))) int fmv(void);
141141

142142
// CHECK: define dso_local i32 @fmv.default() #[[default:[0-9]+]] {
143-
__attribute__((target_version("default"))) int fmv(void);
143+
__attribute__((target_version("default"))) int fmv(void) { return 0; }
144144

145145
int caller() {
146146
return fmv();

clang/test/CodeGen/AArch64/fmv-resolver-emission.c

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,14 @@ inline __attribute__((target_version("default"))) void linkonce_func(void) {}
6868
void call_linkonce(void) { linkonce_func(); }
6969

7070

71+
// Test that an ifunc is generated when the clones attribute has a default version.
72+
__attribute__((target_clones("default", "aes"))) void clones_with_default(void) {}
73+
74+
75+
// Test that an ifunc is NOT generated when the clones attribute does not have a default version.
76+
__attribute__((target_clones("aes"))) void clones_without_default(void) {}
77+
78+
7179
//.
7280
// CHECK: @__aarch64_cpu_features = external dso_local global { i64 }
7381
// CHECK: @used_before_default_def = weak_odr ifunc void (), ptr @used_before_default_def.resolver
@@ -76,6 +84,7 @@ void call_linkonce(void) { linkonce_func(); }
7684
// CHECK: @indirect_use = weak_odr ifunc void (), ptr @indirect_use.resolver
7785
// CHECK: @internal_func = internal ifunc void (), ptr @internal_func.resolver
7886
// CHECK: @linkonce_func = weak_odr ifunc void (), ptr @linkonce_func.resolver
87+
// CHECK: @clones_with_default = weak_odr ifunc void (), ptr @clones_with_default.resolver
7988
//.
8089
// CHECK: Function Attrs: noinline nounwind optnone
8190
// CHECK-LABEL: define {{[^@]+}}@used_before_default_def._Maes
@@ -228,6 +237,27 @@ void call_linkonce(void) { linkonce_func(); }
228237
// CHECK-NEXT: ret void
229238
//
230239
//
240+
// CHECK: Function Attrs: noinline nounwind optnone
241+
// CHECK-LABEL: define {{[^@]+}}@clones_with_default.default
242+
// CHECK-SAME: () #[[ATTR2]] {
243+
// CHECK-NEXT: entry:
244+
// CHECK-NEXT: ret void
245+
//
246+
//
247+
// CHECK: Function Attrs: noinline nounwind optnone
248+
// CHECK-LABEL: define {{[^@]+}}@clones_with_default._Maes
249+
// CHECK-SAME: () #[[ATTR0]] {
250+
// CHECK-NEXT: entry:
251+
// CHECK-NEXT: ret void
252+
//
253+
//
254+
// CHECK: Function Attrs: noinline nounwind optnone
255+
// CHECK-LABEL: define {{[^@]+}}@clones_without_default._Maes
256+
// CHECK-SAME: () #[[ATTR0]] {
257+
// CHECK-NEXT: entry:
258+
// CHECK-NEXT: ret void
259+
//
260+
//
231261
// CHECK-LABEL: define {{[^@]+}}@used_before_default_def.resolver() comdat {
232262
// CHECK-NEXT: resolver_entry:
233263
// CHECK-NEXT: call void @__init_cpu_features_resolver()
@@ -339,6 +369,20 @@ void call_linkonce(void) { linkonce_func(); }
339369
// CHECK: resolver_else:
340370
// CHECK-NEXT: ret ptr @linkonce_func.default
341371
//
372+
//
373+
// CHECK-LABEL: define {{[^@]+}}@clones_with_default.resolver() comdat {
374+
// CHECK-NEXT: resolver_entry:
375+
// CHECK-NEXT: call void @__init_cpu_features_resolver()
376+
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
377+
// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 33536
378+
// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 33536
379+
// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
380+
// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
381+
// CHECK: resolver_return:
382+
// CHECK-NEXT: ret ptr @clones_with_default._Maes
383+
// CHECK: resolver_else:
384+
// CHECK-NEXT: ret ptr @clones_with_default.default
385+
//
342386
//.
343387
// CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
344388
// CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}

0 commit comments

Comments
 (0)