Skip to content

Commit

Permalink
[X86] Add basic support for -mtune command line option in clang
Browse files Browse the repository at this point in the history
Building on the backend support from D85165. This parses the command line option in the driver, passes it on to CC1 and adds a function attribute.

-Still need to support tune on the target attribute.
-Need to use "generic" as the tuning by default. But need to change generic in the backend first.
-Need to set tune if march is specified and mtune isn't.
-May need to disable getHostCPUName's ability to guess CPU name from features when it doesn't have a family/model match for mtune=native. That's what gcc appears to do.

Differential Revision: https://reviews.llvm.org/D85384
  • Loading branch information
topperc committed Aug 18, 2020
1 parent 2f01785 commit 4cbceb7
Show file tree
Hide file tree
Showing 7 changed files with 64 additions and 1 deletion.
3 changes: 3 additions & 0 deletions clang/include/clang/Basic/TargetOptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ class TargetOptions {
/// If given, the name of the target CPU to generate code for.
std::string CPU;

/// If given, the name of the target CPU to tune code for.
std::string TuneCPU;

/// If given, the unit to use for floating point math.
std::string FPMath;

Expand Down
4 changes: 3 additions & 1 deletion clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -2715,7 +2715,7 @@ def module_file_info : Flag<["-"], "module-file-info">, Flags<[DriverOption,CC1O
HelpText<"Provide information about a particular module file">;
def mthumb : Flag<["-"], "mthumb">, Group<m_Group>;
def mtune_EQ : Joined<["-"], "mtune=">, Group<m_Group>,
HelpText<"Accepted for compatibility with GCC. Currently has no effect.">;
HelpText<"Only supported on X86. Otherwise accepted for compatibility with GCC.">;
def multi__module : Flag<["-"], "multi_module">;
def multiply__defined__unused : Separate<["-"], "multiply_defined_unused">;
def multiply__defined : Separate<["-"], "multiply_defined">;
Expand Down Expand Up @@ -3490,6 +3490,8 @@ let Flags = [CC1Option, CC1AsOption, NoDriverOption] in {

def target_cpu : Separate<["-"], "target-cpu">,
HelpText<"Target a specific cpu type">;
def tune_cpu : Separate<["-"], "tune-cpu">,
HelpText<"Tune for a specific cpu type">;
def target_feature : Separate<["-"], "target-feature">,
HelpText<"Target specific attributes">;
def triple : Separate<["-"], "triple">,
Expand Down
10 changes: 10 additions & 0 deletions clang/lib/Basic/Targets.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -652,6 +652,16 @@ TargetInfo::CreateTargetInfo(DiagnosticsEngine &Diags,
return nullptr;
}

// Check the TuneCPU name if specified.
if (!Opts->TuneCPU.empty() && !Target->isValidCPUName(Opts->TuneCPU)) {
Diags.Report(diag::err_target_unknown_cpu) << Opts->TuneCPU;
SmallVector<StringRef, 32> ValidList;
Target->fillValidCPUList(ValidList);
if (!ValidList.empty())
Diags.Report(diag::note_valid_options) << llvm::join(ValidList, ", ");
return nullptr;
}

// Set the target ABI if specified.
if (!Opts->ABI.empty() && !Target->setABI(Opts->ABI)) {
Diags.Report(diag::err_target_unknown_abi) << Opts->ABI;
Expand Down
5 changes: 5 additions & 0 deletions clang/lib/CodeGen/CodeGenModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1749,6 +1749,7 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD,
// we have a decl for the function and it has a target attribute then
// parse that and add it to the feature set.
StringRef TargetCPU = getTarget().getTargetOpts().CPU;
StringRef TuneCPU = getTarget().getTargetOpts().TuneCPU;
std::vector<std::string> Features;
const auto *FD = dyn_cast_or_null<FunctionDecl>(GD.getDecl());
FD = FD ? FD->getMostRecentDecl() : FD;
Expand Down Expand Up @@ -1783,6 +1784,10 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD,
Attrs.addAttribute("target-cpu", TargetCPU);
AddedAttr = true;
}
if (TuneCPU != "") {
Attrs.addAttribute("tune-cpu", TuneCPU);
AddedAttr = true;
}
if (!Features.empty()) {
llvm::sort(Features);
Attrs.addAttribute("target-features", llvm::join(Features, ","));
Expand Down
13 changes: 13 additions & 0 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/TargetParser.h"
Expand Down Expand Up @@ -2071,6 +2072,18 @@ void Clang::AddX86TargetArgs(const ArgList &Args,
CmdArgs.push_back("soft");
CmdArgs.push_back("-mstack-alignment=4");
}

// Handle -mtune.
// FIXME: We should default to "generic" unless -march is set to match gcc.
if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_mtune_EQ)) {
StringRef Name = A->getValue();

if (Name == "native")
Name = llvm::sys::getHostCPUName();

CmdArgs.push_back("-tune-cpu");
CmdArgs.push_back(Args.MakeArgString(Name));
}
}

void Clang::AddHexagonTargetArgs(const ArgList &Args,
Expand Down
1 change: 1 addition & 0 deletions clang/lib/Frontend/CompilerInvocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3658,6 +3658,7 @@ static void ParseTargetArgs(TargetOptions &Opts, ArgList &Args,
Opts.EABIVersion = EABIVersion;
}
Opts.CPU = std::string(Args.getLastArgValue(OPT_target_cpu));
Opts.TuneCPU = std::string(Args.getLastArgValue(OPT_tune_cpu));
Opts.FPMath = std::string(Args.getLastArgValue(OPT_mfpmath));
Opts.FeaturesAsWritten = Args.getAllArgValues(OPT_target_feature);
Opts.LinkerVersion =
Expand Down
29 changes: 29 additions & 0 deletions clang/test/Misc/target-invalid-cpu-note.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@
// AARCH64: note: valid target CPU values are:
// AARCH64-SAME: cortex-a35,

// RUN: not %clang_cc1 -triple arm64--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_AARCH64
// TUNE_AARCH64: error: unknown target CPU 'not-a-cpu'
// TUNE_AARCH64: note: valid target CPU values are:
// TUNE_AARCH64-SAME: cortex-a35,

// RUN: not %clang_cc1 -triple i386--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86
// X86: error: unknown target CPU 'not-a-cpu'
// X86: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3,
Expand All @@ -32,6 +37,30 @@
// X86_64-SAME: athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1,
// X86_64-SAME: btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, x86-64

// RUN: not %clang_cc1 -triple i386--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_X86
// TUNE_X86: error: unknown target CPU 'not-a-cpu'
// TUNE_X86: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3,
// TUNE_X86-SAME: i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3,
// TUNE_X86-SAME: pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott,
// TUNE_X86-SAME: nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont,
// TUNE_X86-SAME: nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge,
// TUNE_X86-SAME: core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512,
// TUNE_X86-SAME: skx, cascadelake, cooperlake, cannonlake, icelake-client, icelake-server, tigerlake, knl, knm, lakemont, k6, k6-2, k6-3,
// TUNE_X86-SAME: athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64,
// TUNE_X86-SAME: athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10,
// TUNE_X86-SAME: barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2,
// TUNE_X86-SAME: x86-64, geode

// RUN: not %clang_cc1 -triple x86_64--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_X86_64
// TUNE_X86_64: error: unknown target CPU 'not-a-cpu'
// TUNE_X86_64: note: valid target CPU values are: nocona, core2, penryn, bonnell,
// TUNE_X86_64-SAME: atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere,
// TUNE_X86_64-SAME: sandybridge, corei7-avx, ivybridge, core-avx-i, haswell,
// TUNE_X86_64-SAME: core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake,
// TUNE_X86_64-SAME: icelake-client, icelake-server, tigerlake, knl, knm, k8, athlon64, athlon-fx, opteron, k8-sse3,
// TUNE_X86_64-SAME: athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1,
// TUNE_X86_64-SAME: btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, x86-64

// RUN: not %clang_cc1 -triple nvptx--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix NVPTX
// NVPTX: error: unknown target CPU 'not-a-cpu'
// NVPTX: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35,
Expand Down

0 comments on commit 4cbceb7

Please sign in to comment.