-
Notifications
You must be signed in to change notification settings - Fork 12.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[CUDA] Move CUDA to new driver by default #122312
Conversation
@llvm/pr-subscribers-clang @llvm/pr-subscribers-clang-driver Author: Joseph Huber (jhuber6). Changes: Summary: Patch is 37.62 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/122312.diff. 7 files affected:
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 57fa7c1110a68e..62d9ff9e2558a6 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -353,8 +353,7 @@ phases::ID Driver::getFinalPhase(const DerivedArgList &DAL,
if (CCCIsCPP() || (PhaseArg = DAL.getLastArg(options::OPT_E)) ||
(PhaseArg = DAL.getLastArg(options::OPT__SLASH_EP)) ||
(PhaseArg = DAL.getLastArg(options::OPT_M, options::OPT_MM)) ||
- (PhaseArg = DAL.getLastArg(options::OPT__SLASH_P)) ||
- CCGenDiagnostics) {
+ (PhaseArg = DAL.getLastArg(options::OPT__SLASH_P)) || CCGenDiagnostics) {
FinalPhase = phases::Preprocess;
// --precompile only runs up to precompilation.
@@ -368,7 +367,8 @@ phases::ID Driver::getFinalPhase(const DerivedArgList &DAL,
// -{fsyntax-only,-analyze,emit-ast} only run up to the compiler.
} else if ((PhaseArg = DAL.getLastArg(options::OPT_fsyntax_only)) ||
(PhaseArg = DAL.getLastArg(options::OPT_print_supported_cpus)) ||
- (PhaseArg = DAL.getLastArg(options::OPT_print_enabled_extensions)) ||
+ (PhaseArg =
+ DAL.getLastArg(options::OPT_print_enabled_extensions)) ||
(PhaseArg = DAL.getLastArg(options::OPT_module_file_info)) ||
(PhaseArg = DAL.getLastArg(options::OPT_verify_pch)) ||
(PhaseArg = DAL.getLastArg(options::OPT_rewrite_objc)) ||
@@ -379,18 +379,18 @@ phases::ID Driver::getFinalPhase(const DerivedArgList &DAL,
(PhaseArg = DAL.getLastArg(options::OPT_emit_ast))) {
FinalPhase = phases::Compile;
- // -S only runs up to the backend.
+ // -S only runs up to the backend.
} else if ((PhaseArg = DAL.getLastArg(options::OPT_S))) {
FinalPhase = phases::Backend;
- // -c compilation only runs up to the assembler.
+ // -c compilation only runs up to the assembler.
} else if ((PhaseArg = DAL.getLastArg(options::OPT_c))) {
FinalPhase = phases::Assemble;
} else if ((PhaseArg = DAL.getLastArg(options::OPT_emit_interface_stubs))) {
FinalPhase = phases::IfsMerge;
- // Otherwise do everything.
+ // Otherwise do everything.
} else
FinalPhase = phases::Link;
@@ -523,8 +523,7 @@ DerivedArgList *Driver::TranslateInputArgs(const InputArgList &Args) const {
///
/// This routine provides the logic to compute a target triple from various
/// args passed to the driver and the default triple string.
-static llvm::Triple computeTargetTriple(const Driver &D,
- StringRef TargetTriple,
+static llvm::Triple computeTargetTriple(const Driver &D, StringRef TargetTriple,
const ArgList &Args,
StringRef DarwinArchName = "") {
// FIXME: Already done in Compilation *Driver::BuildCompilation
@@ -643,8 +642,8 @@ static llvm::Triple computeTargetTriple(const Driver &D,
// Handle -miamcu flag.
if (Args.hasFlag(options::OPT_miamcu, options::OPT_mno_iamcu, false)) {
if (Target.get32BitArchVariant().getArch() != llvm::Triple::x86)
- D.Diag(diag::err_drv_unsupported_opt_for_target) << "-miamcu"
- << Target.str();
+ D.Diag(diag::err_drv_unsupported_opt_for_target)
+ << "-miamcu" << Target.str();
if (A && !A->getOption().matches(options::OPT_m32))
D.Diag(diag::err_drv_argument_not_allowed_with)
@@ -1570,14 +1569,13 @@ Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) {
if (Arg *A = Args.getLastArg(options::OPT_fembed_bitcode_EQ)) {
StringRef Name = A->getValue();
unsigned Model = llvm::StringSwitch<unsigned>(Name)
- .Case("off", EmbedNone)
- .Case("all", EmbedBitcode)
- .Case("bitcode", EmbedBitcode)
- .Case("marker", EmbedMarker)
- .Default(~0U);
+ .Case("off", EmbedNone)
+ .Case("all", EmbedBitcode)
+ .Case("bitcode", EmbedBitcode)
+ .Case("marker", EmbedMarker)
+ .Default(~0U);
if (Model == ~0U) {
- Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args)
- << Name;
+ Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Name;
} else
BitcodeEmbed = static_cast<BitcodeEmbedMode>(Model);
}
@@ -1626,8 +1624,8 @@ Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) {
DerivedArgList *TranslatedArgs = TranslateInputArgs(*UArgs);
// Owned by the host.
- const ToolChain &TC = getToolChain(
- *UArgs, computeTargetTriple(*this, TargetTriple, *UArgs));
+ const ToolChain &TC =
+ getToolChain(*UArgs, computeTargetTriple(*this, TargetTriple, *UArgs));
// Check if the environment version is valid except wasm case.
llvm::Triple Triple = TC.getTriple();
@@ -1788,7 +1786,7 @@ bool Driver::getCrashDiagnosticFile(StringRef ReproCrashFilename,
size_t LineEnd = Data.find_first_of("\n", ParentProcPos);
if (LineEnd == StringRef::npos)
continue;
- StringRef ParentProcess = Data.slice(ParentProcPos+15, LineEnd).trim();
+ StringRef ParentProcess = Data.slice(ParentProcPos + 15, LineEnd).trim();
int OpenBracket = -1, CloseBracket = -1;
for (size_t i = 0, e = ParentProcess.size(); i < e; ++i) {
if (ParentProcess[i] == '[')
@@ -1801,7 +1799,8 @@ bool Driver::getCrashDiagnosticFile(StringRef ReproCrashFilename,
int CrashPID;
if (OpenBracket < 0 || CloseBracket < 0 ||
ParentProcess.slice(OpenBracket + 1, CloseBracket)
- .getAsInteger(10, CrashPID) || CrashPID != PID) {
+ .getAsInteger(10, CrashPID) ||
+ CrashPID != PID) {
continue;
}
@@ -2057,8 +2056,7 @@ void Driver::generateCompilationDiagnostics(
CrashDiagDir += "_<YYYY-MM-DD-HHMMSS>_<hostname>.crash";
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "Crash backtrace is located in";
- Diag(clang::diag::note_drv_command_failed_diag_msg)
- << CrashDiagDir.str();
+ Diag(clang::diag::note_drv_command_failed_diag_msg) << CrashDiagDir.str();
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "(choose the .crash file that corresponds to your crash)";
}
@@ -2170,8 +2168,7 @@ void Driver::PrintHelp(bool ShowHidden) const {
std::string Usage = llvm::formatv("{0} [options] file...", Name).str();
getOpts().printHelp(llvm::outs(), Usage.c_str(), DriverTitle.c_str(),
- ShowHidden, /*ShowAllAliases=*/false,
- VisibilityMask);
+ ShowHidden, /*ShowAllAliases=*/false, VisibilityMask);
}
void Driver::PrintVersion(const Compilation &C, raw_ostream &OS) const {
@@ -2348,11 +2345,11 @@ bool Driver::HandleImmediateArgs(Compilation &C) {
if (C.getArgs().hasArg(options::OPT_v)) {
if (!SystemConfigDir.empty())
- llvm::errs() << "System configuration file directory: "
- << SystemConfigDir << "\n";
+ llvm::errs() << "System configuration file directory: " << SystemConfigDir
+ << "\n";
if (!UserConfigDir.empty())
- llvm::errs() << "User configuration file directory: "
- << UserConfigDir << "\n";
+ llvm::errs() << "User configuration file directory: " << UserConfigDir
+ << "\n";
}
const ToolChain &TC = C.getDefaultToolChain();
@@ -2431,7 +2428,7 @@ bool Driver::HandleImmediateArgs(Compilation &C) {
StringRef ProgName = A->getValue();
// Null program name cannot have a path.
- if (! ProgName.empty())
+ if (!ProgName.empty())
llvm::outs() << GetProgramPath(ProgName, TC);
llvm::outs() << "\n";
@@ -2665,7 +2662,7 @@ void Driver::BuildUniversalActions(Compilation &C, const ToolChain &TC,
// Add in arch bindings for every top level action, as well as lipo and
// dsymutil steps if needed.
- for (Action* Act : SingleActions) {
+ for (Action *Act : SingleActions) {
// Make sure we can lipo this kind of output. If not (and it is an actual
// output) then we disallow, since we can't create an output file with the
// right name without overwriting it. We could remove this oddity by just
@@ -2708,7 +2705,7 @@ void Driver::BuildUniversalActions(Compilation &C, const ToolChain &TC,
// Verify the debug info output.
if (Args.hasArg(options::OPT_verify_debug_info)) {
- Action* LastAction = Actions.back();
+ Action *LastAction = Actions.back();
Actions.pop_back();
Actions.push_back(C.MakeAction<VerifyDebugInfoJobAction>(
LastAction, types::TY_Nothing));
@@ -2889,7 +2886,8 @@ void Driver::BuildInputs(const ToolChain &TC, DerivedArgList &Args,
Ty = TC.LookupTypeForExtension(Ext + 1);
if (Ty == types::TY_INVALID) {
- if (IsCLMode() && (Args.hasArgNoClaim(options::OPT_E) || CCGenDiagnostics))
+ if (IsCLMode() &&
+ (Args.hasArgNoClaim(options::OPT_E) || CCGenDiagnostics))
Ty = types::TY_CXX;
else if (CCCIsCPP() || CCGenDiagnostics)
Ty = types::TY_C;
@@ -3093,7 +3091,7 @@ class OffloadingActionBuilder final {
virtual void appendLinkDeviceActions(ActionList &AL) {}
/// Append linker host action generated by the builder.
- virtual Action* appendLinkHostActions(ActionList &AL) { return nullptr; }
+ virtual Action *appendLinkHostActions(ActionList &AL) { return nullptr; }
/// Append linker actions generated by the builder.
virtual void appendLinkDependences(OffloadAction::DeviceDependences &DA) {}
@@ -3800,15 +3798,15 @@ class OffloadingActionBuilder final {
for (auto &LI : DeviceLinkerInputs) {
types::ID Output = Args.hasArg(options::OPT_emit_llvm)
- ? types::TY_LLVM_BC
- : types::TY_Image;
+ ? types::TY_LLVM_BC
+ : types::TY_Image;
auto *DeviceLinkAction = C.MakeAction<LinkJobAction>(LI, Output);
// Linking all inputs for the current GPU arch.
// LI contains all the inputs for the linker.
OffloadAction::DeviceDependences DeviceLinkDeps;
- DeviceLinkDeps.add(*DeviceLinkAction, *ToolChains[0],
- GpuArchList[I], AssociatedOffloadKind);
+ DeviceLinkDeps.add(*DeviceLinkAction, *ToolChains[0], GpuArchList[I],
+ AssociatedOffloadKind);
Actions.push_back(C.MakeAction<OffloadAction>(
DeviceLinkDeps, DeviceLinkAction->getType()));
++I;
@@ -3817,8 +3815,8 @@ class OffloadingActionBuilder final {
// If emitting LLVM, do not generate final host/device compilation action
if (Args.hasArg(options::OPT_emit_llvm)) {
- AL.append(Actions);
- return;
+ AL.append(Actions);
+ return;
}
// Create a host object from all the device images by embedding them
@@ -3839,7 +3837,7 @@ class OffloadingActionBuilder final {
}
}
- Action* appendLinkHostActions(ActionList &AL) override { return AL.back(); }
+ Action *appendLinkHostActions(ActionList &AL) override { return AL.back(); }
void appendLinkDependences(OffloadAction::DeviceDependences &DA) override {}
};
@@ -4081,7 +4079,7 @@ class OffloadingActionBuilder final {
return nullptr;
// Let builders add host linking actions.
- Action* HA = nullptr;
+ Action *HA = nullptr;
for (DeviceActionBuilder *SB : SpecializedBuilders) {
if (!SB->isValid())
continue;
@@ -4176,7 +4174,8 @@ void Driver::handleArguments(Compilation &C, DerivedArgList &Args,
getOpts().getOption(options::OPT_frtlib_add_rpath));
}
// Emitting LLVM while linking disabled except in HIPAMD Toolchain
- if (Args.hasArg(options::OPT_emit_llvm) && !Args.hasArg(options::OPT_hip_link))
+ if (Args.hasArg(options::OPT_emit_llvm) &&
+ !Args.hasArg(options::OPT_hip_link))
Diag(clang::diag::err_drv_emit_llvm_link);
if (C.getDefaultToolChain().getTriple().isWindowsMSVCEnvironment() &&
LTOMode != LTOK_None &&
@@ -4334,7 +4333,8 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
Args.hasFlag(options::OPT_foffload_via_llvm,
options::OPT_fno_offload_via_llvm, false) ||
Args.hasFlag(options::OPT_offload_new_driver,
- options::OPT_no_offload_new_driver, false);
+ options::OPT_no_offload_new_driver,
+ C.isOffloadingHostKind(Action::OFK_Cuda));
// Builder to be used to build offloading actions.
std::unique_ptr<OffloadingActionBuilder> OffloadBuilder =
@@ -5084,7 +5084,8 @@ Action *Driver::ConstructPhaseAction(
offloadDeviceOnly() ||
(TargetDeviceOffloadKind == Action::OFK_HIP &&
!Args.hasFlag(options::OPT_offload_new_driver,
- options::OPT_no_offload_new_driver, false)))
+ options::OPT_no_offload_new_driver,
+ C.isOffloadingHostKind(Action::OFK_Cuda))))
? types::TY_LLVM_IR
: types::TY_LLVM_BC;
return C.MakeAction<BackendJobAction>(Input, Output);
@@ -5545,8 +5546,8 @@ class ToolSelector final {
continue;
}
- // This is legal to combine. Append any offload action we found and add the
- // current input to preprocessor inputs.
+ // This is legal to combine. Append any offload action we found and add
+ // the current input to preprocessor inputs.
CollapsedOffloadAction.append(PreprocessJobOffloadActions.begin(),
PreprocessJobOffloadActions.end());
NewInputs.append(PJ->input_begin(), PJ->input_end());
@@ -5569,8 +5570,7 @@ class ToolSelector final {
/// connected to collapsed actions are updated accordingly. The latter enables
/// the caller of the selector to process them afterwards instead of just
/// dropping them. If no suitable tool is found, null will be returned.
- const Tool *getTool(ActionList &Inputs,
- ActionList &CollapsedOffloadAction) {
+ const Tool *getTool(ActionList &Inputs, ActionList &CollapsedOffloadAction) {
//
// Get the largest chain of actions that we could combine.
//
@@ -5613,7 +5613,7 @@ class ToolSelector final {
return T;
}
};
-}
+} // namespace
/// Return a string that uniquely identifies the result of a job. The bound arch
/// is not necessarily represented in the toolchain's triple -- for example,
@@ -5784,9 +5784,9 @@ InputInfoList Driver::BuildJobsForActionNoCache(
StringRef ArchName = BAA->getArchName();
if (!ArchName.empty())
- TC = &getToolChain(C.getArgs(),
- computeTargetTriple(*this, TargetTriple,
- C.getArgs(), ArchName));
+ TC = &getToolChain(
+ C.getArgs(),
+ computeTargetTriple(*this, TargetTriple, C.getArgs(), ArchName));
else
TC = &C.getDefaultToolChain();
@@ -5795,7 +5795,6 @@ InputInfoList Driver::BuildJobsForActionNoCache(
TargetDeviceOffloadKind);
}
-
ActionList Inputs = A->getInputs();
const JobAction *JA = cast<JobAction>(A);
@@ -5929,10 +5928,11 @@ InputInfoList Driver::BuildJobsForActionNoCache(
/*CreatePrefixForHost=*/isa<OffloadPackagerJobAction>(A) ||
!(A->getOffloadingHostActiveKinds() == Action::OFK_None ||
AtTopLevel));
- Result = InputInfo(A, GetNamedOutputPath(C, *JA, BaseInput, BoundArch,
- AtTopLevel, MultipleArchs,
- OffloadingPrefix),
- BaseInput);
+ Result =
+ InputInfo(A,
+ GetNamedOutputPath(C, *JA, BaseInput, BoundArch, AtTopLevel,
+ MultipleArchs, OffloadingPrefix),
+ BaseInput);
if (T->canEmitIR() && OffloadingPrefix.empty())
handleTimeTrace(C, Args, JA, BaseInput, Result);
}
@@ -6260,12 +6260,10 @@ const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA,
}
} else if (JA.getType() == types::TY_PCH && IsCLMode()) {
NamedOutput = C.getArgs().MakeArgString(GetClPchPath(C, BaseName));
- } else if ((JA.getType() == types::TY_Plist || JA.getType() == types::TY_AST) &&
+ } else if ((JA.getType() == types::TY_Plist ||
+ JA.getType() == types::TY_AST) &&
C.getArgs().hasArg(options::OPT__SLASH_o)) {
- StringRef Val =
- C.getArgs()
- .getLastArg(options::OPT__SLASH_o)
- ->getValue();
+ StringRef Val = C.getArgs().getLastArg(options::OPT__SLASH_o)->getValue();
NamedOutput =
MakeCLOutputFilename(C.getArgs(), Val, BaseName, types::TY_Object);
} else {
@@ -6581,15 +6579,15 @@ const ToolChain &Driver::getToolChain(const ArgList &Args,
case llvm::Triple::Linux:
case llvm::Triple::ELFIAMCU:
if (Target.getArch() == llvm::Triple::hexagon)
- TC = std::make_unique<toolchains::HexagonToolChain>(*this, Target,
- Args);
+ TC =
+ std::make_unique<toolchains::HexagonToolChain>(*this, Target, Args);
else if ((Target.getVendor() == llvm::Triple::MipsTechnologies) &&
!Target.hasEnvironment())
TC = std::make_unique<toolchains::MipsLLVMToolChain>(*this, Target,
- Args);
+ Args);
else if (Target.isPPC())
TC = std::make_unique<toolchains::PPCLinuxToolChain>(*this, Target,
- Args);
+ Args);
else if (Target.getArch() == llvm::Triple::ve)
TC = std::make_unique<toolchains::VEToolChain>(*this, Target, Args);
else if (Target.isOHOSFamily())
@@ -6634,7 +6632,7 @@ const ToolChain &Driver::getToolChain(const ArgList &Args,
break;
case llvm::Triple::Itanium:
TC = std::make_unique<toolchains::CrossWindowsToolChain>(*this, Target,
- Args);
+ Args);
break;
case llvm::Triple::MSVC:
case llvm::Triple::UnknownEnvironment:
@@ -6643,8 +6641,7 @@ const ToolChain &Driver::getToolChain(const ArgList &Args,
TC = std::make_unique<toolchains::CrossWindowsToolChain>(
*this, Target, Args);
else
- TC =
- std::make_unique<toolchains::MSVCToolChain>(*this, Target, Args);
+ TC = std::make_unique<toolchains::MSVCToolChain>(*this, Target, Args);
break;
}
break;
@@ -6678,8 +6675,8 @@ const ToolChain &Driver::getToolChain(const ArgList &Args,
TC = std::make_unique<toolchains::TCELEToolChain>(*this, Target, Args);
break;
case llvm::Triple::hexagon:
- TC = std::make_unique<toolchains::HexagonToolChain>(*this, Target,
- Args);
+ TC =
+ std::make_unique<toolchains::HexagonToolChain>(*this, Target, Args);
break;
case llvm::Triple::lanai:
TC = std::make_unique<toolchains::LanaiToolChain>(*this, Target, Args);
@@ -6695,8 +6692,7 @@ const ToolChain &Driver::getToolChain(const ArgList &Args,
TC = std::make_unique<toolchains::AVRToolChain>(*this, Target, Args);
break;
case llvm::Triple::msp430:
- TC =
- std::make_unique<toolchains::MSP430ToolChain>(*this, Target, Args);
+ TC = std::make_unique<toolchains::MSP430ToolChain>(*this, Target, Args);
break;
case llvm::Triple::riscv32:
case llvm::Triple::riscv64:
@@ -6877,7 +6873,7 @@ Driver::getOptionVisibilityMask(bool UseDriverMode) const {
return llvm::opt::Vis...
[truncated]
|
Giving up on HIP for now; I want this to happen before LLVM 21. I believe the main difference users will experience is that […]
Summary: This patch updates the --offload-new-driver flag to be the default for CUDA. This mostly just required updating a lot of tests to use the old format. I tried to update them where possible, but some were directly checking the old format. See the RFC: https://discourse.llvm.org/t/rfc-use-the-new-offloding-driver-for-cuda-and-hip-compilation-by-default/77468/18
One thing that's missing is the release note. It should have an entry about the change, a pointer to the details, instructions on how to revert to the old driver, and possibly a set of instructions for common use cases, e.g. GPU-linking a library (i.e. linking RDC-compiled GPU objects in the library into a single GPU executable, so the library no longer needs GPU-aware linking for the final executable).
Done. It might also be good to update the documentation to mention CUDA more specifically.
Summary:
This patch updates the --offload-new-driver flag to be the default for CUDA.
This mostly just required updating a lot of tests to use the old format.
I tried to update them where possible, but some were directly checking
the old format.
https://discourse.llvm.org/t/rfc-use-the-new-offloding-driver-for-cuda-and-hip-compilation-by-default/77468/18