Skip to content

[SYCL] Enable AMD GPU support. #3795

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 23 commits into from
Jun 24, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions buildbot/configure.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ def do_configure(args):
libclc_targets_to_build = ''
sycl_build_pi_cuda = 'OFF'
sycl_build_pi_esimd_cpu = 'ON'
sycl_build_pi_rocm = 'OFF'
sycl_werror = 'ON'
llvm_enable_assertions = 'ON'
llvm_enable_doxygen = 'OFF'
Expand All @@ -47,6 +48,14 @@ def do_configure(args):

if args.disable_esimd_cpu:
sycl_build_pi_esimd_cpu = 'OFF'

if args.rocm:
llvm_targets_to_build += ';AMDGPU'
# TODO: libclc should be added only once when DPC++ is built with both
# CUDA and ROCM support.
llvm_enable_projects += ';libclc'
libclc_targets_to_build = 'amdgcn--;amdgcn--amdhsa'
sycl_build_pi_rocm = 'ON'

if args.no_werror:
sycl_werror = 'OFF'
Expand Down Expand Up @@ -82,6 +91,7 @@ def do_configure(args):
"-DLLVM_ENABLE_PROJECTS={}".format(llvm_enable_projects),
"-DLIBCLC_TARGETS_TO_BUILD={}".format(libclc_targets_to_build),
"-DSYCL_BUILD_PI_CUDA={}".format(sycl_build_pi_cuda),
"-DSYCL_BUILD_PI_ROCM={}".format(sycl_build_pi_rocm),
"-DLLVM_BUILD_TOOLS=ON",
"-DSYCL_ENABLE_WERROR={}".format(sycl_werror),
"-DCMAKE_INSTALL_PREFIX={}".format(install_dir),
Expand Down Expand Up @@ -151,6 +161,7 @@ def main():
parser.add_argument("-t", "--build-type",
metavar="BUILD_TYPE", default="Release", help="build type: Debug, Release")
parser.add_argument("--cuda", action='store_true', help="switch from OpenCL to CUDA")
# --rocm turns on the ROCM-specific configuration handled under `args.rocm` in do_configure().
parser.add_argument("--rocm", action='store_true', help="switch from OpenCL to ROCM")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@malixian, one note.
As of today, we don't have AMD GPU HW in our CI system, so we won't be able to validate that other contributions do not break ROCM support.
I think we should decide ASAP how ROCM support is going to be verified.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is indeed a problem.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At least for the plugin, we can use hip<->cuda source code compatibility.

parser.add_argument("--arm", action='store_true', help="build ARM support rather than x86")
parser.add_argument("--disable-esimd-cpu", action='store_true', help="build without ESIMD_CPU support")
parser.add_argument("--no-assertions", action='store_true', help="build without assertions")
Expand Down
3 changes: 2 additions & 1 deletion clang/lib/CodeGen/CGCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -760,7 +760,8 @@ CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType,
unsigned CC = ClangCallConvToLLVMCallConv(info.getCC());
// This is required so SYCL kernels are successfully processed by tools from CUDA. Kernels
// with a `spir_kernel` calling convention are ignored otherwise.
if (CC == llvm::CallingConv::SPIR_KERNEL && CGM.getTriple().isNVPTX() &&
if (CC == llvm::CallingConv::SPIR_KERNEL &&
(CGM.getTriple().isNVPTX() || CGM.getTriple().isAMDGCN()) &&
getContext().getLangOpts().SYCLIsDevice) {
CC = llvm::CallingConv::C;
}
Expand Down
39 changes: 34 additions & 5 deletions clang/lib/Driver/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -700,6 +700,11 @@ static bool isValidSYCLTriple(llvm::Triple T) {
// NVPTX is valid for SYCL.
if (T.isNVPTX())
return true;

// AMDGCN is valid for SYCL
if (T.isAMDGCN())
return true;

// Check for invalid SYCL device triple values.
// Non-SPIR arch.
if (!T.isSPIR())
Expand Down Expand Up @@ -3898,6 +3903,21 @@ class OffloadingActionBuilder final {
return BA;
}

/// Builds the action chain that finishes AMDGCN device code for \p Input.
///
/// \p Input is run through the Backend and Assemble phases. The assembled
/// result is wrapped in a link job producing types::TY_Image, and that image
/// is wrapped in a second link job producing types::TY_HIP_FATBIN.
///
/// \param Input action whose output feeds the backend phase.
/// \param TT    target triple; not consulted by this function.
/// \returns the link job action that yields the HIP fat binary.
Action *finalizeAMDGCNDependences(Action *Input, const llvm::Triple &TT) {
  const auto &Drv = C.getDriver();

  // Lower to assembly, then assemble, both tagged with this builder's
  // offload kind.
  Action *BackendStep = Drv.ConstructPhaseAction(C, Args, phases::Backend,
                                                 Input, AssociatedOffloadKind);
  Action *AssembleStep = Drv.ConstructPhaseAction(
      C, Args, phases::Assemble, BackendStep, AssociatedOffloadKind);

  // First link job: assembled object -> device image.
  ActionList ObjectInputs;
  ObjectInputs.push_back(AssembleStep);
  Action *DeviceImage =
      C.MakeAction<LinkJobAction>(ObjectInputs, types::TY_Image);

  // Second link job: device image -> HIP fat binary.
  ActionList ImageInputs;
  ImageInputs.push_back(DeviceImage);
  return C.MakeAction<LinkJobAction>(ImageInputs, types::TY_HIP_FATBIN);
}

public:
SYCLActionBuilder(Compilation &C, DerivedArgList &Args,
const Driver::InputList &Inputs)
Expand Down Expand Up @@ -4294,6 +4314,7 @@ class OffloadingActionBuilder final {
ActionList LinkObjects;
auto TT = SYCLTripleList[I];
auto isNVPTX = (*TC)->getTriple().isNVPTX();
auto isAMDGCN = (*TC)->getTriple().isAMDGCN();
bool isSpirvAOT = TT.getSubArch() == llvm::Triple::SPIRSubArch_fpga ||
TT.getSubArch() == llvm::Triple::SPIRSubArch_gen ||
TT.getSubArch() == llvm::Triple::SPIRSubArch_x86_64;
Expand Down Expand Up @@ -4391,7 +4412,7 @@ class OffloadingActionBuilder final {
// When spv online link is supported by all backends, the fallback
// device libraries are only needed when current toolchain is using
// AOT compilation.
if (!isNVPTX) {
if (!isNVPTX && !isAMDGCN) {
SYCLDeviceLibLinked = addSYCLDeviceLibs(
*TC, FullLinkObjects, true,
C.getDefaultToolChain().getTriple().isWindowsMSVCEnvironment());
Expand All @@ -4405,7 +4426,7 @@ class OffloadingActionBuilder final {
FullDeviceLinkAction = DeviceLinkAction;
// setup some flags upfront

if (isNVPTX && DeviceCodeSplit) {
if ((isNVPTX || isAMDGCN) && DeviceCodeSplit) {
// TODO Temporary limitation, need to support code splitting for PTX and AMDGCN
const Driver &D = C.getDriver();
const std::string &OptName =
Expand All @@ -4417,14 +4438,14 @@ class OffloadingActionBuilder final {
}
// reflects whether current target is ahead-of-time and can't support
// runtime setting of specialization constants
bool isAOT = isNVPTX || isSpirvAOT;
bool isAOT = isNVPTX || isAMDGCN || isSpirvAOT;
// TODO support device code split for NVPTX target

ActionList WrapperInputs;
// post link is not optional - even if not splitting, always need to
// process specialization constants
types::ID PostLinkOutType =
isNVPTX ? types::TY_LLVM_BC : types::TY_Tempfiletable;
isNVPTX || isAMDGCN ? types::TY_LLVM_BC : types::TY_Tempfiletable;
auto *PostLinkAction = C.MakeAction<SYCLPostLinkJobAction>(
FullDeviceLinkAction, PostLinkOutType);
PostLinkAction->setRTSetsSpecConstants(!isAOT);
Expand All @@ -4433,6 +4454,10 @@ class OffloadingActionBuilder final {
Action *FinAction =
finalizeNVPTXDependences(PostLinkAction, (*TC)->getTriple());
WrapperInputs.push_back(FinAction);
} else if (isAMDGCN) {
Action *FinAction =
finalizeAMDGCNDependences(PostLinkAction, (*TC)->getTriple());
WrapperInputs.push_back(FinAction);
} else {
// For SPIRV-based targets - translate to SPIRV then optionally
// compile ahead-of-time to native architecture
Expand Down Expand Up @@ -7276,7 +7301,7 @@ const ToolChain &Driver::getOffloadingDeviceToolChain(const ArgList &Args,
break;
case Action::OFK_HIP:
TC = std::make_unique<toolchains::HIPToolChain>(
*this, Target, HostTC, Args);
*this, Target, HostTC, Args, TargetDeviceOffloadKind);
break;
case Action::OFK_OpenMP:
// omp + nvptx
Expand All @@ -7295,6 +7320,10 @@ const ToolChain &Driver::getOffloadingDeviceToolChain(const ArgList &Args,
TC = std::make_unique<toolchains::CudaToolChain>(
*this, Target, HostTC, Args, TargetDeviceOffloadKind);
break;
case llvm::Triple::amdgcn:
TC = std::make_unique<toolchains::HIPToolChain>(
*this, Target, HostTC, Args, TargetDeviceOffloadKind);
break;
default:
break;
}
Expand Down
4 changes: 3 additions & 1 deletion clang/lib/Driver/ToolChain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1169,7 +1169,9 @@ llvm::opt::DerivedArgList *ToolChain::TranslateOffloadTargetArgs(
// matches the current toolchain triple. If it is not present
// at all, target and host share a toolchain.
if (A->getOption().matches(options::OPT_m_Group)) {
if (SameTripleAsHost)
// AMD GPU is a special case, as -mcpu is required for the device
// compilation.
if (SameTripleAsHost || getTriple().getArch() == llvm::Triple::amdgcn)
DAL->append(A);
else
Modified = true;
Expand Down
79 changes: 74 additions & 5 deletions clang/lib/Driver/ToolChains/HIP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,8 @@ void AMDGCN::constructHIPFatbinCommand(Compilation &C, const JobAction &JA,
// for backward compatibility. For code object version 4 and greater, the
// offload kind in bundle ID is 'hipv4'.
std::string OffloadKind = "hip";
if (getAMDGPUCodeObjectVersion(C.getDriver(), Args) >= 4)
if (haveAMDGPUCodeObjectVersionArgument(C.getDriver(), Args) &&
getAMDGPUCodeObjectVersion(C.getDriver(), Args) >= 4)
OffloadKind = OffloadKind + "v4";
for (const auto &II : Inputs) {
const auto* A = II.getAction();
Expand Down Expand Up @@ -231,8 +232,9 @@ void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA,
}

HIPToolChain::HIPToolChain(const Driver &D, const llvm::Triple &Triple,
const ToolChain &HostTC, const ArgList &Args)
: ROCMToolChain(D, Triple, Args), HostTC(HostTC) {
const ToolChain &HostTC, const ArgList &Args,
const Action::OffloadKind OK)
: ROCMToolChain(D, Triple, Args), HostTC(HostTC), OK(OK) {
// Lookup binaries into the driver directory, this is used to
// discover the clang-offload-bundler executable.
getProgramPaths().push_back(getDriver().Dir);
Expand All @@ -244,8 +246,11 @@ void HIPToolChain::addClangTargetOptions(
Action::OffloadKind DeviceOffloadingKind) const {
HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);

assert(DeviceOffloadingKind == Action::OFK_HIP &&
"Only HIP offloading kinds are supported for GPUs.");
assert((DeviceOffloadingKind == Action::OFK_HIP ||
DeviceOffloadingKind == Action::OFK_SYCL) &&
"Only HIP and SYCL offloading kinds are supported for GPUs.");

StringRef GpuArch = getGPUArch(DriverArgs);

CC1Args.push_back("-fcuda-is-device");

Expand Down Expand Up @@ -275,6 +280,57 @@ void HIPToolChain::addClangTargetOptions(
CC1Args.push_back("-fapply-global-visibility-to-externs");
}

if (DeviceOffloadingKind == Action::OFK_SYCL) {
toolchains::SYCLToolChain::AddSYCLIncludeArgs(getDriver(), DriverArgs,
CC1Args);
}

auto NoLibSpirv = DriverArgs.hasArg(options::OPT_fno_sycl_libspirv,
options::OPT_fsycl_device_only);
if (DeviceOffloadingKind == Action::OFK_SYCL && !NoLibSpirv) {
std::string LibSpirvFile;

if (DriverArgs.hasArg(clang::driver::options::OPT_fsycl_libspirv_path_EQ)) {
auto ProvidedPath =
DriverArgs
.getLastArgValue(
clang::driver::options::OPT_fsycl_libspirv_path_EQ)
.str();
if (llvm::sys::fs::exists(ProvidedPath))
LibSpirvFile = ProvidedPath;
} else {
SmallVector<StringRef, 8> LibraryPaths;

// Expected path w/out install.
SmallString<256> WithoutInstallPath(getDriver().ResourceDir);
llvm::sys::path::append(WithoutInstallPath, Twine("../../clc"));
LibraryPaths.emplace_back(WithoutInstallPath.c_str());

// Expected path w/ install.
SmallString<256> WithInstallPath(getDriver().ResourceDir);
llvm::sys::path::append(WithInstallPath, Twine("../../../share/clc"));
LibraryPaths.emplace_back(WithInstallPath.c_str());

std::string LibSpirvTargetName = "libspirv-amdgcn--amdhsa.bc";
for (StringRef LibraryPath : LibraryPaths) {
SmallString<128> LibSpirvTargetFile(LibraryPath);
llvm::sys::path::append(LibSpirvTargetFile, LibSpirvTargetName);
if (llvm::sys::fs::exists(LibSpirvTargetFile)) {
LibSpirvFile = std::string(LibSpirvTargetFile.str());
break;
}
}
}

if (LibSpirvFile.empty()) {
getDriver().Diag(diag::err_drv_no_sycl_libspirv);
return;
}

CC1Args.push_back("-mlink-builtin-bitcode");
CC1Args.push_back(DriverArgs.MakeArgString(LibSpirvFile));
}

llvm::for_each(getHIPDeviceLibs(DriverArgs), [&](StringRef BCFile) {
CC1Args.push_back("-mlink-builtin-bitcode");
CC1Args.push_back(DriverArgs.MakeArgString(BCFile));
Expand Down Expand Up @@ -308,9 +364,22 @@ HIPToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,

/// Creates the linker tool for this toolchain.
///
/// When the toolchain was constructed for SYCL offloading, the SYCL-aware
/// AMDGCN linker is returned. Otherwise the plain AMDGCN linker is returned.
/// The caller receives a newly allocated tool.
Tool *HIPToolChain::buildLinker() const {
  assert(getTriple().getArch() == llvm::Triple::amdgcn);

  const bool ForSYCL = (OK == Action::OFK_SYCL);
  if (!ForSYCL)
    return new tools::AMDGCN::Linker(*this);

  return new tools::AMDGCN::SYCLLinker(*this);
}

/// Picks the tool that runs \p JA.
///
/// For SYCL offloading, a link job whose output type is LLVM bitcode is
/// routed to the SYCL toolchain linker owned by this toolchain's SYCLLinker.
/// Every other job uses the default ToolChain selection.
Tool *HIPToolChain::SelectTool(const JobAction &JA) const {
  const bool IsSYCLBitcodeLink = OK == Action::OFK_SYCL &&
                                 JA.getKind() == Action::LinkJobClass &&
                                 JA.getType() == types::TY_LLVM_BC;
  if (!IsSYCLBitcodeLink)
    return ToolChain::SelectTool(JA);

  // The default selection for this job is cast to SYCLLinker, which this
  // toolchain's buildLinker() creates when OK is OFK_SYCL.
  auto *AMDLinker =
      static_cast<tools::AMDGCN::SYCLLinker *>(ToolChain::SelectTool(JA));
  return AMDLinker->GetSYCLToolChainLinker();
}

/// Forwards warning-option setup to the host toolchain, which appends to
/// \p CC1Args.
void HIPToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
  const auto &Host = HostTC;
  Host.addClangWarningOptions(CC1Args);
}
Expand Down
21 changes: 20 additions & 1 deletion clang/lib/Driver/ToolChains/HIP.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,20 @@ class LLVM_LIBRARY_VISIBILITY Linker : public Tool {
const JobAction &JA) const;
};

/// AMDGCN linker variant that also owns a SYCL::Linker for the same
/// toolchain. The SYCL::Linker is created on the first call to
/// GetSYCLToolChainLinker().
class LLVM_LIBRARY_VISIBILITY SYCLLinker : public Linker {
  // Created on first request; mutable so the const accessor can fill it in.
  mutable std::unique_ptr<Tool> SYCLToolChainLinker;

public:
  SYCLLinker(const ToolChain &TC) : Linker(TC) {}

  /// Returns the SYCL::Linker bound to this tool's toolchain, creating it on
  /// the first call. The returned pointer stays owned by this object.
  Tool *GetSYCLToolChainLinker() const {
    if (SYCLToolChainLinker)
      return SYCLToolChainLinker.get();

    SYCLToolChainLinker = std::make_unique<SYCL::Linker>(getToolChain());
    return SYCLToolChainLinker.get();
  }
};

} // end namespace AMDGCN
} // end namespace tools

Expand All @@ -59,7 +73,8 @@ namespace toolchains {
class LLVM_LIBRARY_VISIBILITY HIPToolChain final : public ROCMToolChain {
public:
HIPToolChain(const Driver &D, const llvm::Triple &Triple,
const ToolChain &HostTC, const llvm::opt::ArgList &Args);
const ToolChain &HostTC, const llvm::opt::ArgList &Args,
const Action::OffloadKind OK);

const llvm::Triple *getAuxTriple() const override {
return &HostTC.getTriple();
Expand Down Expand Up @@ -96,9 +111,13 @@ class LLVM_LIBRARY_VISIBILITY HIPToolChain final : public ROCMToolChain {

const ToolChain &HostTC;
void checkTargetID(const llvm::opt::ArgList &DriverArgs) const override;
Tool *SelectTool(const JobAction &JA) const override;

protected:
Tool *buildLinker() const override;

private:
const Action::OffloadKind OK;
};

} // end namespace toolchains
Expand Down
6 changes: 4 additions & 2 deletions clang/lib/Driver/ToolChains/SYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,8 @@ void SYCL::Linker::ConstructJob(Compilation &C, const JobAction &JA,
const char *LinkingOutput) const {

assert((getToolChain().getTriple().isSPIR() ||
getToolChain().getTriple().isNVPTX()) &&
getToolChain().getTriple().isNVPTX() ||
getToolChain().getTriple().isAMDGCN()) &&
"Unsupported target");

std::string SubArchName =
Expand All @@ -285,7 +286,8 @@ void SYCL::Linker::ConstructJob(Compilation &C, const JobAction &JA,

// For CUDA and AMDGCN, we want to link all BC files before resuming the
// normal compilation path
if (getToolChain().getTriple().isNVPTX()) {
if (getToolChain().getTriple().isNVPTX() ||
getToolChain().getTriple().isAMDGCN()) {
InputInfoList NvptxInputs;
for (const auto &II : Inputs) {
if (!II.isFilename())
Expand Down
38 changes: 38 additions & 0 deletions clang/test/Driver/sycl-offload-amdgcn.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/// Tests specific to `-fsycl-targets=amdgcn-amd-amdhsa-sycldevice`
// REQUIRES: clang-driver

// UNSUPPORTED: system-windows

/// Check action graph.
// RUN: %clangxx -### -std=c++11 -target x86_64-unknown-linux-gnu -fsycl \
// RUN: -fsycl-targets=amdgcn-amd-amdhsa-sycldevice -mcpu=gfx906 \
// RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-ACTIONS %s
// CHK-ACTIONS: "-cc1" "-triple" "amdgcn-amd-amdhsa-sycldevice" "-aux-triple" "x86_64-unknown-linux-gnu"{{.*}} "-fsycl-is-device"{{.*}} "-Wno-sycl-strict"{{.*}} "-sycl-std=2020" {{.*}} "-internal-isystem" "{{.*}}bin{{[/\\]+}}..{{[/\\]+}}include{{[/\\]+}}sycl"{{.*}} "-mlink-builtin-bitcode" "{{.*}}libspirv.bc"{{.*}} "-target-cpu" "gfx906"{{.*}} "-std=c++11"{{.*}}
// CHK-ACTIONS-NOT: "-mllvm -sycl-opt"
// CHK-ACTIONS: clang-offload-wrapper"{{.*}} "-host=x86_64-unknown-linux-gnu" "-target=amdgcn" "-kind=sycl"{{.*}}

/// Check phases w/out specifying a compute capability.
// RUN: %clangxx -ccc-print-phases -std=c++11 -target x86_64-unknown-linux-gnu -fsycl -fsycl-use-footer \
// RUN: -fsycl-targets=amdgcn-amd-amdhsa-sycldevice -mcpu=gfx906 %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-PHASES-NO-CC %s
// CHK-PHASES-NO-CC: 0: input, "{{.*}}", c++, (host-sycl)
// CHK-PHASES-NO-CC: 1: preprocessor, {0}, c++-cpp-output, (host-sycl)
// CHK-PHASES-NO-CC: 2: append-footer, {1}, c++, (host-sycl)
// CHK-PHASES-NO-CC: 3: preprocessor, {2}, c++-cpp-output, (host-sycl)
// CHK-PHASES-NO-CC: 4: input, "{{.*}}", c++, (device-sycl)
// CHK-PHASES-NO-CC: 5: preprocessor, {4}, c++-cpp-output, (device-sycl)
// CHK-PHASES-NO-CC: compiler, {5}, ir, (device-sycl)
// CHK-PHASES-NO-CC: offload, "host-sycl (x86_64-unknown-linux-gnu)" {3}, "device-sycl (amdgcn-amd-amdhsa-sycldevice)" {6}, c++-cpp-output
// CHK-PHASES-NO-CC: compiler, {7}, ir, (host-sycl)
// CHK-PHASES-NO-CC: backend, {8}, assembler, (host-sycl)
// CHK-PHASES-NO-CC: assembler, {9}, object, (host-sycl)
// CHK-PHASES-NO-CC: linker, {10}, image, (host-sycl)
// CHK-PHASES-NO-CC: linker, {6}, ir, (device-sycl)
// CHK-PHASES-NO-CC: sycl-post-link, {12}, ir, (device-sycl)
// CHK-PHASES-NO-CC: backend, {13}, assembler, (device-sycl)
// CHK-PHASES-NO-CC: assembler, {14}, object, (device-sycl)
// CHK-PHASES-NO-CC: linker, {15}, image, (device-sycl)
// CHK-PHASES-NO-CC: linker, {16}, hip-fatbin, (device-sycl)
// CHK-PHASES-NO-CC: clang-offload-wrapper, {17}, object, (device-sycl)
// CHK-PHASES-NO-CC: offload, "host-sycl (x86_64-unknown-linux-gnu)" {11}, "device-sycl (amdgcn-amd-amdhsa-sycldevice)" {18}, image
Loading