Skip to content

[SYCL][CUDA][HIP] Implement support for AMD and NVIDIA architectures as argument to fsycl-targets #7348

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 19 commits into from
Dec 10, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
be33adc
[SYCL][CUDA][HIP] Update if_architecture_is extension to include NVID…
mmoadeli Nov 9, 2022
124a5d3
[SYCL] Moves if_architecture_is code header file from intel to oneapi.
mmoadeli Nov 9, 2022
9ee8129
[SYCL][Doc] Updates experimental/sycl_ext_intel_device_architecture.a…
mmoadeli Nov 10, 2022
a5adf7c
[SYCL][Doc] Rename sycl_ext_intel_device_architecture.asciidoc to syc…
mmoadeli Nov 10, 2022
c361c79
Merge branch 'intel:sycl' into nvida-amd-imp-ext
mmoadeli Nov 10, 2022
2ea6ff7
[SYCL][CUDA] Adds driver support for nvidia gpus.
mmoadeli Nov 14, 2022
71775fd
[SYCL][CUDA] Modify nvidia_gpu_* device support to be treated as nvp…
mmoadeli Nov 15, 2022
53ed132
[SYCL] Add support for gpu archs as argument for -fsycl-targets
mmoadeli Nov 20, 2022
cd21176
[SYCL] Rename sycl-intel-gpu.cpp to sycl-oneapli-gpu.cpp
mmoadeli Nov 20, 2022
4a20b80
[SYCL] Minor required update in a comment to reflect modifications.
mmoadeli Nov 21, 2022
0cf57e0
[SYCL] Rename TranslateAMDGPUTargetOpt to TranslateGPUTargetOpt
mmoadeli Nov 21, 2022
ff41b65
[SYCL] Updates User Manual to include AMD and NVIDIA device architect…
mmoadeli Nov 21, 2022
e3bfa04
[SYCL] Remove DPC++ un-supported nvidia architectures.
mmoadeli Nov 21, 2022
3da3359
[SYCL][HIP][CUDA] Improve lit test.
mmoadeli Nov 22, 2022
21d1148
[SYCL][HIP] Add AMD build phase lit test.
mmoadeli Nov 22, 2022
93fa882
Merge branch 'sycl' into nvida-amd-imp-ext
mmoadeli Nov 22, 2022
a0fc323
[SYCL] Add missing file from merge
mmoadeli Nov 22, 2022
aeab924
[SYCL] Removes unused TranslateGPUTargetOpt function argument
mmoadeli Nov 23, 2022
e6d89a7
[SYCL] Fix clang code style
mmoadeli Nov 24, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 36 additions & 15 deletions clang/lib/Driver/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -824,19 +824,6 @@ static bool addSYCLDefaultTriple(Compilation &C,
return true;
}

// Prefix for Intel GPU specific targets used for -fsycl-targets
constexpr char IntelGPU[] = "intel_gpu_";

// Recognizes -fsycl-targets values of the form 'intel_gpu_*', which are
// 'spir64_gen' based. For such a value the text following the prefix is passed
// to resolveGenDevice() and its result is returned; any other value yields
// llvm::None.
static llvm::Optional<StringRef> isIntelGPUTarget(StringRef Target) {
  if (!Target.startswith(IntelGPU))
    return llvm::None;
  // sizeof also counts the terminating NUL of the prefix array, hence the -1.
  const auto PrefixLen = sizeof(IntelGPU) - 1;
  StringRef DeviceName = Target.drop_front(PrefixLen);
  return tools::SYCL::gen::resolveGenDevice(DeviceName);
}

void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
InputList &Inputs) {

Expand All @@ -845,6 +832,7 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
//
// We need to generate a CUDA/HIP toolchain if any of the inputs has a CUDA
// or HIP type. However, mixed CUDA/HIP compilation is not supported.
using namespace tools::SYCL;
bool IsCuda =
llvm::any_of(Inputs, [](std::pair<types::ID, const llvm::opt::Arg *> &I) {
return types::isCuda(I.first);
Expand Down Expand Up @@ -1122,12 +1110,24 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,

for (StringRef Val : SYCLTargetsValues->getValues()) {
StringRef UserTargetName(Val);
if (auto Device = isIntelGPUTarget(Val)) {
if (auto Device = gen::isGPUTarget<gen::IntelGPU>(Val)) {
if (Device->empty()) {
Diag(clang::diag::err_drv_invalid_sycl_target) << Val;
continue;
}
UserTargetName = "spir64_gen";
} else if (auto Device = gen::isGPUTarget<gen::NvidiaGPU>(Val)) {
if (Device->empty()) {
Diag(clang::diag::err_drv_invalid_sycl_target) << Val;
continue;
}
UserTargetName = "nvptx64-nvidia-cuda";
} else if (auto Device = gen::isGPUTarget<gen::AmdGPU>(Val)) {
if (Device->empty()) {
Diag(clang::diag::err_drv_invalid_sycl_target) << Val;
continue;
}
UserTargetName = "amdgcn-amd-amdhsa";
}

if (!isValidSYCLTriple(MakeSYCLDeviceTriple(UserTargetName))) {
Expand Down Expand Up @@ -5716,6 +5716,7 @@ class OffloadingActionBuilder final {
}

bool initialize() override {
using namespace tools::SYCL;
// Get the SYCL toolchains. If we don't get any, the action builder will
// know there is nothing to do related to SYCL offloading.
auto SYCLTCRange = C.getOffloadToolChains<Action::OFK_SYCL>();
Expand Down Expand Up @@ -5755,15 +5756,35 @@ class OffloadingActionBuilder final {
llvm::StringMap<StringRef> FoundNormalizedTriples;
for (StringRef Val : SYCLTargetsValues->getValues()) {
StringRef UserTargetName(Val);
if (auto ValidDevice = isIntelGPUTarget(Val)) {
if (auto ValidDevice = gen::isGPUTarget<gen::IntelGPU>(Val)) {
if (ValidDevice->empty())
// Unrecognized, we have already diagnosed this earlier; skip.
continue;
// Add the proper -device value to the list.
GpuArchList.emplace_back(C.getDriver().MakeSYCLDeviceTriple(
"spir64_gen"), ValidDevice->data());
UserTargetName = "spir64_gen";
} else if (auto ValidDevice =
gen::isGPUTarget<gen::NvidiaGPU>(Val)) {
if (ValidDevice->empty())
// Unrecognized, we have already diagnosed this earlier; skip.
continue;
// Add the proper -device value to the list.
GpuArchList.emplace_back(
C.getDriver().MakeSYCLDeviceTriple("nvptx64-nvidia-cuda"),
ValidDevice->data());
UserTargetName = "nvptx64-nvidia-cuda";
} else if (auto ValidDevice = gen::isGPUTarget<gen::AmdGPU>(Val)) {
if (ValidDevice->empty())
// Unrecognized, we have already diagnosed this earlier; skip.
continue;
// Add the proper -device value to the list.
GpuArchList.emplace_back(
C.getDriver().MakeSYCLDeviceTriple("amdgcn-amd-amdhsa"),
ValidDevice->data());
UserTargetName = "amdgcn-amd-amdhsa";
}

llvm::Triple TT(C.getDriver().MakeSYCLDeviceTriple(Val));
std::string NormalizedName = TT.normalize();

Expand Down
6 changes: 4 additions & 2 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5192,10 +5192,12 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
// between device and host where we should be able to use the offloading
// arch to add the macro to the host compile.
auto addTargetMacros = [&](const llvm::Triple &Triple) {
if (!Triple.isSPIR())
if (!Triple.isSPIR() && !Triple.isNVPTX() && !Triple.isAMDGCN())
return;
SmallString<64> Macro;
if (Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen) {
if ((Triple.isSPIR() &&
Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen) ||
Triple.isNVPTX() || Triple.isAMDGCN()) {
StringRef Device = JA.getOffloadingArch();
if (!Device.empty()) {
Macro = "-D";
Expand Down
182 changes: 136 additions & 46 deletions clang/lib/Driver/ToolChains/SYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -596,60 +596,130 @@ void SYCL::gen::BackendCompiler::ConstructJob(Compilation &C,
// Looks DeviceName up in the table below and returns the string associated
// with the matching entry. When no entry matches, the empty StringRef supplied
// to Default() is returned, so callers can test the result with empty().
// Entries written with Cases() list two keys that share a single result.
StringRef SYCL::gen::resolveGenDevice(StringRef DeviceName) {
StringRef Device;
Device = llvm::StringSwitch<StringRef>(DeviceName)
// Keys without a vendor prefix. The second key of each Cases() entry is
// presumably a version-style alias of the first -- TODO confirm.
.Cases("bdw", "8_0_0", "bdw")
.Cases("skl", "9_0_9", "skl")
.Cases("kbl", "9_1_9", "kbl")
.Cases("cfl", "9_2_9", "cfl")
.Cases("apl", "9_3_0", "apl")
.Cases("glk", "9_4_0", "glk")
.Cases("whl", "9_5_0", "whl")
.Cases("aml", "9_6_0", "aml")
.Cases("cml", "9_7_0", "cml")
.Cases("icllp", "11_0_0", "icllp")
.Cases("ehl", "11_2_0", "ehl")
.Cases("tgllp", "12_0_0", "tgllp")
.Case("rkl", "rkl")
.Case("adl_s", "adl_s")
.Case("rpl_s", "rpl_s")
.Case("adl_p", "adl_p")
.Case("adl_n", "adl_n")
.Cases("dg1", "12_10_0", "dg1")
.Case("acm_g10", "acm_g10")
.Case("acm_g11", "acm_g11")
.Case("acm_g12", "acm_g12")
.Case("pvc", "pvc")
// Keys carrying the 'intel_gpu_' prefix; the result drops the prefix.
.Cases("intel_gpu_bdw", "intel_gpu_8_0_0", "bdw")
.Cases("intel_gpu_skl", "intel_gpu_9_0_9", "skl")
.Cases("intel_gpu_kbl", "intel_gpu_9_1_9", "kbl")
.Cases("intel_gpu_cfl", "intel_gpu_9_2_9", "cfl")
.Cases("intel_gpu_apl", "intel_gpu_9_3_0", "apl")
.Cases("intel_gpu_glk", "intel_gpu_9_4_0", "glk")
.Cases("intel_gpu_whl", "intel_gpu_9_5_0", "whl")
.Cases("intel_gpu_aml", "intel_gpu_9_6_0", "aml")
.Cases("intel_gpu_cml", "intel_gpu_9_7_0", "cml")
.Cases("intel_gpu_icllp", "intel_gpu_11_0_0", "icllp")
.Cases("intel_gpu_ehl", "intel_gpu_11_2_0", "ehl")
.Cases("intel_gpu_tgllp", "intel_gpu_12_0_0", "tgllp")
.Case("intel_gpu_rkl", "rkl")
.Case("intel_gpu_adl_s", "adl_s")
.Case("intel_gpu_rpl_s", "rpl_s")
.Case("intel_gpu_adl_p", "adl_p")
.Case("intel_gpu_adl_n", "adl_n")
.Cases("intel_gpu_dg1", "intel_gpu_12_10_0", "dg1")
.Case("intel_gpu_acm_g10", "acm_g10")
.Case("intel_gpu_acm_g11", "acm_g11")
.Case("intel_gpu_acm_g12", "acm_g12")
.Case("intel_gpu_pvc", "pvc")
// Keys carrying the 'nvidia_gpu_' prefix; the result is the 'sm_NN' part.
.Case("nvidia_gpu_sm_50", "sm_50")
.Case("nvidia_gpu_sm_52", "sm_52")
.Case("nvidia_gpu_sm_53", "sm_53")
.Case("nvidia_gpu_sm_60", "sm_60")
.Case("nvidia_gpu_sm_61", "sm_61")
.Case("nvidia_gpu_sm_62", "sm_62")
.Case("nvidia_gpu_sm_70", "sm_70")
.Case("nvidia_gpu_sm_72", "sm_72")
.Case("nvidia_gpu_sm_75", "sm_75")
.Case("nvidia_gpu_sm_80", "sm_80")
.Case("nvidia_gpu_sm_86", "sm_86")
.Case("nvidia_gpu_sm_87", "sm_87")
.Case("nvidia_gpu_sm_89", "sm_89")
.Case("nvidia_gpu_sm_90", "sm_90")
// Keys carrying the 'amd_gpu_' prefix; the result is the 'gfxNNN' part.
.Case("amd_gpu_gfx700", "gfx700")
.Case("amd_gpu_gfx701", "gfx701")
.Case("amd_gpu_gfx702", "gfx702")
.Case("amd_gpu_gfx801", "gfx801")
.Case("amd_gpu_gfx802", "gfx802")
.Case("amd_gpu_gfx803", "gfx803")
.Case("amd_gpu_gfx805", "gfx805")
.Case("amd_gpu_gfx810", "gfx810")
.Case("amd_gpu_gfx900", "gfx900")
.Case("amd_gpu_gfx902", "gfx902")
.Case("amd_gpu_gfx904", "gfx904")
.Case("amd_gpu_gfx906", "gfx906")
.Case("amd_gpu_gfx908", "gfx908")
.Case("amd_gpu_gfx90a", "gfx90a")
.Case("amd_gpu_gfx1010", "gfx1010")
.Case("amd_gpu_gfx1011", "gfx1011")
.Case("amd_gpu_gfx1012", "gfx1012")
.Case("amd_gpu_gfx1013", "gfx1013")
.Case("amd_gpu_gfx1030", "gfx1030")
.Case("amd_gpu_gfx1031", "gfx1031")
.Case("amd_gpu_gfx1032", "gfx1032")
// Anything not listed above resolves to the empty string.
.Default("");
return Device;
}

StringRef SYCL::gen::getGenDeviceMacro(StringRef DeviceName) {
SmallString<64> SYCL::gen::getGenDeviceMacro(StringRef DeviceName) {
SmallString<64> Macro;
StringRef Ext = llvm::StringSwitch<StringRef>(DeviceName)
.Case("bdw", "BDW")
.Case("skl", "SKL")
.Case("kbl", "KBL")
.Case("cfl", "CFL")
.Case("apl", "APL")
.Case("glk", "GLK")
.Case("whl", "WHL")
.Case("aml", "AML")
.Case("cml", "CML")
.Case("icllp", "ICLLP")
.Case("ehl", "EHL")
.Case("tgllp", "TGLLP")
.Case("rkl", "RKL")
.Case("adl_s", "ADL_S")
.Case("rpl_s", "RPL_S")
.Case("adl_p", "ADL_P")
.Case("adl_n", "ADL_N")
.Case("dg1", "DG1")
.Case("acm_g10", "ACM_G10")
.Case("acm_g11", "ACM_G11")
.Case("acm_g12", "ACM_G12")
.Case("pvc", "PVC")
.Case("bdw", "INTEL_GPU_BDW")
.Case("skl", "INTEL_GPU_SKL")
.Case("kbl", "INTEL_GPU_KBL")
.Case("cfl", "INTEL_GPU_CFL")
.Case("apl", "INTEL_GPU_APL")
.Case("glk", "INTEL_GPU_GLK")
.Case("whl", "INTEL_GPU_WHL")
.Case("aml", "INTEL_GPU_AML")
.Case("cml", "INTEL_GPU_CML")
.Case("icllp", "INTEL_GPU_ICLLP")
.Case("ehl", "INTEL_GPU_EHL")
.Case("tgllp", "INTEL_GPU_TGLLP")
.Case("rkl", "INTEL_GPU_RKL")
.Case("adl_s", "INTEL_GPU_ADL_S")
.Case("rpl_s", "INTEL_GPU_RPL_S")
.Case("adl_p", "INTEL_GPU_ADL_P")
.Case("adl_n", "INTEL_GPU_ADL_N")
.Case("dg1", "INTEL_GPU_DG1")
.Case("acm_g10", "INTEL_GPU_ACM_G10")
.Case("acm_g11", "INTEL_GPU_ACM_G11")
.Case("acm_g12", "INTEL_GPU_ACM_G12")
.Case("pvc", "INTEL_GPU_PVC")
.Case("sm_50", "NVIDIA_GPU_SM_50")
.Case("sm_52", "NVIDIA_GPU_SM_52")
.Case("sm_53", "NVIDIA_GPU_SM_53")
.Case("sm_60", "NVIDIA_GPU_SM_60")
.Case("sm_61", "NVIDIA_GPU_SM_61")
.Case("sm_62", "NVIDIA_GPU_SM_62")
.Case("sm_70", "NVIDIA_GPU_SM_70")
.Case("sm_72", "NVIDIA_GPU_SM_72")
.Case("sm_75", "NVIDIA_GPU_SM_75")
.Case("sm_80", "NVIDIA_GPU_SM_80")
.Case("sm_86", "NVIDIA_GPU_SM_86")
.Case("sm_87", "NVIDIA_GPU_SM_87")
.Case("sm_89", "NVIDIA_GPU_SM_89")
.Case("sm_90", "NVIDIA_GPU_SM_90")
.Case("gfx700", "AMD_GPU_GFX700")
.Case("gfx701", "AMD_GPU_GFX701")
.Case("gfx702", "AMD_GPU_GFX702")
.Case("gfx801", "AMD_GPU_GFX801")
.Case("gfx802", "AMD_GPU_GFX802")
.Case("gfx803", "AMD_GPU_GFX803")
.Case("gfx805", "AMD_GPU_GFX805")
.Case("gfx810", "AMD_GPU_GFX810")
.Case("gfx900", "AMD_GPU_GFX900")
.Case("gfx902", "AMD_GPU_GFX902")
.Case("gfx904", "AMD_GPU_GFX904")
.Case("gfx906", "AMD_GPU_GFX906")
.Case("gfx908", "AMD_GPU_GFX908")
.Case("gfx90a", "AMD_GPU_GFX90A")
.Case("gfx1010", "AMD_GPU_GFX1010")
.Case("gfx1011", "AMD_GPU_GFX1011")
.Case("gfx1012", "AMD_GPU_GFX1012")
.Case("gfx1013", "AMD_GPU_GFX1013")
.Case("gfx1030", "AMD_GPU_GFX1030")
.Case("gfx1031", "AMD_GPU_GFX1031")
.Case("gfx1032", "AMD_GPU_GFX1032")
.Default("");
if (!Ext.empty()) {
Macro = "__SYCL_TARGET_INTEL_GPU_";
Macro = "__SYCL_TARGET_";
Macro += Ext;
Macro += "__";
}
Expand Down Expand Up @@ -759,6 +829,25 @@ static void parseTargetOpts(StringRef ArgString, const llvm::opt::ArgList &Args,
CmdArgs.push_back(Args.MakeArgString(TA));
}

// Scans Args for occurrences of the option Opt_EQ whose value is an
// 'amd_gpu_*' target. For each such occurrence the argument is claimed and,
// when the target resolves to a known architecture, a matching
// '--offload-arch=<arch>' string is forwarded to parseTargetOpts() so that it
// ends up in CmdArgs.
//
// \param Args    the full argument list to scan.
// \param CmdArgs receives the translated backend argument(s).
// \param Opt_EQ  the option whose values are inspected.
//
// NOTE(review): only A->getValue() is inspected, which presumably is just the
// first value of a comma-joined option -- confirm whether later values of the
// same argument also need translating.
void SYCLToolChain::TranslateGPUTargetOpt(const llvm::opt::ArgList &Args,
                                          llvm::opt::ArgStringList &CmdArgs,
                                          OptSpecifier Opt_EQ) const {
  for (auto *A : Args) {
    if (!A->getOption().matches(Opt_EQ))
      continue;
    auto GpuDevice = tools::SYCL::gen::isGPUTarget<tools::SYCL::gen::AmdGPU>(
        A->getValue());
    if (!GpuDevice)
      continue;
    // The value carries the AMD prefix, so this argument is consumed here
    // whether or not the architecture behind the prefix is a known one.
    A->claim();
    // An unrecognized 'amd_gpu_*' value resolves to an empty device name;
    // do not emit a bare '--offload-arch=' for it.
    if (GpuDevice->empty())
      continue;
    SmallString<64> OffloadArch("--offload-arch=");
    // Append the StringRef itself rather than its data() pointer: a StringRef
    // is not guaranteed to be NUL-terminated.
    OffloadArch += *GpuDevice;
    parseTargetOpts(OffloadArch.str(), Args, CmdArgs);
  }
}

// Expects a specific type of option (e.g. -Xsycl-target-backend) and will
// extract the arguments.
void SYCLToolChain::TranslateTargetOpt(const llvm::opt::ArgList &Args,
Expand Down Expand Up @@ -914,6 +1003,7 @@ void SYCLToolChain::TranslateBackendTargetArgs(
// Handle -Xsycl-target-backend.
TranslateTargetOpt(Args, CmdArgs, options::OPT_Xsycl_backend,
options::OPT_Xsycl_backend_EQ, Device);
TranslateGPUTargetOpt(Args, CmdArgs, options::OPT_fsycl_targets_EQ);
}

void SYCLToolChain::TranslateLinkerTargetArgs(
Expand Down
19 changes: 18 additions & 1 deletion clang/lib/Driver/ToolChains/SYCL.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,21 @@ class LLVM_LIBRARY_VISIBILITY BackendCompiler : public Tool {
};

StringRef resolveGenDevice(StringRef DeviceName);
StringRef getGenDeviceMacro(StringRef DeviceName);
SmallString<64> getGenDeviceMacro(StringRef DeviceName);

// Prefixes for GPU specific targets used for -fsycl-targets
constexpr char IntelGPU[] = "intel_gpu_";
constexpr char NvidiaGPU[] = "nvidia_gpu_";
constexpr char AmdGPU[] = "amd_gpu_";

// Checks whether Target begins with the vendor prefix supplied as the template
// argument (one of the '(intel, nvidia, amd)_gpu_' prefixes). If it does, the
// complete Target string, prefix included, is handed to resolveGenDevice() and
// that result is returned; otherwise llvm::None is returned.
template <auto GPUArch>
llvm::Optional<StringRef> isGPUTarget(StringRef Target) {
  const bool HasVendorPrefix = Target.startswith(GPUArch);
  if (!HasVendorPrefix)
    return llvm::None;
  return resolveGenDevice(Target);
}

} // end namespace gen

Expand Down Expand Up @@ -189,6 +203,9 @@ class LLVM_LIBRARY_VISIBILITY SYCLToolChain : public ToolChain {
llvm::opt::OptSpecifier Opt,
llvm::opt::OptSpecifier Opt_EQ,
StringRef Device) const;
void TranslateGPUTargetOpt(const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs,
llvm::opt::OptSpecifier Opt_EQ) const;
};

} // end namespace toolchains
Expand Down
Loading