Skip to content

Commit

Permalink
[SYCL] Define device trait macros from compiler driver (intel#10321)
Browse files Browse the repository at this point in the history
This PR implements the changes detailed in [Changes to the compiler
driver](https://github.com/intel/llvm/blob/sycl/sycl/doc/design/DeviceAspectTraitDesign.md#changes-to-the-compiler-driver).

---------

Signed-off-by: Maronas, Marcos <marcos.maronas@intel.com>
  • Loading branch information
maarquitos14 authored Jul 24, 2023
1 parent aec8a35 commit 8c5c8eb
Show file tree
Hide file tree
Showing 9 changed files with 407 additions and 474 deletions.
15 changes: 15 additions & 0 deletions clang/include/clang/Driver/Driver.h
Original file line number Diff line number Diff line change
Expand Up @@ -854,6 +854,11 @@ class Driver {
/// construction of the device compilations.
mutable std::vector<std::string> SYCLTargetMacroArgs;

/// Vector of Macros related to Device Traits that need to be added to the
/// device compilation in a SYCL based offloading scenario. These macros are
/// gathered during creation of offloading device toolchains.
mutable llvm::opt::ArgStringList SYCLDeviceTraitsMacrosArgs;

/// Return the typical executable name for the specified driver \p Mode.
static const char *getExecutableForDriverMode(DriverMode Mode);

Expand Down Expand Up @@ -940,6 +945,16 @@ class Driver {
StringRef getSYCLUniqueID(StringRef FileName) const {
  // Fetch the entry stored in SYCLUniqueIDList under the given file name.
  // NOTE(review): the lookup goes through operator[]; whether a missing key
  // gets inserted depends on the container type, which is declared elsewhere.
  StringRef UniqueID = SYCLUniqueIDList[FileName];
  return UniqueID;
}

/// Reads the device config file (DeviceConfigFile::TargetTable) to find
/// information about the SYCL targets in \p UniqueSYCLTriplesVec, and records
/// the matching `-D__SYCL_ANY_DEVICE_HAS_*` and `-D__SYCL_ALL_DEVICES_HAVE_*`
/// macro definitions in SYCLDeviceTraitsMacrosArgs.
///
/// Each triple is looked up as a whole first; when there is no match, trailing
/// '-'-separated components are dropped one at a time until an entry is found
/// or nothing is left to drop.
///
/// \param Args argument list used to allocate the macro definition strings
///        (through MakeArgString).
/// \param UniqueSYCLTriplesVec the SYCL target triples to look up.
void populateSYCLDeviceTraitsMacrosArgs(
const llvm::opt::ArgList &Args,
const llvm::SmallVector<llvm::Triple, 4> &UniqueSYCLTriplesVec);

/// Returns the device traits macro definitions stored in
/// SYCLDeviceTraitsMacrosArgs.
///
/// The list is handed out by const reference so that callers which only
/// iterate over it do not pay for a copy of the whole vector. Callers that
/// store the result in a value (e.g. `const auto X = ...`) keep working and
/// simply make their own copy. The reference stays valid for as long as this
/// Driver object is alive.
const llvm::opt::ArgStringList &getDeviceTraitsMacrosArgs() const {
  return SYCLDeviceTraitsMacrosArgs;
}
};

/// \return True if the last defined optimization level is -Ofast.
Expand Down
176 changes: 133 additions & 43 deletions clang/lib/Driver/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@
#include "llvm/Option/OptSpecifier.h"
#include "llvm/Option/OptTable.h"
#include "llvm/Option/Option.h"
#include "llvm/SYCLLowerIR/DeviceConfigFile.hpp"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ExitCodes.h"
Expand All @@ -103,9 +104,9 @@
#include <cstdlib> // ::getenv
#include <map>
#include <memory>
#include <optional>
#include <regex>
#include <sstream>
#include <optional>
#include <utility>
#if LLVM_ON_UNIX
#include <unistd.h> // getpid
Expand Down Expand Up @@ -1309,6 +1310,9 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
}
}
}
// Define macros associated with `any_device_has/all_devices_have` according
// to the aspects defined in the DeviceConfigFile for the SYCL targets.
populateSYCLDeviceTraitsMacrosArgs(C.getInputArgs(), UniqueSYCLTriplesVec);
// We'll need to use the SYCL and host triples as the key into
// getOffloadingDeviceToolChain, because the device toolchains we're
// going to create will depend on both.
Expand Down Expand Up @@ -9704,9 +9708,9 @@ const ToolChain &Driver::getToolChain(const ArgList &Args,
return *TC;
}

const ToolChain &Driver::getOffloadingDeviceToolChain(const ArgList &Args,
const llvm::Triple &Target, const ToolChain &HostTC,
const Action::OffloadKind &TargetDeviceOffloadKind) const {
const ToolChain &Driver::getOffloadingDeviceToolChain(
const ArgList &Args, const llvm::Triple &Target, const ToolChain &HostTC,
const Action::OffloadKind &TargetDeviceOffloadKind) const {
// Use device / host triples offload kind as the key into the ToolChains map
// because the device ToolChain we create depends on both.
auto &TC = ToolChains[Target.str() + "/" + HostTC.getTriple().str() +
Expand All @@ -9716,53 +9720,53 @@ const ToolChain &Driver::getOffloadingDeviceToolChain(const ArgList &Args,
// the normal getToolChain call, as it seems a reasonable way to categorize
// things.
switch (TargetDeviceOffloadKind) {
case Action::OFK_Cuda:
TC = std::make_unique<toolchains::CudaToolChain>(
case Action::OFK_Cuda:
TC = std::make_unique<toolchains::CudaToolChain>(
*this, Target, HostTC, Args, TargetDeviceOffloadKind);
break;
case Action::OFK_HIP: {
if (Target.getArch() == llvm::Triple::amdgcn &&
Target.getVendor() == llvm::Triple::AMD &&
Target.getOS() == llvm::Triple::AMDHSA)
TC = std::make_unique<toolchains::HIPAMDToolChain>(
*this, Target, HostTC, Args, TargetDeviceOffloadKind);
else if (Target.getArch() == llvm::Triple::spirv64 &&
Target.getVendor() == llvm::Triple::UnknownVendor &&
Target.getOS() == llvm::Triple::UnknownOS)
TC = std::make_unique<toolchains::HIPSPVToolChain>(*this, Target,
HostTC, Args);
break;
}
case Action::OFK_OpenMP:
// omp + nvptx
TC = std::make_unique<toolchains::CudaToolChain>(
*this, Target, HostTC, Args, TargetDeviceOffloadKind);
break;
case Action::OFK_SYCL:
switch (Target.getArch()) {
case llvm::Triple::spir:
case llvm::Triple::spir64:
TC = std::make_unique<toolchains::SYCLToolChain>(*this, Target, HostTC,
Args);
break;
case Action::OFK_HIP: {
if (Target.getArch() == llvm::Triple::amdgcn &&
Target.getVendor() == llvm::Triple::AMD &&
Target.getOS() == llvm::Triple::AMDHSA)
TC = std::make_unique<toolchains::HIPAMDToolChain>(
*this, Target, HostTC, Args, TargetDeviceOffloadKind);
else if (Target.getArch() == llvm::Triple::spirv64 &&
Target.getVendor() == llvm::Triple::UnknownVendor &&
Target.getOS() == llvm::Triple::UnknownOS)
TC = std::make_unique<toolchains::HIPSPVToolChain>(*this, Target,
HostTC, Args);
break;
}
case Action::OFK_OpenMP:
// omp + nvptx
case llvm::Triple::nvptx:
case llvm::Triple::nvptx64:
TC = std::make_unique<toolchains::CudaToolChain>(
*this, Target, HostTC, Args, TargetDeviceOffloadKind);
*this, Target, HostTC, Args, TargetDeviceOffloadKind);
break;
case Action::OFK_SYCL:
switch (Target.getArch()) {
case llvm::Triple::spir:
case llvm::Triple::spir64:
TC = std::make_unique<toolchains::SYCLToolChain>(
*this, Target, HostTC, Args);
break;
case llvm::Triple::nvptx:
case llvm::Triple::nvptx64:
TC = std::make_unique<toolchains::CudaToolChain>(
*this, Target, HostTC, Args, TargetDeviceOffloadKind);
break;
case llvm::Triple::amdgcn:
TC = std::make_unique<toolchains::HIPAMDToolChain>(
*this, Target, HostTC, Args, TargetDeviceOffloadKind);
break;
default:
if (isSYCLNativeCPU(Args)) {
case llvm::Triple::amdgcn:
TC = std::make_unique<toolchains::HIPAMDToolChain>(
*this, Target, HostTC, Args, TargetDeviceOffloadKind);
break;
default:
if (isSYCLNativeCPU(Args)) {
TC = std::make_unique<toolchains::SYCLToolChain>(*this, Target,
HostTC, Args);
}
break;
}
break;
}
break;
default:
default:
break;
}
}
Expand Down Expand Up @@ -10040,3 +10044,89 @@ llvm::Error driver::expandResponseFiles(SmallVectorImpl<const char *> &Args,

return llvm::Error::success();
}

// Computes the device traits macro definitions for the SYCL targets in
// UniqueSYCLTriplesVec and appends them to SYCLDeviceTraitsMacrosArgs.
//
// For every target the device config file table is searched, first with the
// whole triple and then with progressively shorter prefixes (dropping the
// last '-'-separated component each time). The per-target aspect sets are:
//   * "all devices have": the aspect list of the entry, or the empty set if
//     the target has no entry;
//   * "any device has": the aspect list of the entry if the entry exists and
//     'maySupportOtherAspects' is false, otherwise the set of all aspects.
// The emitted macros are:
//   * -D__SYCL_ANY_DEVICE_HAS_ANY_ASPECT__=1 if the union of the "any" sets
//     is the set of all aspects; otherwise one
//     -D__SYCL_ANY_DEVICE_HAS_<aspect>__=1 per aspect in the union;
//   * one -D__SYCL_ALL_DEVICES_HAVE_<aspect>__=1 per aspect in the
//     intersection of the "all" sets.
//
// Args is only used to allocate the macro strings (MakeArgString).
void Driver::populateSYCLDeviceTraitsMacrosArgs(
    const llvm::opt::ArgList &Args,
    const llvm::SmallVector<llvm::Triple, 4> &UniqueSYCLTriplesVec) {
  const auto &TargetTable = DeviceConfigFile::TargetTable;
  // Aspect -> number of targets with a config entry that list the aspect.
  std::map<StringRef, unsigned int> AllDevicesHave;
  // Aspects listed by at least one target with a config entry.
  std::map<StringRef, bool> AnyDeviceHas;
  bool AnyDeviceHasAnyAspect = false;
  // Set when at least one target has no entry in the config file.
  bool AnyTargetWithoutEntry = false;
  unsigned int ValidTargets = 0;

  for (const auto &TargetTriple : UniqueSYCLTriplesVec) {
    // Try and find the whole triple, if there's no match, remove parts of the
    // triple from the end to find partial matches.
    std::string TargetTripleStr = TargetTriple.getTriple();
    auto TripleIt = TargetTable.find(TargetTripleStr);
    while (TripleIt == TargetTable.end()) {
      const auto Pos = TargetTripleStr.find_last_of('-');
      if (Pos == std::string::npos)
        break; // Nothing left to strip: the target has no entry.
      TargetTripleStr.resize(Pos);
      TripleIt = TargetTable.find(TargetTripleStr);
    }

    if (TripleIt == TargetTable.end()) {
      // No entry: this target contributes the empty set to the "all devices
      // have" intersection and the set of all aspects to the "any device has"
      // union. Remember it so neither result silently ignores the target.
      AnyTargetWithoutEntry = true;
      continue;
    }

    const auto &TargetInfo = TripleIt->second;
    ++ValidTargets;
    if (TargetInfo.maySupportOtherAspects)
      AnyDeviceHasAnyAspect = true;
    for (const auto &Aspect : TargetInfo.aspects) {
      // operator[] value-initializes a missing counter to 0, so a single
      // lookup both creates and increments it.
      ++AllDevicesHave[Aspect];
      AnyDeviceHas[Aspect] = true;
    }
  }

  // A target without an entry may support any aspect. This also covers the
  // case where no target at all was found in the config file.
  if (AnyTargetWithoutEntry || ValidTargets == 0)
    AnyDeviceHasAnyAspect = true;

  if (AnyDeviceHasAnyAspect) {
    // There exists some target that supports any given aspect.
    SmallString<64> MacroAnyDeviceAnyAspect(
        "-D__SYCL_ANY_DEVICE_HAS_ANY_ASPECT__=1");
    SYCLDeviceTraitsMacrosArgs.push_back(
        Args.MakeArgString(MacroAnyDeviceAnyAspect));
  } else {
    // Some of the aspects are not supported at all by any of the targets.
    // Thus, we need to define individual macros for each supported aspect.
    for (const auto &[Aspect, Supported] : AnyDeviceHas) {
      assert(Supported);
      (void)Supported; // Only read by the assert above.
      SmallString<64> MacroAnyDevice("-D__SYCL_ANY_DEVICE_HAS_");
      MacroAnyDevice += Aspect;
      MacroAnyDevice += "__=1";
      SYCLDeviceTraitsMacrosArgs.push_back(Args.MakeArgString(MacroAnyDevice));
    }
  }

  // The intersection of the "all devices have" sets is empty as soon as one
  // target has no entry, so no aspect can be claimed for all devices.
  if (AnyTargetWithoutEntry)
    return;

  for (const auto &[Aspect, TargetCount] : AllDevicesHave) {
    // Only aspects listed by every target belong to the intersection.
    if (TargetCount != ValidTargets)
      continue;
    SmallString<64> MacroAllDevices("-D__SYCL_ALL_DEVICES_HAVE_");
    MacroAllDevices += Aspect;
    MacroAllDevices += "__=1";
    SYCLDeviceTraitsMacrosArgs.push_back(Args.MakeArgString(MacroAllDevices));
  }
}
5 changes: 5 additions & 0 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5374,6 +5374,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
if (Args.hasFlag(options::OPT_fsycl_esimd_force_stateless_mem,
options::OPT_fno_sycl_esimd_force_stateless_mem, false))
CmdArgs.push_back("-fsycl-esimd-force-stateless-mem");

const auto DeviceTraitsMacrosArgs = D.getDeviceTraitsMacrosArgs();
for (const auto &Arg : DeviceTraitsMacrosArgs) {
CmdArgs.push_back(Arg);
}
}

if (IsOpenMPDevice) {
Expand Down
21 changes: 21 additions & 0 deletions clang/test/Driver/sycl-device-traits-macros.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
/// Check that no device traits macros are defined if sycl is disabled:
// RUN: %clang -### %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHECK-DISABLED %s
// CHECK-DISABLED-NOT: "{{.*}}SYCL_ANY_DEVICE_HAS{{.*}}"
// CHECK-DISABLED-NOT: "{{.*}}SYCL_ALL_DEVICES_HAVE{{.*}}"

/// Check device traits macros are defined if sycl is enabled:
/// In this case, where no specific sycl targets are passed, the sycl
/// targets are spir64 and the host target (e.g. x86_64). We expect two
/// occurrences of the macro definition, one for host and one for device.
// RUN: %clang -fsycl -### %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHECK-ENABLED %s
// CHECK-ENABLED-COUNT-2: "-D__SYCL_ANY_DEVICE_HAS_ANY_ASPECT__=1"

/// Check device traits macros are defined if sycl is enabled:
/// In this case the sycl targets are spir64, spir64_gen and the host
/// target (e.g. x86_64). We expect three occurrences of the macro
/// definition, one for host and one for each of the two devices.
// RUN: %clang -fsycl -fsycl-targets=spir64,spir64_gen -### %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHECK-SYCL-TARGETS %s
// CHECK-SYCL-TARGETS-COUNT-3: "-D__SYCL_ANY_DEVICE_HAS_ANY_ASPECT__=1"
2 changes: 2 additions & 0 deletions llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@
#include <string>
#include <vector>

namespace llvm {
class StringRef;
}

namespace DeviceConfigFile {

Expand Down
5 changes: 5 additions & 0 deletions llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,11 @@ def : TargetInfo<"__TestDeprecatedAspectList",
AspectUsm_restricted_shared_allocations, AspectHost],
[]>;

def : TargetInfo<"spir64", [], [], "", "", 1>;
def : TargetInfo<"spir64_gen", [], [], "", "", 1>;
def : TargetInfo<"spir64_x86_64", [], [], "", "", 1>;
def : TargetInfo<"spir64_fpga", [], [], "", "", 1>;
def : TargetInfo<"x86_64", [], [], "", "", 1>;
// Examples of how to use a combination of explicitly specified values + predefined lists
//defvar AspectList = [AspectCpu] # AllUSMAspects;
//def : TargetInfo<"Test", AspectList, []>;
Expand Down
20 changes: 10 additions & 10 deletions sycl/doc/design/DeviceAspectTraitDesign.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,11 @@ When compiling a SYCL program, where $[t1, t2, \ldots, tn]$ are the $n$ targets
specified in `-fsycl-targets` including any targets implicitly added by the
driver, the driver defines the following macros in both host and device
compilation invocations:
* `__SYCL_ALL_DEVICES_HAVE_`$i$`__` as `1` for all $i$ in
* `__SYCL_ALL_DEVICES_HAVE_`$aspectName_{i}$`__` as `1` for all $i$ in
${\bigcap}^n_{k=1} A^{all}_{tk}$.
* `__SYCL_ANY_DEVICE_HAS_ANY_ASPECT__` as `1` if
${\bigcup}^n_{k=1} A^{any}_{tk}$ is the set of all aspects.
* `__SYCL_ANY_DEVICE_HAS_`$j$`__` as `1` for all $j$ in
* `__SYCL_ANY_DEVICE_HAS_`$aspectName_{j}$`__` as `1` for all $j$ in
${\bigcup}^n_{k=1} A^{any}_{tk}$ if `__SYCL_ANY_DEVICE_HAS_ANY_ASPECT__` was not
defined.

Expand All @@ -79,19 +79,19 @@ together with specializations for each aspect:
```c++
namespace sycl {
template <aspect Aspect> struct all_devices_have;
template<> all_devices_have<aspect::host> : std::bool_constant<__SYCL_ALL_DEVICES_HAVE_0__> {};
template<> all_devices_have<aspect::cpu> : std::bool_constant<__SYCL_ALL_DEVICES_HAVE_1__> {};
template<> all_devices_have<aspect::gpu> : std::bool_constant<__SYCL_ALL_DEVICES_HAVE_2__> {};
template<> struct all_devices_have<aspect::host> : std::bool_constant<__SYCL_ALL_DEVICES_HAVE_host__> {};
template<> struct all_devices_have<aspect::cpu> : std::bool_constant<__SYCL_ALL_DEVICES_HAVE_cpu__> {};
template<> struct all_devices_have<aspect::gpu> : std::bool_constant<__SYCL_ALL_DEVICES_HAVE_gpu__> {};
...

#ifdef __SYCL_ANY_DEVICE_HAS_ANY_ASPECT__
// Special case where any_device_has is trivially true.
template <aspect Aspect> struct any_device_has : std::true_type {};
#else
template <aspect Aspect> struct any_device_has;
template<> any_device_has<aspect::host> : std::bool_constant<__SYCL_ANY_DEVICE_HAS_0__> {};
template<> any_device_has<aspect::cpu> : std::bool_constant<__SYCL_ANY_DEVICE_HAS_1__> {};
template<> any_device_has<aspect::gpu> : std::bool_constant<__SYCL_ANY_DEVICE_HAS_2__> {};
template<> struct any_device_has<aspect::host> : std::bool_constant<__SYCL_ANY_DEVICE_HAS_host__> {};
template<> struct any_device_has<aspect::cpu> : std::bool_constant<__SYCL_ANY_DEVICE_HAS_cpu__> {};
template<> struct any_device_has<aspect::gpu> : std::bool_constant<__SYCL_ANY_DEVICE_HAS_gpu__> {};
...
#endif // __SYCL_ANY_DEVICE_HAS_ANY_ASPECT__

Expand All @@ -102,8 +102,8 @@ template <aspect Aspect> constexpr bool any_device_has_v = any_device_has<Aspect
Note that the driver may not define macros for all aspects as it only knows the
specified subset from the configuration file. As such the device headers will
have to define any undefined `__SYCL_ANY_DEVICE_HAS_`$i$`__` and
`__SYCL_ALL_DEVICES_HAVE_`$i$`__` as `0` for all aspect values $i$.
have to define any undefined `__SYCL_ANY_DEVICE_HAS_`$aspectName_{i}$`__` and
`__SYCL_ALL_DEVICES_HAVE_`$aspectName_{i}$`__` as `0` for all aspect values $i$.
Since the specializations need to be explicitly specified, there is a high
probability of mistakes when new aspects are added. To avoid such mistakes, a
Expand Down
Loading

0 comments on commit 8c5c8eb

Please sign in to comment.