Skip to content

Commit 63ac3d3

Browse files
[SYCL] Add runtime support for device code argument elimination (#2315)
Add support for the byte array type properties. Register the new kernel parameter optimization properties while adding binary images in the program manager. This information is then used while setting kernel arguments to skip over the eliminated ones. Signed-off-by: Sergey Semenov <sergey.semenov@intel.com>
1 parent c11c090 commit 63ac3d3

File tree

8 files changed

+283
-16
lines changed

8 files changed

+283
-16
lines changed

sycl/include/CL/sycl/detail/pi.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -581,8 +581,9 @@ using _pi_offload_entry = _pi_offload_entry_struct *;
581581
// A type of a binary image property.
582582
typedef enum {
583583
PI_PROPERTY_TYPE_UNKNOWN,
584-
PI_PROPERTY_TYPE_UINT32, // 32-bit integer
585-
PI_PROPERTY_TYPE_STRING // null-terminated string
584+
PI_PROPERTY_TYPE_UINT32, // 32-bit integer
585+
PI_PROPERTY_TYPE_BYTE_ARRAY, // byte array
586+
PI_PROPERTY_TYPE_STRING // null-terminated string
586587
} pi_property_type;
587588

588589
// Device binary image property.
@@ -652,6 +653,8 @@ static const uint8_t PI_DEVICE_BINARY_OFFLOAD_KIND_SYCL = 4;
652653
#define PI_PROPERTY_SET_SPEC_CONST_MAP "SYCL/specialization constants"
653654
/// PropertySetRegistry::SYCL_DEVICELIB_REQ_MASK defined in PropertySetIO.h
654655
#define PI_PROPERTY_SET_DEVICELIB_REQ_MASK "SYCL/devicelib req mask"
656+
/// PropertySetRegistry::SYCL_KERNEL_PARAM_OPT_INFO defined in PropertySetIO.h
657+
#define PI_PROPERTY_SET_KERNEL_PARAM_OPT_INFO "SYCL/kernel param opt"
655658

656659
/// This struct is a record of the device binary information. If the Kind field
657660
/// denotes a portable binary type (SPIR-V or LLVM IR), the DeviceTargetSpec

sycl/include/CL/sycl/detail/pi.hpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,10 @@
2020
#include <CL/sycl/detail/pi.h>
2121

2222
#include <cassert>
23+
#include <cstdint>
2324
#include <sstream>
2425
#include <string>
26+
#include <vector>
2527

2628
#ifdef XPTI_ENABLE_INSTRUMENTATION
2729
// Forward declarations
@@ -197,13 +199,30 @@ void printArgs(Arg0 arg0, Args... args) {
197199
pi::printArgs(std::forward<Args>(args)...);
198200
}
199201

202+
// A wrapper for passing around byte array properties
203+
class ByteArray {
204+
public:
205+
using ConstIterator = const std::uint8_t *;
206+
207+
ByteArray(const std::uint8_t *Ptr, std::size_t Size) : Ptr{Ptr}, Size{Size} {}
208+
const std::uint8_t &operator[](std::size_t Idx) const { return Ptr[Idx]; }
209+
std::size_t size() const { return Size; }
210+
ConstIterator begin() const { return Ptr; }
211+
ConstIterator end() const { return Ptr + Size; }
212+
213+
private:
214+
const std::uint8_t *Ptr;
215+
const std::size_t Size;
216+
};
217+
200218
// C++ wrapper over the _pi_device_binary_property_struct structure.
201219
class DeviceBinaryProperty {
202220
public:
203221
DeviceBinaryProperty(const _pi_device_binary_property_struct *Prop)
204222
: Prop(Prop) {}
205223

206224
pi_uint32 asUint32() const;
225+
ByteArray asByteArray() const;
207226
const char *asCString() const;
208227

209228
protected:
@@ -300,6 +319,9 @@ class DeviceBinaryImage {
300319
/// value is 32-bit unsigned integer ID.
301320
const PropertyRange &getSpecConstants() const { return SpecConstIDMap; }
302321
const PropertyRange &getDeviceLibReqMask() const { return DeviceLibReqMask; }
322+
const PropertyRange &getKernelParamOptInfo() const {
323+
return KernelParamOptInfo;
324+
}
303325
virtual ~DeviceBinaryImage() {}
304326

305327
protected:
@@ -310,6 +332,7 @@ class DeviceBinaryImage {
310332
pi::PiDeviceBinaryType Format = PI_DEVICE_BINARY_TYPE_NONE;
311333
DeviceBinaryImage::PropertyRange SpecConstIDMap;
312334
DeviceBinaryImage::PropertyRange DeviceLibReqMask;
335+
DeviceBinaryImage::PropertyRange KernelParamOptInfo;
313336
};
314337

315338
/// Tries to determine the device binary image foramat. Returns

sycl/source/detail/pi.cpp

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -416,6 +416,9 @@ std::ostream &operator<<(std::ostream &Out, const DeviceBinaryProperty &P) {
416416
case PI_PROPERTY_TYPE_UINT32:
417417
Out << "[UINT32] ";
418418
break;
419+
case PI_PROPERTY_TYPE_BYTE_ARRAY:
420+
Out << "[Byte array] ";
421+
break;
419422
case PI_PROPERTY_TYPE_STRING:
420423
Out << "[String] ";
421424
break;
@@ -429,11 +432,21 @@ std::ostream &operator<<(std::ostream &Out, const DeviceBinaryProperty &P) {
429432
case PI_PROPERTY_TYPE_UINT32:
430433
Out << P.asUint32();
431434
break;
435+
case PI_PROPERTY_TYPE_BYTE_ARRAY: {
436+
ByteArray BA = P.asByteArray();
437+
std::ios_base::fmtflags FlagsBackup = Out.flags();
438+
Out << std::hex;
439+
for (const auto &Byte : BA) {
440+
Out << "0x" << Byte << " ";
441+
}
442+
Out.flags(FlagsBackup);
443+
break;
444+
}
432445
case PI_PROPERTY_TYPE_STRING:
433446
Out << P.asCString();
434447
break;
435448
default:
436-
assert("unsupported property");
449+
assert(false && "Unsupported property");
437450
return Out;
438451
}
439452
return Out;
@@ -491,6 +504,13 @@ pi_uint32 DeviceBinaryProperty::asUint32() const {
491504
return sycl::detail::pi::asUint32(&Prop->ValSize);
492505
}
493506

507+
ByteArray DeviceBinaryProperty::asByteArray() const {
508+
assert(Prop->Type == PI_PROPERTY_TYPE_BYTE_ARRAY && "property type mismatch");
509+
assert(Prop->ValSize > 0 && "property size mismatch");
510+
const auto *Data = pi::cast<const std::uint8_t *>(Prop->ValAddr);
511+
return {Data, Prop->ValSize};
512+
}
513+
494514
const char *DeviceBinaryProperty::asCString() const {
495515
assert(Prop->Type == PI_PROPERTY_TYPE_STRING && "property type mismatch");
496516
assert(Prop->ValSize > 0 && "property size mismatch");
@@ -550,6 +570,7 @@ void DeviceBinaryImage::init(pi_device_binary Bin) {
550570

551571
SpecConstIDMap.init(Bin, PI_PROPERTY_SET_SPEC_CONST_MAP);
552572
DeviceLibReqMask.init(Bin, PI_PROPERTY_SET_DEVICELIB_REQ_MASK);
573+
KernelParamOptInfo.init(Bin, PI_PROPERTY_SET_KERNEL_PARAM_OPT_INFO);
553574
}
554575

555576
} // namespace pi

sycl/source/detail/program_manager/program_manager.cpp

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525

2626
#include <algorithm>
2727
#include <cassert>
28+
#include <cstdint>
2829
#include <cstdlib>
2930
#include <cstring>
3031
#include <fstream>
@@ -397,6 +398,10 @@ RT::PiProgram ProgramManager::getBuiltPIProgram(OSModuleHandle M,
397398
Img.getLinkOptions(), PiDevices,
398399
ContextImpl->getCachedLibPrograms(), DeviceLibReqMask);
399400

401+
{
402+
std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
403+
NativePrograms[BuiltProgram.get()] = &Img;
404+
}
400405
return BuiltProgram.release();
401406
};
402407

@@ -851,6 +856,23 @@ ProgramManager::build(ProgramPtr Program, const ContextImplPtr Context,
851856
return Program;
852857
}
853858

859+
static ProgramManager::KernelArgMask
860+
createKernelArgMask(const pi::ByteArray &Bytes) {
861+
const int NBytesForSize = 8;
862+
const int NBitsInElement = 8;
863+
std::uint64_t SizeInBits = 0;
864+
for (int I = 0; I < NBytesForSize; ++I)
865+
SizeInBits |= static_cast<std::uint64_t>(Bytes[I]) << I * NBitsInElement;
866+
867+
ProgramManager::KernelArgMask Result;
868+
for (std::uint64_t I = 0; I < SizeInBits; ++I) {
869+
std::uint8_t Byte = Bytes[NBytesForSize + (I / NBitsInElement)];
870+
Result.push_back(Byte & (1 << (I % NBitsInElement)));
871+
}
872+
873+
return Result;
874+
}
875+
854876
void ProgramManager::addImages(pi_device_binaries DeviceBinary) {
855877
std::lock_guard<std::mutex> Guard(Sync::getGlobalLock());
856878

@@ -860,6 +882,17 @@ void ProgramManager::addImages(pi_device_binaries DeviceBinary) {
860882
const _pi_offload_entry EntriesB = RawImg->EntriesBegin;
861883
const _pi_offload_entry EntriesE = RawImg->EntriesEnd;
862884
auto Img = make_unique_ptr<RTDeviceBinaryImage>(RawImg, M);
885+
886+
// Fill the kernel argument mask map
887+
const pi::DeviceBinaryImage::PropertyRange &KPOIRange =
888+
Img->getKernelParamOptInfo();
889+
if (KPOIRange.isAvailable()) {
890+
KernelNameToArgMaskMap &ArgMaskMap =
891+
m_EliminatedKernelArgMasks[Img.get()];
892+
for (const auto &Info : KPOIRange)
893+
ArgMaskMap[Info->Name] =
894+
createKernelArgMask(pi::DeviceBinaryProperty(Info).asByteArray());
895+
}
863896
// Use the entry information if it's available
864897
if (EntriesB != EntriesE) {
865898
// The kernel sets for any pair of images are either disjoint or
@@ -1018,6 +1051,55 @@ uint32_t ProgramManager::getDeviceLibReqMask(const RTDeviceBinaryImage &Img) {
10181051
return 0xFFFFFFFF;
10191052
}
10201053

1054+
// TODO consider another approach with storing the masks in the integration
1055+
// header instead.
1056+
ProgramManager::KernelArgMask ProgramManager::getEliminatedKernelArgMask(
1057+
OSModuleHandle M, const context &Context, pi::PiProgram NativePrg,
1058+
const string_class &KernelName, bool KnownProgram) {
1059+
// If instructed to use a spv file, assume no eliminated arguments.
1060+
if (m_UseSpvFile && M == OSUtil::ExeModuleHandle)
1061+
return {};
1062+
1063+
{
1064+
std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
1065+
auto ImgIt = NativePrograms.find(NativePrg);
1066+
if (ImgIt != NativePrograms.end()) {
1067+
auto MapIt = m_EliminatedKernelArgMasks.find(ImgIt->second);
1068+
if (MapIt != m_EliminatedKernelArgMasks.end())
1069+
return MapIt->second[KernelName];
1070+
return {};
1071+
}
1072+
}
1073+
1074+
if (KnownProgram)
1075+
throw runtime_error("Program is not associated with a binary image",
1076+
PI_INVALID_VALUE);
1077+
1078+
// If not sure whether the program was built with one of the images, try
1079+
// finding the binary.
1080+
// TODO this can backfire in some extreme edge cases where there's a kernel
1081+
// name collision between our binaries and user-created native programs.
1082+
KernelSetId KSId;
1083+
try {
1084+
KSId = getKernelSetId(M, KernelName);
1085+
} catch (sycl::runtime_error &e) {
1086+
// If the kernel name wasn't found, assume that the program wasn't created
1087+
// from one of our device binary images.
1088+
if (e.get_cl_code() == PI_INVALID_KERNEL_NAME)
1089+
return {};
1090+
std::rethrow_exception(std::current_exception());
1091+
}
1092+
RTDeviceBinaryImage &Img = getDeviceImage(M, KSId, Context);
1093+
{
1094+
std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
1095+
NativePrograms[NativePrg] = &Img;
1096+
}
1097+
auto MapIt = m_EliminatedKernelArgMasks.find(&Img);
1098+
if (MapIt != m_EliminatedKernelArgMasks.end())
1099+
return MapIt->second[KernelName];
1100+
return {};
1101+
}
1102+
10211103
} // namespace detail
10221104
} // namespace sycl
10231105
} // __SYCL_INLINE_NAMESPACE(cl)

sycl/source/detail/program_manager/program_manager.hpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,9 @@ enum class DeviceLibExt : std::uint32_t {
5858
// that is necessary for no interoperability cases with lambda.
5959
class ProgramManager {
6060
public:
61+
// TODO use a custom dynamic bitset instead to make initialization simpler.
62+
using KernelArgMask = std::vector<bool>;
63+
6164
// Returns the single instance of the program manager for the entire
6265
// process. Can only be called after staticInit is done.
6366
static ProgramManager &getInstance();
@@ -110,6 +113,22 @@ class ProgramManager {
110113
const RTDeviceBinaryImage *Img = nullptr);
111114
uint32_t getDeviceLibReqMask(const RTDeviceBinaryImage &Img);
112115

116+
/// Returns the mask for eliminated kernel arguments for the requested kernel
117+
/// within the native program.
118+
/// \param M identifies the OS module the kernel comes from (multiple OS
119+
/// modules may have kernels with the same name).
120+
/// \param Context the context associated with the kernel.
121+
/// \param NativePrg the PI program associated with the kernel.
122+
/// \param KernelName the name of the kernel.
123+
/// \param KnownProgram indicates whether the PI program is guaranteed to
124+
/// be known to program manager (built with its API) or not (not
125+
/// cacheable or constructed with interoperability).
126+
KernelArgMask getEliminatedKernelArgMask(OSModuleHandle M,
127+
const context &Context,
128+
pi::PiProgram NativePrg,
129+
const string_class &KernelName,
130+
bool KnownProgram);
131+
113132
private:
114133
ProgramManager();
115134
~ProgramManager() = default;
@@ -175,6 +194,8 @@ class ProgramManager {
175194
// - knowing which specialization constants are used in the program and
176195
// injecting their current values before compiling the SPIRV; the binary
177196
// image object has info about all spec constants used in the module
197+
// - finding kernel argument masks for kernels associated with each
198+
// pi_program
178199
// NOTE: using RTDeviceBinaryImage raw pointers is OK, since they are not
179200
// referenced from outside SYCL runtime and RTDeviceBinaryImage object
180201
// lifetime matches program manager's one.
@@ -186,6 +207,14 @@ class ProgramManager {
186207

187208
/// Protects NativePrograms that can be changed by class' methods.
188209
std::mutex MNativeProgramsMutex;
210+
211+
using KernelNameToArgMaskMap =
212+
std::unordered_map<string_class, KernelArgMask>;
213+
/// Maps binary image and kernel name pairs to kernel argument masks which
214+
/// specify which arguments were eliminated during device code optimization.
215+
std::unordered_map<const RTDeviceBinaryImage *, KernelNameToArgMaskMap>
216+
m_EliminatedKernelArgMasks;
217+
189218
/// True iff a SPIRV file has been specified with an environment variable
190219
bool m_UseSpvFile = false;
191220
};

0 commit comments

Comments
 (0)