Skip to content

Commit b7e8523

Browse files
[SYCL] Implement the sycl_oneapi_raw_kernel_arg extension (#14335)
This commit implements the sycl_oneapi_raw_kernel_arg extension. --------- Signed-off-by: Larsen, Steffen <steffen.larsen@intel.com>
1 parent 2d93042 commit b7e8523

File tree

11 files changed

+334
-13
lines changed

11 files changed

+334
-13
lines changed

sycl/doc/extensions/proposed/sycl_ext_oneapi_raw_kernel_arg.asciidoc renamed to sycl/doc/extensions/experimental/sycl_ext_oneapi_raw_kernel_arg.asciidoc

Lines changed: 26 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -44,11 +44,21 @@ SYCL specification refer to that revision.
4444

4545
== Status
4646

47-
This is a proposed extension specification, intended to gather community
48-
feedback. Interfaces defined in this specification may not be implemented yet
49-
or may be in a preliminary state. The specification itself may also change in
50-
incompatible ways before it is finalized. *Shipping software products should
51-
not rely on APIs defined in this specification.*
47+
This is an experimental extension specification, intended to provide early
48+
access to features and gather community feedback. Interfaces defined in this
49+
specification are implemented in {dpcpp}, but they are not finalized and may
50+
change incompatibly in future versions of {dpcpp} without prior notice.
51+
*Shipping software products should not rely on APIs defined in this
52+
specification.*
53+
54+
55+
== Backend support status
56+
57+
This extension is currently implemented in {dpcpp} only for GPU devices and
58+
only when using the Level Zero backend. Attempting to use this extension in
59+
kernels that run on other devices or backends may result in undefined
60+
behavior. Be aware that the compiler is not able to issue a diagnostic to
61+
warn you if this happens.
5262

5363

5464
== Overview
@@ -71,7 +81,7 @@ char* opaque_type;
7181
int nbytes;
7282
...
7383
h.set_arg(0, a);
74-
h.set_arg(1, sycl::ext::oneapi::raw_kernel_arg(opaque_type, nbytes));
84+
h.set_arg(1, sycl::ext::oneapi::experimental::raw_kernel_arg(opaque_type, nbytes));
7585
h.parallel_for(range, kernel);
7686
----
7787

@@ -94,7 +104,8 @@ implementation supports.
94104
|Description
95105

96106
|1
97-
|Initial version of this extension.
107+
|The APIs of this experimental extension are not versioned, so the
108+
feature-test macro always has this value.
98109
|===
99110

100111
=== The `raw_kernel_arg` class
@@ -104,26 +115,28 @@ kernel arguments via a raw byte representation.
104115

105116
[source,c++]
106117
----
107-
namespace sycl::ext::oneapi {
118+
namespace sycl::ext::oneapi::experimental {
108119
109120
class raw_kernel_arg {
110121
public:
111-
raw_kernel_arg(void* bytes, size_t count);
122+
raw_kernel_arg(const void* bytes, size_t count);
112123
};
113124
114-
} // namespace sycl::ext::oneapi
125+
} // namespace sycl::ext::oneapi::experimental
115126
----
116127

117128
[source,c++]
118129
----
119-
raw_kernel_arg(void* bytes, size_t count);
130+
raw_kernel_arg(const void* bytes, size_t count);
120131
----
121132
_Preconditions_: `bytes` must point to an array of at least `count` bytes,
122133
which is the byte representation of a kernel argument that is trivially
123134
copyable.
124135

125136
_Effects_: Constructs a `raw_kernel_arg` representing a view of the `count`
126-
bytes starting at the address specified by `bytes`.
137+
bytes starting at the address specified by `bytes`. Since the `raw_kernel_arg`
138+
object is only a view, the caller must ensure that the lifetime of the `bytes`
139+
memory lasts at least as long as the lifetime of the `raw_kernel_arg` object.
127140

128141
=== Using a raw kernel argument
129142

@@ -138,7 +151,7 @@ argument in `args` was passed to `set_arg` ", adding a new overload of
138151

139152
[source,c++]
140153
----
141-
void set_arg(int argIndex, sycl::ext::oneapi::raw_kernel_arg&& arg);
154+
void set_arg(int argIndex, sycl::ext::oneapi::experimental::raw_kernel_arg&& arg);
142155
----
143156
_Effects_: Sets the kernel argument associated with index `argIndex` using the
144157
bytes represented by `arg`.
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
//==--- raw_kernel_arg.hpp --- SYCL extension for raw kernel args ----------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#pragma once
10+
11+
#include <stddef.h>
12+
13+
namespace sycl {
14+
inline namespace _V1 {
15+
16+
class handler;
17+
18+
namespace ext::oneapi::experimental {
19+
20+
class raw_kernel_arg {
21+
public:
22+
raw_kernel_arg(const void *bytes, size_t count)
23+
: MArgData(bytes), MArgSize(count) {}
24+
25+
private:
26+
const void *MArgData;
27+
size_t MArgSize;
28+
29+
friend class sycl::handler;
30+
};
31+
32+
} // namespace ext::oneapi::experimental
33+
} // namespace _V1
34+
} // namespace sycl

sycl/include/sycl/handler.hpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#include <sycl/ext/oneapi/device_global/device_global.hpp>
3434
#include <sycl/ext/oneapi/device_global/properties.hpp>
3535
#include <sycl/ext/oneapi/experimental/graph.hpp>
36+
#include <sycl/ext/oneapi/experimental/raw_kernel_arg.hpp>
3637
#include <sycl/ext/oneapi/experimental/use_root_sync_prop.hpp>
3738
#include <sycl/ext/oneapi/experimental/virtual_functions.hpp>
3839
#include <sycl/ext/oneapi/kernel_properties/properties.hpp>
@@ -523,6 +524,14 @@ class __SYCL_EXPORT handler {
523524
return Storage;
524525
}
525526

527+
void *
528+
storeRawArg(const sycl::ext::oneapi::experimental::raw_kernel_arg &RKA) {
529+
CGData.MArgsStorage.emplace_back(RKA.MArgSize);
530+
void *Storage = static_cast<void *>(CGData.MArgsStorage.back().data());
531+
std::memcpy(Storage, RKA.MArgData, RKA.MArgSize);
532+
return Storage;
533+
}
534+
526535
void setType(detail::CG::CGTYPE Type) { MCGType = Type; }
527536

528537
detail::CG::CGTYPE getType() { return MCGType; }
@@ -758,6 +767,14 @@ class __SYCL_EXPORT handler {
758767
registerDynamicParameter(DynamicParam, ArgIndex);
759768
}
760769

770+
// setArgHelper for the raw_kernel_arg extension type.
771+
void setArgHelper(int ArgIndex,
772+
sycl::ext::oneapi::experimental::raw_kernel_arg &&Arg) {
773+
auto StoredArg = storeRawArg(Arg);
774+
MArgs.emplace_back(detail::kernel_param_kind_t::kind_std_layout, StoredArg,
775+
Arg.MArgSize, ArgIndex);
776+
}
777+
761778
/// Registers a dynamic parameter with the handler for later association with
762779
/// the node being created
763780
/// @param DynamicParamBase
@@ -2047,6 +2064,11 @@ class __SYCL_EXPORT handler {
20472064
setArgHelper(argIndex, dynamicParam);
20482065
}
20492066

2067+
// set_arg for the raw_kernel_arg extension type.
2068+
void set_arg(int argIndex, ext::oneapi::experimental::raw_kernel_arg &&Arg) {
2069+
setArgHelper(argIndex, std::move(Arg));
2070+
}
2071+
20502072
/// Sets arguments for OpenCL interoperability kernels.
20512073
///
20522074
/// Registers pack of arguments(Args) with indexes starting from 0.

sycl/include/sycl/sycl.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@
9797
#include <sycl/ext/oneapi/experimental/opportunistic_group.hpp>
9898
#include <sycl/ext/oneapi/experimental/prefetch.hpp>
9999
#include <sycl/ext/oneapi/experimental/profiling_tag.hpp>
100+
#include <sycl/ext/oneapi/experimental/raw_kernel_arg.hpp>
100101
#include <sycl/ext/oneapi/experimental/root_group.hpp>
101102
#include <sycl/ext/oneapi/experimental/tangle_group.hpp>
102103
#include <sycl/ext/oneapi/filter_selector.hpp>

sycl/source/feature_test.hpp.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ inline namespace _V1 {
109109
#define SYCL_EXT_ONEAPI_FREE_FUNCTION_KERNELS 1
110110
#define SYCL_EXT_ONEAPI_PROD 1
111111
#define SYCL_EXT_ONEAPI_ENQUEUE_FUNCTIONS 1
112+
#define SYCL_EXT_ONEAPI_RAW_KERNEL_ARG 1
112113

113114
#ifndef __has_include
114115
#define __has_include(x) 0
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
// REQUIRES: aspect-usm_shared_allocations
2+
// REQUIRES: ocloc && level_zero
3+
4+
// RUN: %{build} -o %t.out
5+
// RUN: %{run} %t.out
6+
7+
// Tests raw_kernel_arg in different combinations.
8+
9+
#include <sycl/detail/core.hpp>
10+
#include <sycl/usm.hpp>
11+
12+
constexpr size_t NumArgs = 4;
13+
14+
auto constexpr CLSource = R"===(
15+
__kernel void Kernel(int in1, char in2, __global float *out, float in3) {
16+
out[0] = (float)in1 + (float)in2 + in3;
17+
}
18+
)===";
19+
20+
template <typename T>
21+
void SetArg(sycl::handler &CGH, T &&Arg, size_t Index, size_t Iteration) {
22+
// Pick how we set the arg based on the bit at Index in Iteration.
23+
if (Iteration & (1 << Index))
24+
CGH.set_arg(Index, sycl::ext::oneapi::experimental::raw_kernel_arg(
25+
&Arg, sizeof(T)));
26+
else
27+
CGH.set_arg(Index, Arg);
28+
}
29+
30+
int main() {
31+
sycl::queue Q;
32+
33+
auto SourceKB =
34+
sycl::ext::oneapi::experimental::create_kernel_bundle_from_source(
35+
Q.get_context(),
36+
sycl::ext::oneapi::experimental::source_language::opencl, CLSource);
37+
auto ExecKB = sycl::ext::oneapi::experimental::build(SourceKB);
38+
39+
int Failed = 0;
40+
41+
float *Out = sycl::malloc_shared<float>(1, Q);
42+
int32_t IntVal = 42;
43+
char CharVal = 100;
44+
float FloatVal = 1.23;
45+
46+
float Expected =
47+
static_cast<float>(IntVal) + static_cast<float>(CharVal) + FloatVal;
48+
for (size_t I = 0; I < (2 >> (NumArgs - 1)); ++I) {
49+
Out[0] = 0.0f;
50+
Q.submit([&](sycl::handler &CGH) {
51+
SetArg(CGH, IntVal, 0, I);
52+
SetArg(CGH, CharVal, 1, I);
53+
SetArg(CGH, Out, 2, I);
54+
SetArg(CGH, FloatVal, 3, I);
55+
CGH.single_task(ExecKB.ext_oneapi_get_kernel("Kernel"));
56+
}).wait();
57+
58+
if (Out[0] != Expected) {
59+
std::cout << "Failed for iteration " << I << ": " << Out[0]
60+
<< " != " << Expected << std::endl;
61+
++Failed;
62+
}
63+
}
64+
65+
sycl::free(Out, Q);
66+
return Failed;
67+
}
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
// REQUIRES: aspect-usm_shared_allocations
2+
// REQUIRES: ocloc && level_zero
3+
4+
// RUN: %{build} -o %t.out
5+
// RUN: %{run} %t.out
6+
7+
// Tests raw_kernel_arg with pointers and scalars to different types with
8+
// different sizes.
9+
10+
#include <sycl/detail/core.hpp>
11+
#include <sycl/usm.hpp>
12+
13+
auto constexpr CLSource = R"===(
14+
__kernel void Kernel1(int in, __global int *out) {
15+
out[0] = in;
16+
}
17+
18+
__kernel void Kernel2(short in, __global short *out) {
19+
out[0] = in;
20+
}
21+
)===";
22+
23+
int main() {
24+
sycl::queue Q;
25+
26+
auto SourceKB =
27+
sycl::ext::oneapi::experimental::create_kernel_bundle_from_source(
28+
Q.get_context(),
29+
sycl::ext::oneapi::experimental::source_language::opencl, CLSource);
30+
auto ExecKB = sycl::ext::oneapi::experimental::build(SourceKB);
31+
32+
int32_t *IntOut = sycl::malloc_shared<int32_t>(1, Q);
33+
int16_t *ShortOut = sycl::malloc_shared<int16_t>(1, Q);
34+
int32_t IntVal = 42;
35+
int16_t ShortVal = 24;
36+
37+
for (size_t I = 0; I < 2; ++I) {
38+
std::string KernelName = I == 0 ? "Kernel1" : "Kernel2";
39+
Q.submit([&](sycl::handler &CGH) {
40+
sycl::ext::oneapi::experimental::raw_kernel_arg KernelArg0 =
41+
I == 0 ? sycl::ext::oneapi::experimental::raw_kernel_arg(
42+
&IntVal, sizeof(int32_t))
43+
: sycl::ext::oneapi::experimental::raw_kernel_arg(
44+
&ShortVal, sizeof(int16_t));
45+
sycl::ext::oneapi::experimental::raw_kernel_arg KernelArg1 =
46+
I == 0 ? sycl::ext::oneapi::experimental::raw_kernel_arg(
47+
&IntOut, sizeof(int32_t *))
48+
: sycl::ext::oneapi::experimental::raw_kernel_arg(
49+
&ShortOut, sizeof(int16_t *));
50+
51+
CGH.set_arg(0, KernelArg0);
52+
CGH.set_arg(1, KernelArg1);
53+
CGH.single_task(ExecKB.ext_oneapi_get_kernel(KernelName));
54+
}).wait();
55+
}
56+
57+
assert(IntOut[0] == IntVal);
58+
assert(ShortOut[0] == ShortVal);
59+
60+
sycl::free(IntOut, Q);
61+
sycl::free(ShortOut, Q);
62+
return 0;
63+
}
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
// REQUIRES: aspect-usm_shared_allocations
2+
// REQUIRES: ocloc && level_zero
3+
4+
// RUN: %{build} -o %t.out
5+
// RUN: %{run} %t.out
6+
7+
// Tests raw_kernel_arg with pointers and scalars to different 32-bit types.
8+
9+
#include <sycl/detail/core.hpp>
10+
#include <sycl/usm.hpp>
11+
12+
auto constexpr CLSource = R"===(
13+
__kernel void Kernel1(int in, __global int *out) {
14+
out[0] = in;
15+
}
16+
17+
__kernel void Kernel2(float in, __global float *out) {
18+
out[0] = in;
19+
}
20+
)===";
21+
22+
int main() {
23+
sycl::queue Q;
24+
25+
auto SourceKB =
26+
sycl::ext::oneapi::experimental::create_kernel_bundle_from_source(
27+
Q.get_context(),
28+
sycl::ext::oneapi::experimental::source_language::opencl, CLSource);
29+
auto ExecKB = sycl::ext::oneapi::experimental::build(SourceKB);
30+
31+
int32_t *IntOut = sycl::malloc_shared<int32_t>(1, Q);
32+
float *FloatOut = sycl::malloc_shared<float>(1, Q);
33+
int32_t IntVal = 42;
34+
float FloatVal = 3.12f;
35+
36+
for (size_t I = 0; I < 2; ++I) {
37+
std::string KernelName = I == 0 ? "Kernel1" : "Kernel2";
38+
Q.submit([&](sycl::handler &CGH) {
39+
sycl::ext::oneapi::experimental::raw_kernel_arg KernelArg0 =
40+
I == 0 ? sycl::ext::oneapi::experimental::raw_kernel_arg(
41+
&IntVal, sizeof(int32_t))
42+
: sycl::ext::oneapi::experimental::raw_kernel_arg(
43+
&FloatVal, sizeof(float));
44+
sycl::ext::oneapi::experimental::raw_kernel_arg KernelArg1 =
45+
I == 0 ? sycl::ext::oneapi::experimental::raw_kernel_arg(
46+
&IntOut, sizeof(int32_t *))
47+
: sycl::ext::oneapi::experimental::raw_kernel_arg(
48+
&FloatOut, sizeof(float *));
49+
50+
CGH.set_arg(0, KernelArg0);
51+
CGH.set_arg(1, KernelArg1);
52+
CGH.single_task(ExecKB.ext_oneapi_get_kernel(KernelName));
53+
}).wait();
54+
}
55+
56+
assert(IntOut[0] == IntVal);
57+
assert(FloatOut[0] == FloatVal);
58+
59+
sycl::free(IntOut, Q);
60+
sycl::free(FloatOut, Q);
61+
return 0;
62+
}

0 commit comments

Comments
 (0)