Skip to content

Commit e6251b7

Browse files
dpcpp_kernels.cpp exports functions that compile functions to sycl::kernel
test_sycl_queue_submit.cpp uses dpcpp_kernel to get kernels and submit them using DPCTLQueue_SubmitRange, and _SubmitNDRange in the future. This does not work with coverage turned off due to LLVM version incompatibility between IGC and DPCPP.
1 parent 521d277 commit e6251b7

File tree

4 files changed

+227
-1
lines changed

4 files changed

+227
-1
lines changed

dpctl-capi/tests/CMakeLists.txt

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,11 @@ foreach(tf ${spirv-test-files})
2121
file(COPY ${tf} DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
2222
endforeach()
2323

24+
add_library(dpcpp_kernels
25+
STATIC
26+
${CMAKE_CURRENT_SOURCE_DIR}/dpcpp_kernels.cpp
27+
)
28+
2429
if(DPCTL_GENERATE_COVERAGE)
2530
file(GLOB_RECURSE
2631
sources ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
@@ -35,11 +40,12 @@ if(DPCTL_GENERATE_COVERAGE)
3540
list(REMOVE_ITEM
3641
dpctl_sources
3742
"${CMAKE_CURRENT_SOURCE_DIR}/../source/dpctl_vector_templ.cpp"
43+
"${CMAKE_CURRENT_SOURCE_DIR}/../source/dpcpp_kernels.cpp"
3844
)
3945

4046
# Add profiling flags
4147
set(CMAKE_CXX_FLAGS
42-
"${CMAKE_CXX_FLAGS} -fprofile-instr-generate -fcoverage-mapping"
48+
"${CMAKE_CXX_FLAGS} -fprofile-instr-generate -fcoverage-mapping -DDPCTL_COVERAGE"
4349
)
4450

4551
# Add all dpctl sources into a single executable so that we can run coverage
@@ -55,6 +61,7 @@ if(DPCTL_GENERATE_COVERAGE)
5561
GTest::GTest
5662
${IntelSycl_OPENCL_LIBRARY}
5763
${CMAKE_DL_LIBS}
64+
dpcpp_kernels
5865
)
5966
add_custom_target(llvm-cov
6067
COMMAND ${CMAKE_MAKE_PROGRAM} dpctl_c_api_tests
@@ -96,6 +103,7 @@ else()
96103
GTest::GTest
97104
DPCTLSyclInterface
98105
${LEVEL_ZERO_LIBRARY}
106+
dpcpp_kernels
99107
)
100108
endif()
101109

dpctl-capi/tests/dpcpp_kernels.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#include "dpcpp_kernels.hpp"
2+
#include <CL/sycl.hpp>
3+
#include <cstddef>
4+
5+
template sycl::kernel
6+
dpcpp_kernels::get_fill_kernel<int>(sycl::queue &, size_t, int *, int);
7+
8+
template sycl::kernel
9+
dpcpp_kernels::get_range_kernel<int>(sycl::queue &, size_t, int *);
10+
11+
template sycl::kernel dpcpp_kernels::get_mad_kernel<int, int>(sycl::queue &,
12+
size_t,
13+
int *,
14+
int *,
15+
int *,
16+
int);

dpctl-capi/tests/dpcpp_kernels.hpp

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
#pragma once
2+
#include <CL/sycl.hpp>
3+
4+
namespace dpcpp_kernels
5+
{
6+
7+
namespace
8+
{
9+
template <typename T> class populate_a;
10+
11+
template <typename T> class populate_b;
12+
13+
template <typename T, typename scT> class mad_kern;
14+
15+
template <typename name, class kernelFunc>
16+
auto make_cgh_function(int n, kernelFunc func)
17+
{
18+
auto Kernel = [&](sycl::handler &cgh) {
19+
cgh.parallel_for<name>(sycl::range<1>(n), func);
20+
};
21+
return Kernel;
22+
};
23+
24+
template <typename Ty, typename scT> struct MadFunc
25+
{
26+
const Ty *in1, *in2;
27+
Ty *out;
28+
scT val;
29+
MadFunc(const Ty *a, const Ty *b, Ty *c, scT d)
30+
: in1(a), in2(b), out(c), val(d)
31+
{
32+
}
33+
void operator()(sycl::id<1> myId) const
34+
{
35+
auto gid = myId[0];
36+
out[gid] = in1[gid] + val * in2[gid];
37+
return;
38+
}
39+
};
40+
41+
template <typename T> struct FillFunc
42+
{
43+
T *out;
44+
T val;
45+
FillFunc(T *a, T val) : out(a), val(val) {}
46+
void operator()(sycl::id<1> myId) const
47+
{
48+
auto gid = myId[0];
49+
out[gid] = val;
50+
return;
51+
};
52+
};
53+
54+
template <typename T> struct RangeFunc
55+
{
56+
T *out;
57+
RangeFunc(T *b) : out(b) {}
58+
void operator()(sycl::id<1> myId) const
59+
{
60+
auto gid = myId[0];
61+
out[gid] = T(gid);
62+
return;
63+
};
64+
};
65+
66+
} // namespace
67+
68+
template <typename T>
69+
sycl::kernel get_fill_kernel(sycl::queue &q, size_t n, T *out, T fill_val)
70+
{
71+
// out[i] = fill_val
72+
sycl::program program(q.get_context());
73+
74+
[[maybe_unused]] auto cgh_fn =
75+
make_cgh_function<class populate_a<T>>(n, FillFunc<T>(out, fill_val));
76+
77+
program.build_with_kernel_type<populate_a<T>>();
78+
return program.get_kernel<populate_a<T>>();
79+
};
80+
81+
template <typename T>
82+
sycl::kernel get_range_kernel(sycl::queue &q, size_t n, T *b)
83+
{
84+
// b[i] = i
85+
sycl::program program(q.get_context());
86+
87+
[[maybe_unused]] auto cgh_fn =
88+
make_cgh_function<class populate_b<T>>(n, RangeFunc<T>(b));
89+
90+
program.build_with_kernel_type<populate_b<T>>();
91+
return program.get_kernel<populate_b<T>>();
92+
};
93+
94+
template <typename T, typename scT>
95+
sycl::kernel
96+
get_mad_kernel(sycl::queue &q, size_t n, T *in1, T *in2, T *out, scT val)
97+
{
98+
// c[i] = a[i] + b[i] * val
99+
sycl::program program(q.get_context());
100+
101+
[[maybe_unused]] auto cgh_fn = make_cgh_function<class mad_kern<T, scT>>(
102+
n, MadFunc<T, scT>(in1, in2, out, val));
103+
104+
program.build_with_kernel_type<mad_kern<T, scT>>();
105+
return program.get_kernel<mad_kern<T, scT>>();
106+
};
107+
108+
} // namespace dpcpp_kernels

dpctl-capi/tests/test_sycl_queue_submit.cpp

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
//===----------------------------------------------------------------------===//
2525

2626
#include "Support/CBindingWrapping.h"
27+
#include "dpcpp_kernels.hpp"
2728
#include "dpctl_sycl_context_interface.h"
2829
#include "dpctl_sycl_device_interface.h"
2930
#include "dpctl_sycl_device_selector_interface.h"
@@ -123,3 +124,96 @@ TEST_F(TestQueueSubmit, CheckSubmitRange_saxpy)
123124
DPCTLDevice_Delete(DRef);
124125
DPCTLDeviceSelector_Delete(DSRef);
125126
}
127+
128+
#ifndef DPCTL_COVERAGE
129+
namespace
130+
{
131+
132+
template <typename T,
133+
DPCTLKernelArgType katT,
134+
typename scT,
135+
DPCTLKernelArgType katscT>
136+
bool common_submit_range_fn(sycl::queue &q, size_t n, scT val)
137+
{
138+
T *a = sycl::malloc_device<T>(n, q);
139+
T *b = sycl::malloc_device<T>(n, q);
140+
T *c = sycl::malloc_device<T>(n, q);
141+
T fill_val = 1;
142+
size_t Range[] = {n};
143+
144+
auto popA_kernel = dpcpp_kernels::get_fill_kernel<T>(q, n, a, fill_val);
145+
auto popB_kernel = dpcpp_kernels::get_range_kernel<T>(q, n, b);
146+
auto mad_kernel = dpcpp_kernels::get_mad_kernel<T, scT>(q, n, a, b, c, val);
147+
148+
DPCTLSyclKernelRef popAKernRef =
149+
reinterpret_cast<DPCTLSyclKernelRef>(&popA_kernel);
150+
DPCTLSyclKernelRef popBKernRef =
151+
reinterpret_cast<DPCTLSyclKernelRef>(&popB_kernel);
152+
DPCTLSyclKernelRef madKernRef =
153+
reinterpret_cast<DPCTLSyclKernelRef>(&mad_kernel);
154+
155+
DPCTLSyclQueueRef QRef = reinterpret_cast<DPCTLSyclQueueRef>(&q);
156+
void *popAArgs[] = {reinterpret_cast<void *>(a),
157+
reinterpret_cast<void *>(&fill_val)};
158+
DPCTLKernelArgType popAKernelArgTypes[] = {DPCTL_VOID_PTR, katT};
159+
160+
DPCTLSyclEventRef popAERef =
161+
DPCTLQueue_SubmitRange(popAKernRef, QRef, popAArgs, popAKernelArgTypes,
162+
2, Range, 1, nullptr, 0);
163+
164+
void *popBArgs[] = {reinterpret_cast<void *>(b)};
165+
DPCTLKernelArgType popBKernelArgTypes[] = {DPCTL_VOID_PTR};
166+
167+
DPCTLSyclEventRef popBERef =
168+
DPCTLQueue_SubmitRange(popBKernRef, QRef, popBArgs, popBKernelArgTypes,
169+
1, Range, 1, nullptr, 0);
170+
171+
void *madArgs[] = {reinterpret_cast<void *>(a), reinterpret_cast<void *>(b),
172+
reinterpret_cast<void *>(c),
173+
reinterpret_cast<void *>(&val)};
174+
DPCTLKernelArgType madKernelArgTypes[] = {DPCTL_VOID_PTR, DPCTL_VOID_PTR,
175+
DPCTL_VOID_PTR, katscT};
176+
177+
DPCTLSyclEventRef deps[2] = {popAERef, popBERef};
178+
DPCTLSyclEventRef madRef = DPCTLQueue_SubmitRange(
179+
madKernRef, QRef, madArgs, madKernelArgTypes, 4, Range, 1, deps, 2);
180+
181+
DPCTLQueue_Wait(QRef);
182+
DPCTLEvent_Delete(madRef);
183+
DPCTLEvent_Delete(popBERef);
184+
DPCTLEvent_Delete(popAERef);
185+
186+
bool worked = true;
187+
T *host_data = new T[n];
188+
q.memcpy(host_data, c, n * sizeof(T)).wait();
189+
for (size_t i = 0; i < n; ++i) {
190+
worked = worked && (host_data[i] == T(fill_val) + T(i) * T(val));
191+
}
192+
delete[] host_data;
193+
194+
sycl::free(c, q);
195+
sycl::free(b, q);
196+
sycl::free(a, q);
197+
198+
return worked;
199+
}
200+
201+
} // namespace
202+
203+
struct TestQueueSubmitRange : public ::testing::Test
204+
{
205+
sycl::queue q;
206+
size_t n_elems = 512;
207+
208+
TestQueueSubmitRange() : q(sycl::default_selector{}) {}
209+
~TestQueueSubmitRange() {}
210+
};
211+
212+
TEST_F(TestQueueSubmitRange, ChkSubmitRangeInt)
213+
{
214+
bool worked = false;
215+
worked = common_submit_range_fn<int, DPCTL_INT, int, DPCTL_INT>(q, n_elems,
216+
int(-1));
217+
EXPECT_TRUE(worked);
218+
}
219+
#endif

0 commit comments

Comments
 (0)