Skip to content

Commit 51c0dff

Browse files
Attempt to make use of DPC++ generated kernels in testing of DPCTLQueue_SubmitRange
1 parent 521d277 commit 51c0dff

File tree

1 file changed

+303
-0
lines changed

1 file changed

+303
-0
lines changed

dpctl-capi/tests/test_sycl_queue_submit.cpp

Lines changed: 303 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,3 +123,306 @@ TEST_F(TestQueueSubmit, CheckSubmitRange_saxpy)
123123
DPCTLDevice_Delete(DRef);
124124
DPCTLDeviceSelector_Delete(DSRef);
125125
}
126+
127+
namespace
128+
{
129+
130+
template <typename T> class populate_a;
131+
132+
template <typename T> class populate_b;
133+
134+
template <typename T, typename scT> class add_a_and_b;
135+
136+
template <typename T> struct kernel_arg_t
137+
{
138+
static constexpr DPCTLKernelArgType value = DPCTL_VOID_PTR;
139+
};
140+
141+
/*
142+
template <>
143+
struct kernel_arg_t<char> {
144+
static constexpr DPCTLKernelArgType value = DPCTL_CHAR;
145+
};
146+
147+
template <>
148+
struct kernel_arg_t<signed char> {
149+
static constexpr DPCTLKernelArgType value = DPCTL_SIGNED_CHAR;
150+
};
151+
152+
template <>
153+
struct kernel_arg_t<unsigned char> {
154+
static constexpr DPCTLKernelArgType value = DPCTL_UNSIGNED_CHAR;
155+
};
156+
*/
157+
template <> struct kernel_arg_t<short>
158+
{
159+
static constexpr DPCTLKernelArgType value = DPCTL_SHORT;
160+
};
161+
162+
template <> struct kernel_arg_t<int>
163+
{
164+
static constexpr DPCTLKernelArgType value = DPCTL_INT;
165+
};
166+
167+
template <> struct kernel_arg_t<unsigned int>
168+
{
169+
static constexpr DPCTLKernelArgType value = DPCTL_UNSIGNED_INT;
170+
};
171+
172+
template <> struct kernel_arg_t<long>
173+
{
174+
static constexpr DPCTLKernelArgType value = DPCTL_LONG;
175+
};
176+
177+
template <> struct kernel_arg_t<unsigned long>
178+
{
179+
static constexpr DPCTLKernelArgType value = DPCTL_UNSIGNED_LONG;
180+
};
181+
182+
template <> struct kernel_arg_t<long long>
183+
{
184+
static constexpr DPCTLKernelArgType value = DPCTL_LONG_LONG;
185+
};
186+
187+
template <> struct kernel_arg_t<unsigned long long>
188+
{
189+
static constexpr DPCTLKernelArgType value = DPCTL_UNSIGNED_LONG_LONG;
190+
};
191+
192+
/*
193+
template <>
194+
struct kernel_arg_t<size_t> {
195+
static constexpr DPCTLKernelArgType value = DPCTL_SIZE_T;
196+
};
197+
*/
198+
199+
template <> struct kernel_arg_t<float>
200+
{
201+
static constexpr DPCTLKernelArgType value = DPCTL_FLOAT;
202+
};
203+
204+
template <> struct kernel_arg_t<double>
205+
{
206+
static constexpr DPCTLKernelArgType value = DPCTL_DOUBLE;
207+
};
208+
209+
/*
210+
template <>
211+
struct kernel_arg_t<long double> {
212+
static constexpr DPCTLKernelArgType value = DPCTL_LONG_DOUBLE;
213+
};
214+
*/
215+
216+
#ifdef USE_FUNCTOR
217+
template <typename name, class kernelFunc>
218+
auto make_cgh_function(int n, kernelFunc func)
219+
{
220+
auto Kernel = [&](sycl::handler &cgh) {
221+
cgh.parallel_for<name>(sycl::range<1>(n), func);
222+
};
223+
return Kernel;
224+
};
225+
226+
template <typename Ty, typename scT> struct Add
227+
{
228+
const Ty *in1, *in2;
229+
Ty *out;
230+
scT val;
231+
Add(const Ty *a, const Ty *b, Ty *c, scT d) : in1(a), in2(b), out(c), val(d)
232+
{
233+
}
234+
void operator()(sycl::id<1> myId) const
235+
{
236+
auto gid = myId[0];
237+
out[gid] = in1[gid] + val * in2[gid];
238+
return;
239+
}
240+
};
241+
242+
template <typename T> struct PopulateA
243+
{
244+
T *out;
245+
PopulateA(T *a) : out(a) {}
246+
void operator()(sycl::id<1> myId) const
247+
{
248+
auto gid = myId[0];
249+
out[gid] = T(1);
250+
return;
251+
};
252+
};
253+
254+
template <typename T> struct PopulateB
255+
{
256+
T *out;
257+
PopulateB(T *b) : out(b) {}
258+
void operator()(sycl::id<1> myId) const
259+
{
260+
auto gid = myId[0];
261+
out[gid] = T(gid);
262+
return;
263+
};
264+
};
265+
#endif
266+
267+
template <typename T, typename scT>
268+
bool common_submit_range_fn(sycl::queue &q, size_t n, scT val)
269+
{
270+
sycl::program program(q.get_context());
271+
272+
T *a = sycl::malloc_device<T>(n, q);
273+
T *b = sycl::malloc_device<T>(n, q);
274+
T *c = sycl::malloc_device<T>(n, q);
275+
T *d = sycl::malloc_device<T>(n, q);
276+
277+
#ifdef USE_FUNCTOR
278+
auto popa_fn = make_cgh_function<class populate_a<T>>(n, PopulateA<T>(a));
279+
#else
280+
auto popa_fn = [&](sycl::handler &cgh) {
281+
cgh.parallel_for<populate_a<T>>(
282+
n, [=](sycl::id<1> idx) { a[idx[0]] = T(1); });
283+
};
284+
#endif
285+
std::cout << q.get_context().get_platform().get_backend() << std::endl;
286+
sycl::event popa_ev = q.submit(popa_fn);
287+
288+
#ifdef USE_FUNCTOR
289+
auto popb_fn = make_cgh_function<class populate_b<T>>(n, PopulateB<T>(b));
290+
#else
291+
auto popb_fn = [&](sycl::handler &cgh) {
292+
cgh.parallel_for<populate_b<T>>(
293+
n, [=](sycl::id<1> idx) { b[idx[0]] = T(idx[0]); });
294+
};
295+
#endif
296+
sycl::event popb_ev = q.submit(popb_fn);
297+
298+
#ifdef USE_FUNCTOR
299+
auto add_fn = make_cgh_function<class add_a_and_b<T, scT>>(
300+
n, Add<T, scT>(a, b, c, val));
301+
#else
302+
auto add_fn = [&](sycl::handler &cgh) {
303+
cgh.depends_on({popa_ev, popb_ev});
304+
cgh.parallel_for<add_a_and_b<T, scT>>(n, [=](sycl::id<1> idx) {
305+
const auto gid = idx[0];
306+
const T va = a[gid];
307+
const T vb = b[gid];
308+
c[gid] = va + val * vb;
309+
});
310+
};
311+
#endif
312+
313+
sycl::event add_ev = q.submit(add_fn);
314+
add_ev.wait_and_throw();
315+
316+
program.build_with_kernel_type<add_a_and_b<T, scT>>();
317+
auto kern = program.get_kernel<add_a_and_b<T, scT>>();
318+
319+
DPCTLSyclQueueRef QRef = reinterpret_cast<DPCTLSyclQueueRef>(&q);
320+
DPCTLSyclKernelRef KRef = reinterpret_cast<DPCTLSyclKernelRef>(&kern);
321+
DPCTLSyclEventRef PopAERef = reinterpret_cast<DPCTLSyclEventRef>(&popa_ev);
322+
DPCTLSyclEventRef PopBERef = reinterpret_cast<DPCTLSyclEventRef>(&popb_ev);
323+
324+
void *args2[4] = {reinterpret_cast<void *>(a), reinterpret_cast<void *>(b),
325+
reinterpret_cast<void *>(d),
326+
reinterpret_cast<void *>(&val)};
327+
DPCTLKernelArgType addKernelArgTypes[] = {DPCTL_VOID_PTR, DPCTL_VOID_PTR,
328+
DPCTL_VOID_PTR,
329+
kernel_arg_t<scT>::value};
330+
size_t Range[] = {n};
331+
DPCTLSyclEventRef events[2] = {PopAERef, PopBERef};
332+
auto ERef = DPCTLQueue_SubmitRange(KRef, QRef, args2, addKernelArgTypes, 4,
333+
Range, 1, events, 2);
334+
DPCTLQueue_Wait(QRef);
335+
DPCTLEvent_Delete(ERef);
336+
337+
T *host_data = new T[n];
338+
q.memcpy(host_data, d, n * sizeof(T));
339+
q.wait_and_throw();
340+
341+
bool worked = true;
342+
for (size_t i = 0; i < n; ++i) {
343+
worked = worked && (host_data[i] == T(1) + val * T(i));
344+
}
345+
346+
sycl::free(a, q);
347+
sycl::free(b, q);
348+
sycl::free(c, q);
349+
sycl::free(d, q);
350+
351+
return worked;
352+
};
353+
354+
} // end of anonymous namespace
355+
356+
struct TestQueueSubmitRange : public ::testing::Test
357+
{
358+
sycl::queue q;
359+
size_t n_elems = 512;
360+
361+
TestQueueSubmitRange() : q(sycl::default_selector{}) {}
362+
~TestQueueSubmitRange() {}
363+
};
364+
365+
TEST_F(TestQueueSubmitRange, ChkSubmitRangeInt)
366+
{
367+
bool worked = false;
368+
worked = common_submit_range_fn<int, int>(q, n_elems, int(-1));
369+
EXPECT_TRUE(worked);
370+
}
371+
372+
TEST_F(TestQueueSubmitRange, ChkSubmitRangeUnsignedInt)
373+
{
374+
bool worked = false;
375+
worked = common_submit_range_fn<unsigned int, unsigned int>(q, n_elems, 2);
376+
EXPECT_TRUE(worked);
377+
}
378+
379+
TEST_F(TestQueueSubmitRange, ChkSubmitRangeShort)
380+
{
381+
bool worked = false;
382+
worked = common_submit_range_fn<short, short>(q, n_elems, short(-1));
383+
EXPECT_TRUE(worked);
384+
}
385+
386+
TEST_F(TestQueueSubmitRange, ChkSubmitRangeLong)
387+
{
388+
bool worked = false;
389+
worked = common_submit_range_fn<long, long>(q, n_elems, -1);
390+
EXPECT_TRUE(worked);
391+
}
392+
393+
TEST_F(TestQueueSubmitRange, ChkSubmitRangeUnsignedLong)
394+
{
395+
bool worked = false;
396+
worked =
397+
common_submit_range_fn<unsigned long, unsigned long>(q, n_elems, 2);
398+
EXPECT_TRUE(worked);
399+
}
400+
401+
TEST_F(TestQueueSubmitRange, ChkSubmitRangeLongLong)
402+
{
403+
bool worked = false;
404+
worked = common_submit_range_fn<long long, long long>(q, n_elems, -1);
405+
EXPECT_TRUE(worked);
406+
}
407+
408+
TEST_F(TestQueueSubmitRange, ChkSubmitRangeUnsignedLongLong)
409+
{
410+
bool worked = false;
411+
worked = common_submit_range_fn<unsigned long long, unsigned long long>(
412+
q, n_elems, 2);
413+
EXPECT_TRUE(worked);
414+
}
415+
416+
TEST_F(TestQueueSubmitRange, ChkSubmitRangeFloat)
417+
{
418+
bool worked = false;
419+
worked = common_submit_range_fn<float, float>(q, n_elems, 0.5);
420+
EXPECT_TRUE(worked);
421+
}
422+
423+
TEST_F(TestQueueSubmitRange, ChkSubmitRangeDouble)
424+
{
425+
bool worked = false;
426+
worked = common_submit_range_fn<double, double>(q, n_elems, 0.5);
427+
EXPECT_TRUE(worked);
428+
}

0 commit comments

Comments
 (0)