@@ -123,3 +123,306 @@ TEST_F(TestQueueSubmit, CheckSubmitRange_saxpy)
123
123
DPCTLDevice_Delete (DRef);
124
124
DPCTLDeviceSelector_Delete (DSRef);
125
125
}
126
+
127
+ namespace
128
+ {
129
+
130
+ template <typename T> class populate_a ;
131
+
132
+ template <typename T> class populate_b ;
133
+
134
+ template <typename T, typename scT> class add_a_and_b ;
135
+
136
+ template <typename T> struct kernel_arg_t
137
+ {
138
+ static constexpr DPCTLKernelArgType value = DPCTL_VOID_PTR;
139
+ };
140
+
141
+ /*
142
+ template <>
143
+ struct kernel_arg_t<char> {
144
+ static constexpr DPCTLKernelArgType value = DPCTL_CHAR;
145
+ };
146
+
147
+ template <>
148
+ struct kernel_arg_t<signed char> {
149
+ static constexpr DPCTLKernelArgType value = DPCTL_SIGNED_CHAR;
150
+ };
151
+
152
+ template <>
153
+ struct kernel_arg_t<unsigned char> {
154
+ static constexpr DPCTLKernelArgType value = DPCTL_UNSIGNED_CHAR;
155
+ };
156
+ */
157
+ template <> struct kernel_arg_t <short >
158
+ {
159
+ static constexpr DPCTLKernelArgType value = DPCTL_SHORT;
160
+ };
161
+
162
+ template <> struct kernel_arg_t <int >
163
+ {
164
+ static constexpr DPCTLKernelArgType value = DPCTL_INT;
165
+ };
166
+
167
+ template <> struct kernel_arg_t <unsigned int >
168
+ {
169
+ static constexpr DPCTLKernelArgType value = DPCTL_UNSIGNED_INT;
170
+ };
171
+
172
+ template <> struct kernel_arg_t <long >
173
+ {
174
+ static constexpr DPCTLKernelArgType value = DPCTL_LONG;
175
+ };
176
+
177
+ template <> struct kernel_arg_t <unsigned long >
178
+ {
179
+ static constexpr DPCTLKernelArgType value = DPCTL_UNSIGNED_LONG;
180
+ };
181
+
182
+ template <> struct kernel_arg_t <long long >
183
+ {
184
+ static constexpr DPCTLKernelArgType value = DPCTL_LONG_LONG;
185
+ };
186
+
187
+ template <> struct kernel_arg_t <unsigned long long >
188
+ {
189
+ static constexpr DPCTLKernelArgType value = DPCTL_UNSIGNED_LONG_LONG;
190
+ };
191
+
192
+ /*
193
+ template <>
194
+ struct kernel_arg_t<size_t> {
195
+ static constexpr DPCTLKernelArgType value = DPCTL_SIZE_T;
196
+ };
197
+ */
198
+
199
+ template <> struct kernel_arg_t <float >
200
+ {
201
+ static constexpr DPCTLKernelArgType value = DPCTL_FLOAT;
202
+ };
203
+
204
+ template <> struct kernel_arg_t <double >
205
+ {
206
+ static constexpr DPCTLKernelArgType value = DPCTL_DOUBLE;
207
+ };
208
+
209
+ /*
210
+ template <>
211
+ struct kernel_arg_t<long double> {
212
+ static constexpr DPCTLKernelArgType value = DPCTL_LONG_DOUBLE;
213
+ };
214
+ */
215
+
216
+ #ifdef USE_FUNCTOR
217
+ template <typename name, class kernelFunc >
218
+ auto make_cgh_function (int n, kernelFunc func)
219
+ {
220
+ auto Kernel = [&](sycl::handler &cgh) {
221
+ cgh.parallel_for <name>(sycl::range<1 >(n), func);
222
+ };
223
+ return Kernel;
224
+ };
225
+
226
+ template <typename Ty, typename scT> struct Add
227
+ {
228
+ const Ty *in1, *in2;
229
+ Ty *out;
230
+ scT val;
231
+ Add (const Ty *a, const Ty *b, Ty *c, scT d) : in1(a), in2(b), out(c), val(d)
232
+ {
233
+ }
234
+ void operator ()(sycl::id<1 > myId) const
235
+ {
236
+ auto gid = myId[0 ];
237
+ out[gid] = in1[gid] + val * in2[gid];
238
+ return ;
239
+ }
240
+ };
241
+
242
+ template <typename T> struct PopulateA
243
+ {
244
+ T *out;
245
+ PopulateA (T *a) : out(a) {}
246
+ void operator ()(sycl::id<1 > myId) const
247
+ {
248
+ auto gid = myId[0 ];
249
+ out[gid] = T (1 );
250
+ return ;
251
+ };
252
+ };
253
+
254
+ template <typename T> struct PopulateB
255
+ {
256
+ T *out;
257
+ PopulateB (T *b) : out(b) {}
258
+ void operator ()(sycl::id<1 > myId) const
259
+ {
260
+ auto gid = myId[0 ];
261
+ out[gid] = T (gid);
262
+ return ;
263
+ };
264
+ };
265
+ #endif
266
+
267
+ template <typename T, typename scT>
268
+ bool common_submit_range_fn (sycl::queue &q, size_t n, scT val)
269
+ {
270
+ sycl::program program (q.get_context ());
271
+
272
+ T *a = sycl::malloc_device<T>(n, q);
273
+ T *b = sycl::malloc_device<T>(n, q);
274
+ T *c = sycl::malloc_device<T>(n, q);
275
+ T *d = sycl::malloc_device<T>(n, q);
276
+
277
+ #ifdef USE_FUNCTOR
278
+ auto popa_fn = make_cgh_function<class populate_a <T>>(n, PopulateA<T>(a));
279
+ #else
280
+ auto popa_fn = [&](sycl::handler &cgh) {
281
+ cgh.parallel_for <populate_a<T>>(
282
+ n, [=](sycl::id<1 > idx) { a[idx[0 ]] = T (1 ); });
283
+ };
284
+ #endif
285
+ std::cout << q.get_context ().get_platform ().get_backend () << std::endl;
286
+ sycl::event popa_ev = q.submit (popa_fn);
287
+
288
+ #ifdef USE_FUNCTOR
289
+ auto popb_fn = make_cgh_function<class populate_b <T>>(n, PopulateB<T>(b));
290
+ #else
291
+ auto popb_fn = [&](sycl::handler &cgh) {
292
+ cgh.parallel_for <populate_b<T>>(
293
+ n, [=](sycl::id<1 > idx) { b[idx[0 ]] = T (idx[0 ]); });
294
+ };
295
+ #endif
296
+ sycl::event popb_ev = q.submit (popb_fn);
297
+
298
+ #ifdef USE_FUNCTOR
299
+ auto add_fn = make_cgh_function<class add_a_and_b <T, scT>>(
300
+ n, Add<T, scT>(a, b, c, val));
301
+ #else
302
+ auto add_fn = [&](sycl::handler &cgh) {
303
+ cgh.depends_on ({popa_ev, popb_ev});
304
+ cgh.parallel_for <add_a_and_b<T, scT>>(n, [=](sycl::id<1 > idx) {
305
+ const auto gid = idx[0 ];
306
+ const T va = a[gid];
307
+ const T vb = b[gid];
308
+ c[gid] = va + val * vb;
309
+ });
310
+ };
311
+ #endif
312
+
313
+ sycl::event add_ev = q.submit (add_fn);
314
+ add_ev.wait_and_throw ();
315
+
316
+ program.build_with_kernel_type <add_a_and_b<T, scT>>();
317
+ auto kern = program.get_kernel <add_a_and_b<T, scT>>();
318
+
319
+ DPCTLSyclQueueRef QRef = reinterpret_cast <DPCTLSyclQueueRef>(&q);
320
+ DPCTLSyclKernelRef KRef = reinterpret_cast <DPCTLSyclKernelRef>(&kern);
321
+ DPCTLSyclEventRef PopAERef = reinterpret_cast <DPCTLSyclEventRef>(&popa_ev);
322
+ DPCTLSyclEventRef PopBERef = reinterpret_cast <DPCTLSyclEventRef>(&popb_ev);
323
+
324
+ void *args2[4 ] = {reinterpret_cast <void *>(a), reinterpret_cast <void *>(b),
325
+ reinterpret_cast <void *>(d),
326
+ reinterpret_cast <void *>(&val)};
327
+ DPCTLKernelArgType addKernelArgTypes[] = {DPCTL_VOID_PTR, DPCTL_VOID_PTR,
328
+ DPCTL_VOID_PTR,
329
+ kernel_arg_t <scT>::value};
330
+ size_t Range[] = {n};
331
+ DPCTLSyclEventRef events[2 ] = {PopAERef, PopBERef};
332
+ auto ERef = DPCTLQueue_SubmitRange (KRef, QRef, args2, addKernelArgTypes, 4 ,
333
+ Range, 1 , events, 2 );
334
+ DPCTLQueue_Wait (QRef);
335
+ DPCTLEvent_Delete (ERef);
336
+
337
+ T *host_data = new T[n];
338
+ q.memcpy (host_data, d, n * sizeof (T));
339
+ q.wait_and_throw ();
340
+
341
+ bool worked = true ;
342
+ for (size_t i = 0 ; i < n; ++i) {
343
+ worked = worked && (host_data[i] == T (1 ) + val * T (i));
344
+ }
345
+
346
+ sycl::free (a, q);
347
+ sycl::free (b, q);
348
+ sycl::free (c, q);
349
+ sycl::free (d, q);
350
+
351
+ return worked;
352
+ };
353
+
354
+ } // end of anonymous namespace
355
+
356
+ struct TestQueueSubmitRange : public ::testing::Test
357
+ {
358
+ sycl::queue q;
359
+ size_t n_elems = 512 ;
360
+
361
+ TestQueueSubmitRange () : q(sycl::default_selector{}) {}
362
+ ~TestQueueSubmitRange () {}
363
+ };
364
+
365
+ TEST_F (TestQueueSubmitRange, ChkSubmitRangeInt)
366
+ {
367
+ bool worked = false ;
368
+ worked = common_submit_range_fn<int , int >(q, n_elems, int (-1 ));
369
+ EXPECT_TRUE (worked);
370
+ }
371
+
372
+ TEST_F (TestQueueSubmitRange, ChkSubmitRangeUnsignedInt)
373
+ {
374
+ bool worked = false ;
375
+ worked = common_submit_range_fn<unsigned int , unsigned int >(q, n_elems, 2 );
376
+ EXPECT_TRUE (worked);
377
+ }
378
+
379
+ TEST_F (TestQueueSubmitRange, ChkSubmitRangeShort)
380
+ {
381
+ bool worked = false ;
382
+ worked = common_submit_range_fn<short , short >(q, n_elems, short (-1 ));
383
+ EXPECT_TRUE (worked);
384
+ }
385
+
386
+ TEST_F (TestQueueSubmitRange, ChkSubmitRangeLong)
387
+ {
388
+ bool worked = false ;
389
+ worked = common_submit_range_fn<long , long >(q, n_elems, -1 );
390
+ EXPECT_TRUE (worked);
391
+ }
392
+
393
+ TEST_F (TestQueueSubmitRange, ChkSubmitRangeUnsignedLong)
394
+ {
395
+ bool worked = false ;
396
+ worked =
397
+ common_submit_range_fn<unsigned long , unsigned long >(q, n_elems, 2 );
398
+ EXPECT_TRUE (worked);
399
+ }
400
+
401
+ TEST_F (TestQueueSubmitRange, ChkSubmitRangeLongLong)
402
+ {
403
+ bool worked = false ;
404
+ worked = common_submit_range_fn<long long , long long >(q, n_elems, -1 );
405
+ EXPECT_TRUE (worked);
406
+ }
407
+
408
+ TEST_F (TestQueueSubmitRange, ChkSubmitRangeUnsignedLongLong)
409
+ {
410
+ bool worked = false ;
411
+ worked = common_submit_range_fn<unsigned long long , unsigned long long >(
412
+ q, n_elems, 2 );
413
+ EXPECT_TRUE (worked);
414
+ }
415
+
416
+ TEST_F (TestQueueSubmitRange, ChkSubmitRangeFloat)
417
+ {
418
+ bool worked = false ;
419
+ worked = common_submit_range_fn<float , float >(q, n_elems, 0.5 );
420
+ EXPECT_TRUE (worked);
421
+ }
422
+
423
+ TEST_F (TestQueueSubmitRange, ChkSubmitRangeDouble)
424
+ {
425
+ bool worked = false ;
426
+ worked = common_submit_range_fn<double , double >(q, n_elems, 0.5 );
427
+ EXPECT_TRUE (worked);
428
+ }
0 commit comments