Skip to content

Commit 9c22cff

Browse files
authored
[SYCL][CUDA] Added tests for inorder queues that use USM. (intel#267)
Signed-off-by: JackAKirk <jack.kirk@codeplay.com>
1 parent e371f13 commit 9c22cff

File tree

2 files changed

+132
-0
lines changed

2 files changed

+132
-0
lines changed
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
2+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
3+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
4+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
5+
// SYCL in-order queues explicit USM test.
6+
// Simple test checking explicit USM functionality using a Queue with the
7+
// in_order property.
8+
//
9+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
10+
// See https://llvm.org/LICENSE.txt for license information.
11+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
12+
//
13+
//===----------------------------------------------------------------------===//
14+
#include <CL/sycl.hpp>
15+
#include <iostream>
16+
17+
using namespace cl::sycl;
18+
19+
int main() {
20+
21+
{
22+
const int dataSize = 32;
23+
const size_t numBytes = static_cast<size_t>(dataSize) * sizeof(int);
24+
25+
int dataA[dataSize] = {0};
26+
int dataB[dataSize] = {0};
27+
28+
queue Queue{property::queue::in_order()};
29+
30+
auto devicePtrA = malloc_device<int>(numBytes, Queue);
31+
Queue.memcpy(devicePtrA, &dataA, numBytes);
32+
33+
Queue.submit([&](handler &cgh) {
34+
auto myRange = range<1>(dataSize);
35+
auto myKernel = ([=](id<1> idx) { devicePtrA[idx] = idx[0]; });
36+
37+
cgh.parallel_for<class ordered_writer>(myRange, myKernel);
38+
});
39+
40+
auto devicePtrB = malloc_device<int>(numBytes, Queue);
41+
Queue.memcpy(devicePtrB, &dataB, numBytes);
42+
43+
Queue.submit([&](handler &cgh) {
44+
auto myRange = range<1>(dataSize);
45+
auto myKernel = ([=](id<1> idx) { devicePtrB[idx] = devicePtrA[idx]; });
46+
47+
cgh.parallel_for<class ordered_reader>(myRange, myKernel);
48+
});
49+
50+
Queue.memcpy(&dataB, devicePtrB, numBytes);
51+
52+
Queue.wait();
53+
54+
auto ctxt = Queue.get_context();
55+
free(devicePtrA, ctxt);
56+
free(devicePtrB, ctxt);
57+
58+
for (int i = 0; i != dataSize; ++i) {
59+
if (dataB[i] != i) {
60+
std::cout << "Result mismatches " << dataB[i] << " vs expected " << i
61+
<< " for index " << i << std::endl;
62+
}
63+
}
64+
}
65+
return 0;
66+
}
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
// REQUIRES: cuda
2+
//
3+
// Currently only CUDA is supported: it would be necessary to generalize
4+
// mem_advice for other devices before adding support.
5+
//
6+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
7+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
8+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
9+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
10+
// SYCL in-order queues implicit USM test.
11+
// Simple test checking implicit USM functionality using a Queue with the
12+
// in_order property.
13+
//
14+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
15+
// See https://llvm.org/LICENSE.txt for license information.
16+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
17+
//
18+
//===----------------------------------------------------------------------===//
19+
#include <CL/sycl.hpp>
20+
#include <iostream>
21+
22+
using namespace cl::sycl;
23+
24+
int main() {
25+
26+
{
27+
queue Queue{property::queue::in_order()};
28+
29+
// optimize for read only
30+
const int mem_advice = 1;
31+
32+
const int dataSize = 32;
33+
const size_t numBytes = static_cast<size_t>(dataSize) * sizeof(int);
34+
35+
auto dataA = malloc_shared<int>(numBytes, Queue);
36+
auto dataB = malloc_shared<int>(numBytes, Queue);
37+
38+
for (int i = 0; i < dataSize; i++) {
39+
dataA[i] = i;
40+
dataB[i] = 0;
41+
}
42+
43+
Queue.mem_advise(dataA, numBytes, (pi_mem_advice)mem_advice);
44+
45+
Queue.submit([&](handler &cgh) {
46+
auto myRange = range<1>(dataSize);
47+
auto myKernel = ([=](id<1> idx) { dataB[idx] = dataA[idx]; });
48+
49+
cgh.parallel_for<class ordered_reader>(myRange, myKernel);
50+
});
51+
52+
Queue.wait();
53+
54+
for (int i = 0; i != dataSize; ++i) {
55+
if (dataB[i] != i) {
56+
std::cout << "Result mismatches " << dataB[i] << " vs expected " << i
57+
<< " for index " << i << std::endl;
58+
}
59+
}
60+
61+
auto ctxt = Queue.get_context();
62+
free(dataA, ctxt);
63+
free(dataB, ctxt);
64+
}
65+
return 0;
66+
}

0 commit comments

Comments
 (0)