Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit 386d231

Browse files
author
JackAKirk
committed
[SYCL][CUDA] Added tests for P2P memcpy for buffer/images.
Common tests (suitable for all backends) from enqueue-arg-order-image.cpp and enqueue-arg-order-buffer.cpp have moved to enqueue-arg-order-image-common.cpp and enqueue-arg-order-buffer-cuda.cpp respectively. Common util functions have moved to utils.hpp. tests that are not suitable for the cuda backend have been moved to enqueue-arg-order-image-h2d.cpp and enqueue-arg-order-buffer-h2d.cpp respectively. Signed-off-by: JackAKirk <jack.kirk@codeplay.com>
1 parent 718688c commit 386d231

7 files changed

+1272
-0
lines changed
Lines changed: 310 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,310 @@
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
2+
// RUN: env SYCL_PI_TRACE=2 %GPU_RUN_PLACEHOLDER %t.out %GPU_CHECK_PLACEHOLDER
3+
// RUN: env SYCL_PI_TRACE=2 %CPU_RUN_PLACEHOLDER %t.out %CPU_CHECK_PLACEHOLDER
4+
5+
#include "remind_utils.hpp"
6+
#include <CL/sycl.hpp>
7+
#include <CL/sycl/accessor.hpp>
8+
#include <iostream>
9+
10+
using namespace cl::sycl;
11+
12+
constexpr long width = 16;
13+
constexpr long height = 5;
14+
constexpr long total = width * height;
15+
16+
constexpr long depth = 3;
17+
constexpr long total3D = total * depth;
18+
19+
// ----------- FUNCTIONAL
20+
template <template <int> class T> static void printRangeId(T<3> arr) {
21+
std::cout << ":: "
22+
<< "{" << arr[0] << ", " << arr[1] << ", " << arr[2] << "}"
23+
<< std::endl;
24+
}
25+
26+
void testDetailConvertToArrayOfN() {
27+
// ranges, as used with buffers (args reverse order for images)
28+
range<1> range_1D(width);
29+
range<2> range_2D(height, width);
30+
range<3> range_3D(depth, height, width);
31+
32+
range<3> arr1 = sycl::detail::convertToArrayOfN<3, 1>(range_1D);
33+
// {16,1,1}
34+
printRangeId(arr1);
35+
assert(arr1[0] == width && arr1[1] == 1 && arr1[2] == 1 &&
36+
"arr1 expected as {16,1,1}");
37+
38+
range<3> arr2 = sycl::detail::convertToArrayOfN<3, 1>(range_2D);
39+
//{5, 16, 1}
40+
printRangeId(arr2);
41+
assert(arr2[0] == height && arr2[1] == width && arr2[2] == 1 &&
42+
"arr2 expected as {5, 16, 1}");
43+
44+
range<3> arr3 = sycl::detail::convertToArrayOfN<3, 1>(range_3D);
45+
//{3, 5, 16}
46+
printRangeId(arr3);
47+
assert(arr3[0] == depth && arr3[1] == height && arr3[2] == width &&
48+
"arr3 expected as {3,5,16}");
49+
50+
range<2> smaller2 = sycl::detail::convertToArrayOfN<2, 1>(range_3D);
51+
//{3,5}
52+
std::cout << "{" << smaller2[0] << "," << smaller2[1] << "}" << std::endl;
53+
assert(smaller2[0] == depth && smaller2[1] == height &&
54+
"smaller2 expected {3,5} ");
55+
56+
range<1> smaller1 = sycl::detail::convertToArrayOfN<1, 1>(range_3D);
57+
//{3}
58+
assert(smaller1[0] == depth && "smaller1 expected {3} ");
59+
}
60+
61+
// class to give access to protected function getLinearIndex
62+
template <typename T, int Dims>
63+
class AccTest : public accessor<T, Dims, access::mode::read_write,
64+
access::target::host_buffer,
65+
access::placeholder::false_t> {
66+
using AccessorT =
67+
accessor<T, Dims, access::mode::read_write, access::target::host_buffer,
68+
access::placeholder::false_t>;
69+
70+
public:
71+
AccTest(AccessorT acc) : AccessorT(acc) {}
72+
73+
size_t gLI(id<Dims> idx) { return AccessorT::getLinearIndex(idx); }
74+
};
75+
76+
void testGetLinearIndex() {
77+
constexpr int x = 4, y = 3, z = 1;
78+
// width=16, height=5, depth = 3.
79+
// row is 16 (ie. width)
80+
// slice is 80 (ie width * height)
81+
size_t target_1D = x;
82+
size_t target_2D = (y * width) + x; // s.b. (3*16) + 4 => 52
83+
size_t target_3D =
84+
(height * width * z) + (y * width) + x; // s.b. 80 + (3*16) + 4 => 132
85+
86+
std::vector<float> data_1D(width, 13);
87+
std::vector<float> data_2D(total, 7);
88+
std::vector<float> data_3D(total3D, 17);
89+
90+
// test accessor protected function
91+
{
92+
buffer<float, 1> buffer_1D(data_1D.data(), range<1>(width));
93+
buffer<float, 2> buffer_2D(data_2D.data(), range<2>(height, width));
94+
buffer<float, 3> buffer_3D(data_3D.data(), range<3>(depth, height, width));
95+
96+
auto acc_1D = buffer_1D.get_access<access::mode::read_write>();
97+
auto accTest_1D = AccTest<float, 1>(acc_1D);
98+
size_t linear_1D = accTest_1D.gLI(id<1>(x)); // s.b. 4
99+
std::cout << "linear_1D: " << linear_1D << " target_1D: " << target_1D
100+
<< std::endl;
101+
assert(linear_1D == target_1D && "linear_1D s.b. 4");
102+
103+
auto acc_2D = buffer_2D.get_access<access::mode::read_write>();
104+
auto accTest_2D = AccTest<float, 2>(acc_2D);
105+
size_t linear_2D = accTest_2D.gLI(id<2>(y, x));
106+
std::cout << "linear_2D: " << linear_2D << " target_2D: " << target_2D
107+
<< std::endl;
108+
assert(linear_2D == target_2D && "linear_2D s.b. 52");
109+
110+
auto acc_3D = buffer_3D.get_access<access::mode::read_write>();
111+
auto accTest_3D = AccTest<float, 3>(acc_3D);
112+
size_t linear_3D = accTest_3D.gLI(id<3>(z, y, x));
113+
std::cout << "linear_3D: " << linear_3D << " target_3D: " << target_3D
114+
<< std::endl;
115+
assert(linear_3D == target_3D && "linear_3D s.b. 132");
116+
}
117+
118+
// common.hpp variant of getLinearIndex
119+
size_t lin_1D = getLinearIndex(id<1>(x), range<1>(width));
120+
std::cout << "lin_1D: " << lin_1D << std::endl;
121+
assert(lin_1D == target_1D && "lin_1D s.b. 4");
122+
123+
size_t lin_2D = getLinearIndex(id<2>(y, x), range<2>(height, width));
124+
std::cout << "lin_2D: " << lin_2D << " target_2D: " << target_2D
125+
<< std::endl;
126+
assert(lin_2D == target_2D && "lin_2D s.b. 52");
127+
128+
size_t lin_3D =
129+
getLinearIndex(id<3>(z, y, x), range<3>(depth, height, width));
130+
std::cout << "lin_3D: " << lin_3D << " target_3D: " << target_3D
131+
<< std::endl;
132+
assert(lin_3D == target_3D && "lin_3D s.b. 132");
133+
}
134+
135+
// ----------- BUFFERS
136+
137+
void testcopyD2HBuffer() {
138+
std::cout << "start copyD2H-buffer" << std::endl;
139+
std::vector<float> data_from_1D(width, 13);
140+
std::vector<float> data_to_1D(width, 0);
141+
std::vector<float> data_from_2D(total, 7);
142+
std::vector<float> data_to_2D(total, 0);
143+
std::vector<float> data_from_3D(total3D, 17);
144+
std::vector<float> data_to_3D(total3D, 0);
145+
146+
{
147+
buffer<float, 1> buffer_from_1D(data_from_1D.data(), range<1>(width));
148+
buffer<float, 1> buffer_to_1D(data_to_1D.data(), range<1>(width));
149+
queue myQueue;
150+
myQueue.submit([&](handler &cgh) {
151+
auto read = buffer_from_1D.get_access<access::mode::read>(cgh);
152+
auto write = buffer_to_1D.get_access<access::mode::write>(cgh);
153+
cgh.parallel_for<class copyD2H_1D>(
154+
buffer_from_1D.get_range(),
155+
[=](id<1> index) { write[index] = read[index] * -1; });
156+
});
157+
} // ~buffer 1D
158+
159+
{
160+
buffer<float, 2> buffer_from_2D(data_from_2D.data(),
161+
range<2>(height, width));
162+
buffer<float, 2> buffer_to_2D(data_to_2D.data(), range<2>(height, width));
163+
queue myQueue;
164+
myQueue.submit([&](handler &cgh) {
165+
auto read = buffer_from_2D.get_access<access::mode::read>(cgh);
166+
auto write = buffer_to_2D.get_access<access::mode::write>(cgh);
167+
cgh.parallel_for<class copyD2H_2D>(
168+
buffer_from_2D.get_range(),
169+
[=](id<2> index) { write[index] = read[index] * -1; });
170+
});
171+
} // ~buffer 2D
172+
173+
{
174+
buffer<float, 3> buffer_from_3D(data_from_3D.data(),
175+
range<3>(depth, height, width));
176+
buffer<float, 3> buffer_to_3D(data_to_3D.data(),
177+
range<3>(depth, height, width));
178+
queue myQueue;
179+
myQueue.submit([&](handler &cgh) {
180+
auto read = buffer_from_3D.get_access<access::mode::read>(cgh);
181+
auto write = buffer_to_3D.get_access<access::mode::write>(cgh);
182+
cgh.parallel_for<class copyD2H_3D>(
183+
buffer_from_3D.get_range(),
184+
[=](id<3> index) { write[index] = read[index] * -1; });
185+
});
186+
} // ~buffer 3D
187+
188+
std::cout << "end copyD2H-buffer" << std::endl;
189+
}
190+
191+
void testcopyD2DBuffer() {
192+
std::cout << "start copyD2D-buffer" << std::endl;
193+
std::vector<float> data_from_1D(width, 13);
194+
std::vector<float> data_to_1D(width, 0);
195+
std::vector<float> data_from_2D(total, 7);
196+
std::vector<float> data_to_2D(total, 0);
197+
std::vector<float> data_from_3D(total3D, 17);
198+
std::vector<float> data_to_3D(total3D, 0);
199+
{
200+
buffer<float, 1> buffer_from_1D(data_from_1D.data(), range<1>(width));
201+
buffer<float, 1> buffer_to_1D(data_to_1D.data(), range<1>(width));
202+
buffer<float, 2> buffer_from_2D(data_from_2D.data(),
203+
range<2>(height, width));
204+
buffer<float, 2> buffer_to_2D(data_to_2D.data(), range<2>(height, width));
205+
buffer<float, 3> buffer_from_3D(data_from_3D.data(),
206+
range<3>(depth, height, width));
207+
buffer<float, 3> buffer_to_3D(data_to_3D.data(),
208+
range<3>(depth, height, width));
209+
210+
queue myQueue;
211+
auto e1 = myQueue.submit([&](handler &cgh) {
212+
auto read = buffer_from_1D.get_access<access::mode::read>(cgh);
213+
auto write = buffer_to_1D.get_access<access::mode::write>(cgh);
214+
cgh.copy(read, write);
215+
});
216+
auto e2 = myQueue.submit([&](handler &cgh) {
217+
cgh.depends_on(e1);
218+
auto read = buffer_from_2D.get_access<access::mode::read>(cgh);
219+
auto write = buffer_to_2D.get_access<access::mode::write>(cgh);
220+
cgh.copy(read, write);
221+
});
222+
auto e3 = myQueue.submit([&](handler &cgh) {
223+
cgh.depends_on(e2);
224+
auto read = buffer_from_3D.get_access<access::mode::read>(cgh);
225+
auto write = buffer_to_3D.get_access<access::mode::write>(cgh);
226+
cgh.copy(read, write);
227+
});
228+
229+
} // ~buffer
230+
std::cout << "end copyD2D-buffer" << std::endl;
231+
}
232+
233+
void testFill_Buffer() {
234+
std::cout << "start testFill Buffer" << std::endl;
235+
std::vector<float> data_1D(width, 0);
236+
std::vector<float> data_2D(total, 0);
237+
std::vector<float> data_3D(total3D, 0);
238+
{
239+
buffer<float, 1> buffer_1D(data_1D.data(), range<1>(width));
240+
buffer<float, 2> buffer_2D(data_2D.data(), range<2>(height, width));
241+
buffer<float, 3> buffer_3D(data_3D.data(), range<3>(depth, height, width));
242+
243+
queue myQueue;
244+
auto e1 = myQueue.submit([&](handler &cgh) {
245+
auto acc1D = buffer_1D.get_access<cl::sycl::access::mode::write>(cgh);
246+
cgh.fill(acc1D, float{1});
247+
});
248+
auto e2 = myQueue.submit([&](handler &cgh) {
249+
cgh.depends_on(e1);
250+
auto acc2D = buffer_2D.get_access<cl::sycl::access::mode::write>(cgh);
251+
cgh.fill(acc2D, float{2});
252+
});
253+
auto e3 = myQueue.submit([&](handler &cgh) {
254+
cgh.depends_on(e2);
255+
auto acc3D = buffer_3D.get_access<cl::sycl::access::mode::write>(cgh);
256+
cgh.fill(acc3D, float{3});
257+
});
258+
} // ~buffer
259+
std::cout << "end testFill Buffer" << std::endl;
260+
}
261+
262+
// --------------
263+
264+
int main() {
265+
remind(width, height, depth);
266+
267+
testDetailConvertToArrayOfN();
268+
testGetLinearIndex();
269+
270+
testcopyD2HBuffer();
271+
testcopyD2DBuffer();
272+
testFill_Buffer();
273+
}
274+
275+
// ----------- BUFFERS
276+
277+
// CHECK-LABEL: start copyD2H-buffer
278+
// CHECK: ---> piEnqueueMemBufferRead(
279+
// CHECK: <unknown> : 64
280+
// CHECK: ---> piEnqueueMemBufferReadRect(
281+
// CHECK: pi_buff_rect_region width_bytes/height/depth : 64/5/1
282+
// CHECK-NEXT: <unknown> : 64
283+
// CHECK: ---> piEnqueueMemBufferReadRect(
284+
// CHECK: pi_buff_rect_region width_bytes/height/depth : 64/5/3
285+
// CHECK-NEXT: <unknown> : 64
286+
// CHECK-NEXT: <unknown> : 320
287+
// CHECK: end copyD2H-buffer
288+
289+
// CHECK-LABEL: start copyD2D-buffer
290+
// CHECK: ---> piEnqueueMemBufferCopy(
291+
// CHECK: <unknown> : 64
292+
// CHECK: ---> piEnqueueMemBufferCopyRect(
293+
// CHECK: pi_buff_rect_region width_bytes/height/depth : 64/5/1
294+
// CHECK-NEXT: <unknown> : 64
295+
// CHECK-NEXT: <unknown> : 320
296+
// CHECK-NEXT: <unknown> : 64
297+
// CHECK-NEXT: <unknown> : 320
298+
// CHECK: pi_buff_rect_region width_bytes/height/depth : 64/5/3
299+
// CHECK-NEXT: <unknown> : 64
300+
// CHECK-NEXT: <unknown> : 320
301+
// CHECK-NEXT: <unknown> : 64
302+
// CHECK-NEXT: <unknown> : 320
303+
// CHECK: end copyD2D-buffer
304+
305+
// CHECK-LABEL: start testFill Buffer
306+
// CHECK: ---> piEnqueueMemBufferFill(
307+
// CHECK: <unknown> : 4
308+
// CHECK-NEXT: <unknown> : 0
309+
// CHECK-NEXT: <unknown> : 64
310+
// CHECK: end testFill Buffer

0 commit comments

Comments
 (0)