Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit 7b17cc7

Browse files
[ESIMD] Added a test for slm_load/slm_store APIs (#380)
* [ESIMD] Added a test for slm_load/slm_store APIs Signed-off-by: Vyacheslav N Klochkov <vyacheslav.n.klochkov@intel.com> Co-authored-by: Denis Bakhvalov
1 parent 0ad1054 commit 7b17cc7

File tree

1 file changed

+133
-0
lines changed

1 file changed

+133
-0
lines changed

SYCL/ESIMD/api/slm_gather_scatter.cpp

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
// REQUIRES: gpu
2+
// UNSUPPORTED: cuda
3+
// RUN: %clangxx -fsycl %s -o %t.out
4+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
5+
//
6+
// The test checks functionality of the slm_gather/slm_scatter ESIMD APIs.
7+
8+
// TODO: Enable the test for 1 and 2 byte types when the implementation is fixed
9+
#define SKIP_ONE_AND_TWO_BYTE_BLOCKS 1
10+
11+
#include "../esimd_test_utils.hpp"
12+
13+
#include <CL/sycl.hpp>
14+
#include <CL/sycl/INTEL/esimd.hpp>
15+
#include <iostream>
16+
17+
using namespace cl::sycl;
18+
19+
template <typename T, unsigned VL, unsigned STRIDE> struct Kernel {
20+
T *buf;
21+
Kernel(T *buf) : buf(buf) {}
22+
23+
void operator()(id<1> i) const SYCL_ESIMD_KERNEL {
24+
using namespace sycl::ext::intel::experimental::esimd;
25+
26+
// In this test, we have a single workitem. No barriers required.
27+
slm_init(VL * STRIDE *
28+
sizeof(typename sycl::ext::intel::experimental::esimd::detail::
29+
dword_type<T>::type));
30+
31+
simd<T, VL> valsIn;
32+
valsIn.copy_from(buf);
33+
34+
simd<uint32_t, VL> offsets(0, STRIDE * sizeof(T));
35+
slm_scatter<T, VL>(valsIn, offsets);
36+
37+
simd_mask<VL> pred = 1;
38+
pred[VL - 1] = 0; // mask out the last lane
39+
simd<T, VL> valsOut = slm_gather<T, VL>(offsets, pred);
40+
valsOut *= -1;
41+
42+
valsOut.copy_to(buf);
43+
}
44+
};
45+
46+
template <typename T, unsigned VL, unsigned STRIDE> bool test(queue q) {
47+
using namespace sycl::ext::intel::experimental::esimd;
48+
constexpr size_t size = VL;
49+
50+
std::cout << "Testing T=" << typeid(T).name() << " VL=" << VL
51+
<< " STRIDE=" << STRIDE << "...\n";
52+
53+
auto dev = q.get_device();
54+
auto ctxt = q.get_context();
55+
T *A = static_cast<T *>(malloc_shared(size * sizeof(T), dev, ctxt));
56+
T *gold = new T[size];
57+
58+
for (int i = 0; i < size; ++i) {
59+
A[i] = (T)-i;
60+
gold[i] = (T)i;
61+
}
62+
63+
// Account for masked out last lane (with pred argument to slm_gather).
64+
gold[(VL - 1)] = (T)0;
65+
66+
try {
67+
range<1> glob_range{1};
68+
69+
auto e = q.submit([&](handler &cgh) {
70+
Kernel<T, VL, STRIDE> kernel(A);
71+
cgh.parallel_for(glob_range, kernel);
72+
});
73+
e.wait();
74+
} catch (sycl::exception const &e) {
75+
std::cerr << "SYCL exception caught: " << e.what() << '\n';
76+
free(A, ctxt);
77+
delete[] gold;
78+
return static_cast<bool>(e.code());
79+
}
80+
81+
int err_cnt = 0;
82+
for (unsigned i = 0; i < size; ++i) {
83+
if (A[i] != gold[i]) {
84+
if (++err_cnt < VL) {
85+
std::cout << "failed at index " << i << ": " << A[i]
86+
<< " != " << gold[i] << " (gold)\n";
87+
}
88+
}
89+
}
90+
91+
if (err_cnt > 0) {
92+
std::cout << " pass rate: "
93+
<< ((float)(size - err_cnt) / (float)size) * 100.0f << "% ("
94+
<< (size - err_cnt) << "/" << size << ")\n";
95+
}
96+
97+
free(A, ctxt);
98+
delete[] gold;
99+
100+
std::cout << (err_cnt > 0 ? " FAILED\n" : " Passed\n");
101+
return err_cnt > 0 ? false : true;
102+
}
103+
104+
template <typename T, unsigned VL> bool test(queue q) {
105+
bool passed = true;
106+
passed &= test<T, VL, 1>(q);
107+
passed &= test<T, VL, 2>(q);
108+
passed &= test<T, VL, 3>(q);
109+
passed &= test<T, VL, 4>(q);
110+
return passed;
111+
}
112+
113+
int main(void) {
114+
queue q(esimd_test::ESIMDSelector{}, esimd_test::createExceptionHandler());
115+
116+
auto dev = q.get_device();
117+
std::cout << "Running on " << dev.get_info<info::device::name>() << "\n";
118+
119+
bool passed = true;
120+
121+
#ifndef SKIP_ONE_AND_TWO_BYTE_BLOCKS
122+
passed &= test<char, 16>(q);
123+
passed &= test<char, 32>(q);
124+
passed &= test<short, 16>(q);
125+
passed &= test<short, 32>(q);
126+
#endif
127+
passed &= test<int, 16>(q);
128+
passed &= test<int, 32>(q);
129+
passed &= test<float, 16>(q);
130+
passed &= test<float, 32>(q);
131+
132+
return passed ? 0 : 1;
133+
}

0 commit comments

Comments
 (0)