#pragma once

+ #ifndef TEST_GENERIC_IN_LOCAL
+ #define TEST_GENERIC_IN_LOCAL 0
+ #endif
+
#include <CL/sycl.hpp>
#include <algorithm>
#include <cassert>
+ #include <iostream>
#include <numeric>
#include <vector>

using namespace sycl;

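+ // One work-item stores `initial` into a local-memory location; after a
+ // barrier, every work-item in the group reads it back via AtomicRef::load().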
template <template <typename, memory_order, memory_scope, access::address_space>
          class AtomicRef,
-           access::address_space address_space, typename T>
- class load_kernel;
+           access::address_space space, typename T,
+           memory_order order = memory_order::relaxed,
+           memory_scope scope = memory_scope::device>
+ void load_local_test(queue q, size_t N) {
+   T initial = T(42);
+   T load = initial;
+   std::vector<T> output(N);
+   std::fill(output.begin(), output.end(), T(0));
+   {
+     buffer<T> load_buf(&load, 1);
+     buffer<T> output_buf(output.data(), output.size());
+
+     q.submit([&](handler &cgh) {
+        auto ld = load_buf.template get_access<access::mode::read_write>(cgh);
+        auto out =
+            output_buf.template get_access<access::mode::discard_write>(cgh);
+        accessor<T, 1, access::mode::read_write, access::target::local> loc(1,
+                                                                            cgh);
+        cgh.parallel_for(nd_range<1>(N, N), [=](nd_item<1> it) {
+          int gid = it.get_global_id(0);
+          if (gid == 0)
+            loc[0] = initial;
+          it.barrier(access::fence_space::local_space);
+          auto atm = AtomicRef<T, memory_order::relaxed, scope, space>(loc[0]);
+          out[gid] = atm.load(order);
+        });
+      }).wait_and_throw();
+   }
+
+   // All work-items should read the same value
+   // Atomicity isn't tested here, but support for load() is
+   assert(std::all_of(output.begin(), output.end(),
+                      [&](T x) { return (x == initial); }));
+ }

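+ // The global-memory variant: every work-item loads the same buffer element
+ // through AtomicRef::load() and the results are checked against `initial`.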
template <template <typename, memory_order, memory_scope, access::address_space>
          class AtomicRef,
-           access::address_space address_space, typename T>
- void load_test(queue q, size_t N) {
+           access::address_space space, typename T,
+           memory_order order = memory_order::relaxed,
+           memory_scope scope = memory_scope::device>
+ void load_global_test(queue q, size_t N) {
  T initial = T(42);
  T load = initial;
  std::vector<T> output(N);
@@ -29,13 +68,11 @@ void load_test(queue q, size_t N) {
    auto ld = load_buf.template get_access<access::mode::read_write>(cgh);
    auto out =
        output_buf.template get_access<access::mode::discard_write>(cgh);
-     cgh.parallel_for<load_kernel<AtomicRef, address_space, T>>(
-         range<1>(N), [=](item<1> it) {
-           size_t gid = it.get_id(0);
-           auto atm = AtomicRef<T, memory_order::relaxed, memory_scope::device,
-                                address_space>(ld[0]);
-           out[gid] = atm.load();
-         });
+     cgh.parallel_for(range<1>(N), [=](item<1> it) {
+       size_t gid = it.get_id(0);
+       auto atm = AtomicRef<T, memory_order::relaxed, scope, space>(ld[0]);
+       out[gid] = atm.load(order);
+     });
  });
}

@@ -45,15 +82,105 @@ void load_test(queue q, size_t N) {
                     [&](T x) { return (x == initial); }));
}

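+ // Dispatcher: generic_space is routed through the local-memory path only when
+ // TEST_GENERIC_IN_LOCAL is set, and the deprecated ext::oneapi::atomic_ref is
+ // only exercised for non-generic address spaces.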
- template <typename T> void load_test(queue q, size_t N) {
+ template <access::address_space space, typename T,
+           memory_order order = memory_order::relaxed,
+           memory_scope scope = memory_scope::device>
+ void load_test(queue q, size_t N) {
+   constexpr bool do_local_tests =
+       space == access::address_space::local_space ||
+       (space == access::address_space::generic_space && TEST_GENERIC_IN_LOCAL);
+   constexpr bool do_global_tests =
+       space == access::address_space::global_space ||
+       (space == access::address_space::generic_space && !TEST_GENERIC_IN_LOCAL);
+   constexpr bool do_ext_tests = space != access::address_space::generic_space;
+   if constexpr (do_local_tests) {
+ #ifdef RUN_DEPRECATED
+     if constexpr (do_ext_tests) {
+       load_local_test<::sycl::ext::oneapi::atomic_ref, space, T, order, scope>(
+           q, N);
+     }
+ #else
+     load_local_test<::sycl::atomic_ref, space, T, order, scope>(q, N);
+ #endif
+   }
+   if constexpr (do_global_tests) {
#ifdef RUN_DEPRECATED
-     load_test<::sycl::ext::oneapi::atomic_ref,
-               access::address_space::global_space, T>(q, N);
+     if constexpr (do_ext_tests) {
+       load_global_test<::sycl::ext::oneapi::atomic_ref, space, T, order, scope>(
+           q, N);
+     }
#else
-   load_test<::sycl::atomic_ref, access::address_space::global_space, T>(q, N);
+     load_global_test<::sycl::atomic_ref, space, T, order, scope>(q, N);
#endif
+   }
+ }
+
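+ // Sweeps every memory scope the device reports via
+ // atomic_memory_scope_capabilities; memory_scope::device is tested
+ // unconditionally.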
+ template <access::address_space space, typename T,
+           memory_order order = memory_order::relaxed>
+ void load_test_scopes(queue q, size_t N) {
+   std::vector<memory_scope> scopes =
+       q.get_device().get_info<info::device::atomic_memory_scope_capabilities>();
+   if (std::find(scopes.begin(), scopes.end(), memory_scope::system) !=
+       scopes.end()) {
+     load_test<space, T, order, memory_scope::system>(q, N);
+   }
+   if (std::find(scopes.begin(), scopes.end(), memory_scope::work_group) !=
+       scopes.end()) {
+     load_test<space, T, order, memory_scope::work_group>(q, N);
+   }
+   if (std::find(scopes.begin(), scopes.end(), memory_scope::sub_group) !=
+       scopes.end()) {
+     load_test<space, T, order, memory_scope::sub_group>(q, N);
+   }
+   load_test<space, T, order, memory_scope::device>(q, N);
+ }
+
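+ // Repeats the scope sweep for each memory order the device reports that is
+ // valid for a load (acquire); relaxed is always tested.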
+ template <access::address_space space, typename T>
+ void load_test_orders_scopes(queue q, size_t N) {
+   std::vector<memory_order> orders =
+       q.get_device().get_info<info::device::atomic_memory_order_capabilities>();
+   if (std::find(orders.begin(), orders.end(), memory_order::acquire) !=
+       orders.end()) {
+     load_test_scopes<space, T, memory_order::acquire>(q, N);
+   }
+   load_test_scopes<space, T, memory_order::relaxed>(q, N);
}

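+ // Entry point: runs the order/scope sweep for every type enabled at compile
+ // time; 64-bit tests also require the device to support aspect::atomic64.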
- template <typename T> void load_generic_test(queue q, size_t N) {
-   load_test<::sycl::atomic_ref, access::address_space::generic_space, T>(q, N);
+ template <access::address_space space> void load_test_all() {
+   queue q;
+
+   constexpr int N = 32;
+ #ifdef FULL_ATOMIC64_COVERAGE
+   if (!q.get_device().has(aspect::atomic64)) {
+     std::cout << "Skipping atomic64 tests\n";
+     return;
+   }
+
+   load_test_orders_scopes<space, double>(q, N);
+   if constexpr (sizeof(long) == 8) {
+     load_test_orders_scopes<space, long>(q, N);
+     load_test_orders_scopes<space, unsigned long>(q, N);
+   }
+   if constexpr (sizeof(long long) == 8) {
+     load_test_orders_scopes<space, long long>(q, N);
+     load_test_orders_scopes<space, unsigned long long>(q, N);
+   }
+   if constexpr (sizeof(char *) == 8) {
+     load_test_orders_scopes<space, char *>(q, N);
+   }
+ #endif
+   load_test_orders_scopes<space, float>(q, N);
+ #ifdef FULL_ATOMIC32_COVERAGE
+   load_test_orders_scopes<space, int>(q, N);
+   load_test_orders_scopes<space, unsigned int>(q, N);
+   if constexpr (sizeof(long) == 4) {
+     load_test_orders_scopes<space, long>(q, N);
+     load_test_orders_scopes<space, unsigned long>(q, N);
+   }
+   if constexpr (sizeof(char *) == 4) {
+     load_test_orders_scopes<space, char *>(q, N);
+   }
+ #endif
+
+   std::cout << "Test passed." << std::endl;
}
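
For context, a driver that uses this header would pick an address space and call load_test_all. The sketch below shows one plausible way to do that; the header name load.h, the driver file name, and the compile line are assumptions for illustration, not part of this commit.

// Hypothetical driver (a minimal sketch; names are assumed, not from this diff).
// Build, e.g.: clang++ -fsycl load_local.cpp -o load_local
#define TEST_GENERIC_IN_LOCAL 1 // must be defined before including the header
#include "load.h"               // assumed name for the header changed above

int main() {
  // Routes generic_space through the local-memory path because
  // TEST_GENERIC_IN_LOCAL is 1; load_test_all creates its own queue.
  load_test_all<access::address_space::generic_space>();
  return 0;
}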