Skip to content

Commit 1269348

Browse files
authored
[SYCL][FPGA][NFC] LSU builtin test and documentation update (#3546)
Updating documentation to reflect change to multi_ptr. Updating test so it tests device, host ptrs as well as accessor method.
1 parent 5297ca0 commit 1269348

File tree

2 files changed

+73
-36
lines changed

2 files changed

+73
-36
lines changed

sycl/doc/extensions/IntelFPGA/FPGALsu.md

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ is included in `CL/sycl/INTEL/fpga_extensions.hpp`.
77
The class `cl::sycl::INTEL::lsu` allows users to explicitly request that the
88
implementation of a global memory access is configured in a certain way. The
99
class has two member functions, `load()` and `store()` which allow loading from
10-
and storing to a `global_ptr`, respectively, and is templated on the following
10+
and storing to a `multi_ptr`, respectively, and is templated on the following
1111
4 optional paremeters:
1212

1313
1. **`cl::sycl::INTEL::burst_coalesce<B>`, where `B` is a boolean**: request,
@@ -47,10 +47,12 @@ template <class... mem_access_params> class lsu final {
4747
public:
4848
lsu() = delete;
4949

50-
template <typename T> static T load(sycl::global_ptr<T> Ptr) {
50+
template <typename _T, access::address_space _space>
51+
static _T load(sycl::multi_ptr<_T, _space> Ptr) {
52+
check_space<_space>();
5153
check_load();
5254
#if defined(__SYCL_DEVICE_ONLY__) && __has_builtin(__builtin_intel_fpga_mem)
53-
return *__builtin_intel_fpga_mem((T *)Ptr,
55+
return *__builtin_intel_fpga_mem((_T *)Ptr,
5456
_burst_coalesce | _cache |
5557
_dont_statically_coalesce | _prefetch,
5658
_cache_val);
@@ -59,10 +61,12 @@ public:
5961
#endif
6062
}
6163

62-
template <typename T> static void store(sycl::global_ptr<T> Ptr, T Val) {
64+
template <typename _T, access::address_space _space>
65+
static void store(sycl::multi_ptr<_T, _space> Ptr, _T Val) {
66+
check_space<_space>();
6367
check_store();
6468
#if defined(__SYCL_DEVICE_ONLY__) && __has_builtin(__builtin_intel_fpga_mem)
65-
*__builtin_intel_fpga_mem((T *)Ptr,
69+
*__builtin_intel_fpga_mem((_T *)Ptr,
6670
_burst_coalesce | _cache |
6771
_dont_statically_coalesce | _prefetch,
6872
_cache_val) = Val;

sycl/test/extensions/fpga.cpp

Lines changed: 64 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,30 @@ template <unsigned ID> struct ethernet_pipe_id {
77
static constexpr unsigned id = ID;
88
};
99

10+
template <typename T, cl::sycl::access::address_space space>
11+
void lsu_body(cl::sycl::multi_ptr<T,space> input_ptr,cl::sycl::multi_ptr<T,space> output_ptr) {
12+
using PrefetchingLSU =
13+
cl::sycl::INTEL::lsu<cl::sycl::INTEL::prefetch<true>,
14+
cl::sycl::INTEL::statically_coalesce<false>>;
15+
16+
using BurstCoalescedLSU =
17+
cl::sycl::INTEL::lsu<cl::sycl::INTEL::burst_coalesce<true>,
18+
cl::sycl::INTEL::statically_coalesce<false>>;
19+
20+
using CachingLSU =
21+
cl::sycl::INTEL::lsu<cl::sycl::INTEL::burst_coalesce<true>,
22+
cl::sycl::INTEL::cache<1024>,
23+
cl::sycl::INTEL::statically_coalesce<false>>;
24+
25+
using PipelinedLSU = cl::sycl::INTEL::lsu<>;
26+
27+
int X = PrefetchingLSU::load(input_ptr); // int X = input_ptr[0]
28+
int Y = CachingLSU::load(input_ptr + 1); // int Y = input_ptr[1]
29+
30+
BurstCoalescedLSU::store(output_ptr, X); // output_ptr[0] = X
31+
PipelinedLSU::store(output_ptr + 1, Y); // output_ptr[1] = Y
32+
}
33+
1034
using ethernet_read_pipe =
1135
sycl::INTEL::kernel_readable_io_pipe<ethernet_pipe_id<0>, int, 0>;
1236
using ethernet_write_pipe =
@@ -58,39 +82,48 @@ int main() {
5882

5983
/*Check LSU interface*/
6084
{
61-
cl::sycl::buffer<int, 1> output_buffer(1);
62-
auto *in_ptr = cl::sycl::malloc_host<int>(1, Queue.get_context());
63-
64-
Queue.submit([&](cl::sycl::handler &cgh) {
65-
auto output_accessor =
66-
output_buffer.get_access<cl::sycl::access::mode::write>(cgh);
67-
68-
cgh.single_task<class kernel>([=] {
69-
cl::sycl::host_ptr<int> input_ptr(in_ptr);
70-
auto output_ptr = output_accessor.get_pointer();
7185

72-
using PrefetchingLSU =
73-
cl::sycl::INTEL::lsu<cl::sycl::INTEL::prefetch<true>,
74-
cl::sycl::INTEL::statically_coalesce<false>>;
75-
76-
using BurstCoalescedLSU =
77-
cl::sycl::INTEL::lsu<cl::sycl::INTEL::burst_coalesce<true>,
78-
cl::sycl::INTEL::statically_coalesce<false>>;
79-
80-
using CachingLSU =
81-
cl::sycl::INTEL::lsu<cl::sycl::INTEL::burst_coalesce<true>,
82-
cl::sycl::INTEL::cache<1024>,
83-
cl::sycl::INTEL::statically_coalesce<false>>;
84-
85-
using PipelinedLSU = cl::sycl::INTEL::lsu<>;
86-
87-
int X = PrefetchingLSU::load(input_ptr); // int X = input_ptr[0]
88-
int Y = CachingLSU::load(input_ptr + 1); // int Y = input_ptr[1]
89-
90-
BurstCoalescedLSU::store(output_ptr, X); // output_ptr[0] = X
91-
PipelinedLSU::store(output_ptr + 1, Y); // output_ptr[1] = Y
86+
{
87+
auto *out_ptr = cl::sycl::malloc_host<int>(1, Queue.get_context());
88+
auto *in_ptr = cl::sycl::malloc_host<int>(1, Queue.get_context());
89+
Queue.submit([&](sycl::handler &cgh) {
90+
cgh.single_task<class HostAnnotation>([=]() {
91+
cl::sycl::host_ptr<int> input_ptr(in_ptr);
92+
cl::sycl::host_ptr<int> output_ptr(out_ptr);
93+
intelfpga::lsu_body<
94+
int, cl::sycl::access::address_space::global_host_space>(
95+
input_ptr, output_ptr);
96+
});
9297
});
93-
});
98+
}
99+
{
100+
auto *out_ptr = cl::sycl::malloc_device<int>(1, Queue);
101+
auto *in_ptr = cl::sycl::malloc_device<int>(1, Queue);
102+
Queue.submit([&](sycl::handler &cgh) {
103+
cgh.single_task<class DeviceAnnotation>([=]() {
104+
cl::sycl::device_ptr<int> input_ptr(in_ptr);
105+
cl::sycl::device_ptr<int> output_ptr(out_ptr);
106+
intelfpga::lsu_body<
107+
int, cl::sycl::access::address_space::global_device_space>(
108+
input_ptr, output_ptr);
109+
});
110+
});
111+
}
112+
{
113+
cl::sycl::buffer<int, 1> output_buffer(1);
114+
cl::sycl::buffer<int, 1> input_buffer(1);
115+
Queue.submit([&](sycl::handler &cgh) {
116+
auto output_accessor =
117+
output_buffer.get_access<cl::sycl::access::mode::write>(cgh);
118+
auto input_accessor =
119+
input_buffer.get_access<cl::sycl::access::mode::read>(cgh);
120+
cgh.single_task<class AccessorAnnotation>([=]() {
121+
auto input_ptr = input_accessor.get_pointer();
122+
auto output_ptr = output_accessor.get_pointer();
123+
intelfpga::lsu_body<>(input_ptr, output_ptr);
124+
});
125+
});
126+
}
94127
}
95128

96129
return 0;

0 commit comments

Comments
 (0)