Skip to content

Commit a3e9aab

Browse files
[SYCL] [FPGA] Create experimental headers for FPGA latency control (#5066)
Create experimental header files that provide user API for FPGA latency control feature. experimental/fpga_lsu.hpp and experimental/pipes.hpp are simply the original version plus latency control API described in the extension documents. experimental/fpga_utils.hpp provides utility functions used by experimental LSU and pipe headers. fpga_extensions.hpp will include new experimental LSU and pipe headers. Related PR for extension documents: #5027 Test: intel/llvm-test-suite#596
1 parent 35729a7 commit a3e9aab

File tree

4 files changed

+497
-0
lines changed

4 files changed

+497
-0
lines changed
Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
//==-------------- fpga_lsu.hpp --- SYCL FPGA LSU Extensions ---------------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
#pragma once
9+
10+
#include "fpga_utils.hpp"
11+
#include <CL/sycl/detail/defines.hpp>
12+
#include <CL/sycl/pointers.hpp>
13+
14+
__SYCL_INLINE_NAMESPACE(cl) {
15+
namespace sycl {
16+
namespace ext {
17+
namespace intel {
18+
namespace experimental {
19+
20+
constexpr uint8_t BURST_COALESCE = 0x1;
21+
constexpr uint8_t CACHE = 0x2;
22+
constexpr uint8_t STATICALLY_COALESCE = 0x4;
23+
constexpr uint8_t PREFETCH = 0x8;
24+
25+
template <int32_t _N> struct burst_coalesce_impl {
26+
static constexpr int32_t value = _N;
27+
static constexpr int32_t default_value = 0;
28+
};
29+
30+
template <int32_t _N> struct cache {
31+
static constexpr int32_t value = _N;
32+
static constexpr int32_t default_value = 0;
33+
};
34+
35+
template <int32_t _N> struct prefetch_impl {
36+
static constexpr int32_t value = _N;
37+
static constexpr int32_t default_value = 0;
38+
};
39+
40+
template <int32_t _N> struct statically_coalesce_impl {
41+
static constexpr int32_t value = _N;
42+
static constexpr int32_t default_value = 1;
43+
};
44+
45+
template <bool _B> using burst_coalesce = burst_coalesce_impl<_B>;
46+
template <bool _B> using prefetch = prefetch_impl<_B>;
47+
template <bool _B> using statically_coalesce = statically_coalesce_impl<_B>;
48+
49+
template <class... _mem_access_params> class lsu final {
50+
public:
51+
lsu() = delete;
52+
53+
template <class... _Params, typename _T, access::address_space _space>
54+
static _T load(sycl::multi_ptr<_T, _space> Ptr) {
55+
check_space<_space>();
56+
check_load();
57+
#if defined(__SYCL_DEVICE_ONLY__) && __has_builtin(__builtin_intel_fpga_mem)
58+
static constexpr auto _anchor_id =
59+
_GetValue<int32_t, latency_anchor_id, _Params...>::value;
60+
static constexpr auto _constraint =
61+
_GetValue3<int32_t, type, int32_t, latency_constraint,
62+
_Params...>::value;
63+
64+
static constexpr int32_t _target_anchor = std::get<0>(_constraint);
65+
static constexpr type _control_type = std::get<1>(_constraint);
66+
static constexpr int32_t _cycle = std::get<2>(_constraint);
67+
int32_t _type = 0; // Default: _control_type == type::none
68+
if constexpr (_control_type == type::exact) {
69+
_type = 1;
70+
} else if constexpr (_control_type == type::max) {
71+
_type = 2;
72+
} else if constexpr (_control_type == type::min) {
73+
_type = 3;
74+
}
75+
76+
return *__latency_control_mem_wrapper((_T *)Ptr, _anchor_id, _target_anchor,
77+
_type, _cycle);
78+
#else
79+
return *Ptr;
80+
#endif
81+
}
82+
83+
template <class... _Params, typename _T, access::address_space _space>
84+
static void store(sycl::multi_ptr<_T, _space> Ptr, _T Val) {
85+
check_space<_space>();
86+
check_store();
87+
#if defined(__SYCL_DEVICE_ONLY__) && __has_builtin(__builtin_intel_fpga_mem)
88+
static constexpr auto _anchor_id =
89+
_GetValue<int32_t, latency_anchor_id, _Params...>::value;
90+
static constexpr auto _constraint =
91+
_GetValue3<int32_t, type, int32_t, latency_constraint,
92+
_Params...>::value;
93+
94+
static constexpr int32_t _target_anchor = std::get<0>(_constraint);
95+
static constexpr type _control_type = std::get<1>(_constraint);
96+
static constexpr int32_t _cycle = std::get<2>(_constraint);
97+
int32_t _type = 0; // Default: _control_type == type::none
98+
if constexpr (_control_type == type::exact) {
99+
_type = 1;
100+
} else if constexpr (_control_type == type::max) {
101+
_type = 2;
102+
} else if constexpr (_control_type == type::min) {
103+
_type = 3;
104+
}
105+
106+
*__latency_control_mem_wrapper((_T *)Ptr, _anchor_id, _target_anchor, _type,
107+
_cycle) = Val;
108+
#else
109+
*Ptr = Val;
110+
#endif
111+
}
112+
113+
private:
114+
static constexpr int32_t _burst_coalesce_val =
115+
_GetValue<int32_t, burst_coalesce_impl, _mem_access_params...>::value;
116+
static constexpr uint8_t _burst_coalesce =
117+
_burst_coalesce_val == 1 ? BURST_COALESCE : 0;
118+
119+
static constexpr int32_t _cache_val =
120+
_GetValue<int32_t, cache, _mem_access_params...>::value;
121+
static constexpr uint8_t _cache = (_cache_val > 0) ? CACHE : 0;
122+
123+
static constexpr int32_t _statically_coalesce_val =
124+
_GetValue<int32_t, statically_coalesce_impl,
125+
_mem_access_params...>::value;
126+
static constexpr uint8_t _dont_statically_coalesce =
127+
_statically_coalesce_val == 0 ? STATICALLY_COALESCE : 0;
128+
129+
static constexpr int32_t _prefetch_val =
130+
_GetValue<int32_t, prefetch_impl, _mem_access_params...>::value;
131+
static constexpr uint8_t _prefetch = _prefetch_val ? PREFETCH : 0;
132+
133+
static_assert(_cache_val >= 0, "cache size parameter must be non-negative");
134+
135+
template <access::address_space _space> static void check_space() {
136+
static_assert(_space == access::address_space::global_space ||
137+
_space == access::address_space::global_device_space ||
138+
_space == access::address_space::global_host_space,
139+
"lsu controls are only supported for global_ptr, "
140+
"device_ptr, and host_ptr objects");
141+
}
142+
143+
static void check_load() {
144+
static_assert(_cache == 0 || _burst_coalesce == BURST_COALESCE,
145+
"unable to implement a cache without a burst coalescer");
146+
static_assert(_prefetch == 0 || _burst_coalesce == 0,
147+
"unable to implement a prefetcher and a burst coalescer "
148+
"simulataneously");
149+
static_assert(
150+
_prefetch == 0 || _cache == 0,
151+
"unable to implement a prefetcher and a cache simulataneously");
152+
}
153+
static void check_store() {
154+
static_assert(_cache == 0, "unable to implement a store LSU with a cache.");
155+
static_assert(_prefetch == 0,
156+
"unable to implement a store LSU with a prefetcher.");
157+
}
158+
159+
#if defined(__SYCL_DEVICE_ONLY__) && __has_builtin(__builtin_intel_fpga_mem)
160+
// FPGA BE will recognize this function and extract its arguments.
161+
// TODO: Pass latency control params via __builtin_intel_fpga_mem when ready.
162+
template <typename _T>
163+
static _T *__latency_control_mem_wrapper(_T *Ptr, int32_t AnchorID,
164+
int32_t TargetAnchor, int32_t Type,
165+
int32_t Cycle) {
166+
return __builtin_intel_fpga_mem(
167+
Ptr, _burst_coalesce | _cache | _dont_statically_coalesce | _prefetch,
168+
_cache_val);
169+
}
170+
#endif
171+
};
172+
173+
} // namespace experimental
174+
} // namespace intel
175+
} // namespace ext
176+
} // namespace sycl
177+
} // __SYCL_INLINE_NAMESPACE(cl)
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
//==------------- fpga_utils.hpp --- SYCL FPGA Reg Extensions --------------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#pragma once
10+
11+
#include <CL/sycl/detail/defines.hpp>
12+
#include <CL/sycl/detail/stl_type_traits.hpp>
13+
#include <CL/sycl/stl.hpp>
14+
#include <tuple>
15+
16+
__SYCL_INLINE_NAMESPACE(cl) {
17+
namespace sycl {
18+
namespace ext {
19+
namespace intel {
20+
namespace experimental {
21+
22+
enum class type {
23+
none, // default
24+
exact,
25+
max,
26+
min
27+
};
28+
29+
template <int32_t _N> struct latency_anchor_id {
30+
static constexpr int32_t value = _N;
31+
static constexpr int32_t default_value = -1;
32+
};
33+
34+
template <int32_t _N1, type _N2, int32_t _N3> struct latency_constraint {
35+
static constexpr std::tuple<int32_t, type, int32_t> value = {_N1, _N2, _N3};
36+
static constexpr std::tuple<int32_t, type, int32_t> default_value = {
37+
0, type::none, 0};
38+
};
39+
40+
using ignoreParam_int_t = int32_t;
41+
constexpr ignoreParam_int_t IgnoreParamInt{};
42+
using ignoreParam_enum_t = type;
43+
constexpr ignoreParam_enum_t IgnoreParamEnum{};
44+
45+
template <class _VType, class _T> struct _ValueExtractorImp {
46+
static constexpr auto _First = _T::value;
47+
static constexpr auto _Second = IgnoreParamEnum;
48+
static constexpr auto _Third = IgnoreParamInt;
49+
};
50+
51+
template <class _VTypeFirst, class _VTypeSecond, class _VTypeThird, class _T>
52+
struct _ValueExtractorImp<
53+
const std::tuple<_VTypeFirst, _VTypeSecond, _VTypeThird>, _T> {
54+
static constexpr auto _First = std::get<0>(_T::value);
55+
static constexpr auto _Second = std::get<1>(_T::value);
56+
static constexpr auto _Third = std::get<2>(_T::value);
57+
};
58+
59+
template <class _T>
60+
struct _ValueExtractor : _ValueExtractorImp<decltype(_T::value), _T> {};
61+
62+
template <class _VTypeFirst, class _VTypeSecond, class _VTypeThird,
63+
template <_VTypeFirst, _VTypeSecond, _VTypeThird> class _Type,
64+
class _T>
65+
struct _MatchType
66+
: std::is_same<
67+
_Type<_ValueExtractor<_T>::_First, _ValueExtractor<_T>::_Second,
68+
_ValueExtractor<_T>::_Third>,
69+
_T> {};
70+
71+
template <class _VTypeFirst, class _VTypeSecond, class _VTypeThird,
72+
template <_VTypeFirst, _VTypeSecond, _VTypeThird> class _Type,
73+
class... _T>
74+
struct _GetValue3 {
75+
static constexpr auto value =
76+
_Type<_VTypeFirst{}, _VTypeSecond{}, _VTypeThird{}>::default_value;
77+
};
78+
79+
template <class _VTypeFirst, class _VTypeSecond, class _VTypeThird,
80+
template <_VTypeFirst, _VTypeSecond, _VTypeThird> class _Type,
81+
class _T1, class... _T>
82+
struct _GetValue3<_VTypeFirst, _VTypeSecond, _VTypeThird, _Type, _T1, _T...> {
83+
static constexpr auto value = std::conditional<
84+
_MatchType<_VTypeFirst, _VTypeSecond, _VTypeThird, _Type, _T1>::value,
85+
_T1, _GetValue3<_VTypeFirst, _VTypeSecond, _VTypeThird, _Type, _T...>>::
86+
type::value;
87+
};
88+
89+
template <class _VType, template <_VType> class _Type, class... _T>
90+
struct _GetValue {
91+
private:
92+
template <_VType _V1, ignoreParam_enum_t, ignoreParam_int_t>
93+
using _Type2 = _Type<_V1>;
94+
95+
public:
96+
static constexpr auto value =
97+
_GetValue3<_VType, ignoreParam_enum_t, ignoreParam_int_t, _Type2,
98+
_T...>::value;
99+
};
100+
101+
} // namespace experimental
102+
} // namespace intel
103+
} // namespace ext
104+
} // namespace sycl
105+
} // __SYCL_INLINE_NAMESPACE(cl)

0 commit comments

Comments
 (0)