Skip to content

Commit 75fa67d

Browse files
romanovvladvladimirlaz
authored andcommitted
[SYCL] Refactoring of handler class.
Modified handler class so it creates special CG class. CG class represents different types of command groups, such as kernel execution, fill and so on. This is needed to decouple Scheduler from template parameters introduced by lambda, various ranges and arguments. Signed-off-by: Vlad Romanov <vlad.romanov@intel.com>
1 parent 80f72f2 commit 75fa67d

File tree

4 files changed

+1339
-1
lines changed

4 files changed

+1339
-1
lines changed

sycl/include/CL/sycl.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010

1111
#include <CL/sycl/accessor.hpp>
1212
#include <CL/sycl/atomic.hpp>
13-
#include <CL/sycl/builtins.hpp>
1413
#include <CL/sycl/buffer.hpp>
14+
#include <CL/sycl/builtins.hpp>
1515
#include <CL/sycl/context.hpp>
1616
#include <CL/sycl/device.hpp>
1717
#include <CL/sycl/device_selector.hpp>

sycl/include/CL/sycl/detail/cg.hpp

Lines changed: 347 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,347 @@
1+
//==-------------- CG.hpp - SYCL standard header file ----------------------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#pragma once
10+
11+
#include <CL/sycl/detail/accessor_impl.hpp>
12+
#include <CL/sycl/detail/helpers.hpp>
13+
#include <CL/sycl/detail/kernel_desc.hpp>
14+
#include <CL/sycl/id.hpp>
15+
#include <CL/sycl/kernel.hpp>
16+
#include <CL/sycl/nd_item.hpp>
17+
#include <CL/sycl/range.hpp>
18+
19+
#include <memory>
20+
#include <string>
21+
#include <type_traits>
22+
#include <vector>
23+
24+
namespace cl {
25+
namespace sycl {
26+
namespace detail {
27+
28+
using namespace cl;
29+
30+
// The structure represents kernel argument.
31+
class ArgDesc {
32+
public:
33+
ArgDesc(sycl::detail::kernel_param_kind_t Type, void *Ptr, int Size,
34+
int Index)
35+
: MType(Type), MPtr(Ptr), MSize(Size), MIndex(Index) {}
36+
37+
sycl::detail::kernel_param_kind_t MType;
38+
void *MPtr;
39+
int MSize;
40+
int MIndex;
41+
};
42+
43+
// The structure represents NDRange - global, local sizes, global offset and
44+
// number of dimensions.
45+
class NDRDescT {
46+
// The method initializes all sizes for dimensions greater than the passed one
47+
// to the default values, so they will not affect execution.
48+
template <int Dims_> void setNDRangeLeftover() {
49+
for (int I = Dims_; I < 3; ++I) {
50+
GlobalSize[I] = 1;
51+
LocalSize[I] = 1;
52+
GlobalOffset[I] = 0;
53+
}
54+
}
55+
56+
public:
57+
NDRDescT() = default;
58+
59+
template <int Dims_> void set(sycl::range<Dims_> NumWorkItems) {
60+
for (int I = 0; I < Dims_; ++I) {
61+
GlobalSize[I] = NumWorkItems[I];
62+
LocalSize[I] = 1;
63+
GlobalOffset[I] = 0;
64+
}
65+
66+
setNDRangeLeftover<Dims_>();
67+
Dims = Dims_;
68+
}
69+
70+
template <int Dims_> void set(sycl::nd_range<Dims_> ExecutionRange) {
71+
for (int I = 0; I < Dims_; ++I) {
72+
GlobalSize[I] = ExecutionRange.get_global_range()[I];
73+
LocalSize[I] = ExecutionRange.get_local_range()[I];
74+
GlobalOffset[I] = ExecutionRange.get_offset()[I];
75+
}
76+
setNDRangeLeftover<Dims_>();
77+
Dims = Dims_;
78+
}
79+
80+
sycl::range<3> GlobalSize;
81+
sycl::range<3> LocalSize;
82+
sycl::id<3> GlobalOffset;
83+
size_t Dims;
84+
};
85+
86+
// The pure virtual class aimed to store lambda/functors of any type.
87+
class HostKernelBase {
88+
public:
89+
// The method executes lambda stored using NDRange passed.
90+
virtual void call(const NDRDescT &NDRDesc) = 0;
91+
// Return pointer to the lambda object.
92+
// Used to extract captured variables.
93+
virtual char *getPtr() = 0;
94+
virtual ~HostKernelBase() = default;
95+
};
96+
97+
// Class which stores specific lambda object.
98+
template <class KernelType, class KernelArgType, int Dims>
99+
class HostKernel : public HostKernelBase {
100+
using IDBuilder = sycl::detail::Builder;
101+
KernelType MKernel;
102+
103+
public:
104+
HostKernel(KernelType Kernel) : MKernel(Kernel) {}
105+
void call(const NDRDescT &NDRDesc) override { runOnHost(NDRDesc); }
106+
107+
char *getPtr() override { return reinterpret_cast<char *>(&MKernel); }
108+
109+
template <class ArgT = KernelArgType>
110+
typename std::enable_if<std::is_same<ArgT, void>::value>::type
111+
runOnHost(const NDRDescT &NDRDesc) {
112+
MKernel();
113+
}
114+
115+
template <class ArgT = KernelArgType>
116+
typename std::enable_if<std::is_same<ArgT, sycl::id<Dims>>::value>::type
117+
runOnHost(const NDRDescT &NDRDesc) {
118+
size_t XYZ[3] = {0};
119+
sycl::id<Dims> ID;
120+
for (; XYZ[2] < NDRDesc.GlobalSize[2]; ++XYZ[2]) {
121+
XYZ[1] = 0;
122+
for (; XYZ[1] < NDRDesc.GlobalSize[1]; ++XYZ[1]) {
123+
XYZ[0] = 0;
124+
for (; XYZ[0] < NDRDesc.GlobalSize[0]; ++XYZ[0]) {
125+
for (int I = 0; I < Dims; ++I)
126+
ID[I] = XYZ[I];
127+
MKernel(ID);
128+
}
129+
}
130+
}
131+
}
132+
133+
template <class ArgT = KernelArgType>
134+
typename std::enable_if<
135+
(std::is_same<ArgT, item<Dims, /*Offset=*/false>>::value ||
136+
std::is_same<ArgT, item<Dims, /*Offset=*/true>>::value)>::type
137+
runOnHost(const NDRDescT &NDRDesc) {
138+
size_t XYZ[3] = {0};
139+
sycl::id<Dims> ID;
140+
sycl::range<Dims> Range;
141+
for (int I = 0; I < Dims; ++I)
142+
Range[I] = NDRDesc.GlobalSize[I];
143+
144+
for (; XYZ[2] < NDRDesc.GlobalSize[2]; ++XYZ[2]) {
145+
XYZ[1] = 0;
146+
for (; XYZ[1] < NDRDesc.GlobalSize[1]; ++XYZ[1]) {
147+
XYZ[0] = 0;
148+
for (; XYZ[0] < NDRDesc.GlobalSize[0]; ++XYZ[0]) {
149+
for (int I = 0; I < Dims; ++I)
150+
ID[I] = XYZ[I];
151+
152+
sycl::item<Dims, /*Offset=*/false> Item =
153+
IDBuilder::createItem<Dims, false>(Range, ID);
154+
MKernel(Item);
155+
}
156+
}
157+
}
158+
}
159+
160+
template <class ArgT = KernelArgType>
161+
typename std::enable_if<std::is_same<ArgT, nd_item<Dims>>::value>::type
162+
runOnHost(const NDRDescT &NDRDesc) {
163+
// TODO add offset logic
164+
165+
sycl::id<3> GroupSize;
166+
for (int I = 0; I < 3; ++I) {
167+
GroupSize[I] = NDRDesc.GlobalSize[I] / NDRDesc.LocalSize[I];
168+
}
169+
170+
sycl::range<Dims> GlobalSize;
171+
sycl::range<Dims> LocalSize;
172+
sycl::id<Dims> GlobalOffset;
173+
for (int I = 0; I < Dims; ++I) {
174+
GlobalOffset[I] = NDRDesc.GlobalOffset[I];
175+
LocalSize[I] = NDRDesc.LocalSize[I];
176+
GlobalSize[I] = NDRDesc.GlobalSize[I];
177+
}
178+
179+
sycl::id<Dims> GlobalID;
180+
sycl::id<Dims> LocalID;
181+
182+
size_t GroupXYZ[3] = {0};
183+
sycl::id<Dims> GroupID;
184+
for (; GroupXYZ[2] < GroupSize[2]; ++GroupXYZ[2]) {
185+
GroupXYZ[1] = 0;
186+
for (; GroupXYZ[1] < GroupSize[1]; ++GroupXYZ[1]) {
187+
GroupXYZ[0] = 0;
188+
for (; GroupXYZ[0] < GroupSize[0]; ++GroupXYZ[0]) {
189+
for (int I = 0; I < Dims; ++I)
190+
GroupID[I] = GroupXYZ[I];
191+
192+
sycl::group<Dims> Group =
193+
IDBuilder::createGroup<Dims>(GlobalSize, LocalSize, GroupID);
194+
size_t LocalXYZ[3] = {0};
195+
for (; LocalXYZ[2] < NDRDesc.LocalSize[2]; ++LocalXYZ[2]) {
196+
LocalXYZ[1] = 0;
197+
for (; LocalXYZ[1] < NDRDesc.LocalSize[1]; ++LocalXYZ[1]) {
198+
LocalXYZ[0] = 0;
199+
for (; LocalXYZ[0] < NDRDesc.LocalSize[0]; ++LocalXYZ[0]) {
200+
201+
for (int I = 0; I < Dims; ++I) {
202+
GlobalID[I] = GroupXYZ[I] * LocalSize[I] + LocalXYZ[I];
203+
LocalID[I] = LocalXYZ[I];
204+
}
205+
const sycl::item<Dims, /*Offset=*/true> GlobalItem =
206+
IDBuilder::createItem<Dims, true>(GlobalSize, GlobalID,
207+
GlobalOffset);
208+
const sycl::item<Dims, /*Offset=*/false> LocalItem =
209+
IDBuilder::createItem<Dims, false>(LocalSize, LocalID);
210+
const sycl::nd_item<Dims> NDItem =
211+
IDBuilder::createNDItem<Dims>(GlobalItem, LocalItem, Group);
212+
MKernel(NDItem);
213+
}
214+
}
215+
}
216+
}
217+
}
218+
}
219+
}
220+
~HostKernel() = default;
221+
};
222+
223+
// The base class for all types of command groups.
224+
class CG {
225+
public:
226+
// Type of the command group.
227+
enum CGTYPE {
228+
KERNEL,
229+
COPY_ACC_TO_PTR,
230+
COPY_PTR_TO_ACC,
231+
COPY_ACC_TO_ACC,
232+
FILL,
233+
UPDATE_HOST
234+
};
235+
236+
CG(CGTYPE Type, std::vector<std::vector<char>> ArgsStorage,
237+
std::vector<detail::AccessorImplPtr> AccStorage,
238+
std::vector<std::shared_ptr<void>> SharedPtrStorage,
239+
std::vector<Requirement *> Requirements)
240+
: MType(Type), MArgsStorage(std::move(ArgsStorage)),
241+
MAccStorage(std::move(AccStorage)),
242+
MSharedPtrStorage(std::move(SharedPtrStorage)),
243+
MRequirements(std::move(Requirements)) {}
244+
245+
CG(CG &&CommandGroup) = default;
246+
247+
std::vector<Requirement *> getRequirements() const { return MRequirements; }
248+
249+
CGTYPE getType() { return MType; }
250+
251+
private:
252+
CGTYPE MType;
253+
// The following storages needed to ensure that arguments won't die while
254+
// we are using them.
255+
// Storage for standard layout arguments.
256+
std::vector<std::vector<char>> MArgsStorage;
257+
// Storage for accessors.
258+
std::vector<detail::AccessorImplPtr> MAccStorage;
259+
// Storage for shared_ptrs.
260+
std::vector<std::shared_ptr<void>> MSharedPtrStorage;
261+
// List of requirements that specify which memory is needed for the command
262+
// group to be executed.
263+
std::vector<Requirement *> MRequirements;
264+
};
265+
266+
// The class which represents "execute kernel" command group.
267+
class CGExecKernel : public CG {
268+
public:
269+
NDRDescT MNDRDesc;
270+
std::unique_ptr<HostKernelBase> MHostKernel;
271+
std::shared_ptr<detail::kernel_impl> MSyclKernel;
272+
std::vector<ArgDesc> MArgs;
273+
std::string MKernelName;
274+
detail::OSModuleHandle MOSModuleHandle;
275+
276+
CGExecKernel(NDRDescT NDRDesc, std::unique_ptr<HostKernelBase> HKernel,
277+
std::shared_ptr<detail::kernel_impl> SyclKernel,
278+
std::vector<std::vector<char>> ArgsStorage,
279+
std::vector<detail::AccessorImplPtr> AccStorage,
280+
std::vector<std::shared_ptr<void>> SharedPtrStorage,
281+
std::vector<Requirement *> Requirements,
282+
std::vector<ArgDesc> Args, std::string KernelName,
283+
detail::OSModuleHandle OSModuleHandle)
284+
: CG(KERNEL, std::move(ArgsStorage), std::move(AccStorage),
285+
std::move(SharedPtrStorage), std::move(Requirements)),
286+
MNDRDesc(std::move(NDRDesc)), MHostKernel(std::move(HKernel)),
287+
MSyclKernel(std::move(SyclKernel)), MArgs(std::move(Args)),
288+
MKernelName(std::move(KernelName)), MOSModuleHandle(OSModuleHandle) {}
289+
290+
std::vector<ArgDesc> getArguments() const { return MArgs; }
291+
std::string getKernelName() const { return MKernelName; }
292+
};
293+
294+
// The class which represents "copy" command group.
295+
class CGCopy : public CG {
296+
void *MSrc;
297+
void *MDst;
298+
299+
public:
300+
CGCopy(CGTYPE CopyType, void *Src, void *Dst,
301+
std::vector<std::vector<char>> ArgsStorage,
302+
std::vector<detail::AccessorImplPtr> AccStorage,
303+
std::vector<std::shared_ptr<void>> SharedPtrStorage,
304+
std::vector<Requirement *> Requirements)
305+
: CG(CopyType, std::move(ArgsStorage), std::move(AccStorage),
306+
std::move(SharedPtrStorage), std::move(Requirements)),
307+
MSrc(Src), MDst(Dst) {}
308+
void *getSrc() { return MSrc; }
309+
void *getDst() { return MDst; }
310+
};
311+
312+
// The class which represents "fill" command group.
313+
class CGFill : public CG {
314+
public:
315+
std::vector<char> MPattern;
316+
Requirement *MPtr;
317+
318+
CGFill(std::vector<char> Pattern, void *Ptr,
319+
std::vector<std::vector<char>> ArgsStorage,
320+
std::vector<detail::AccessorImplPtr> AccStorage,
321+
std::vector<std::shared_ptr<void>> SharedPtrStorage,
322+
std::vector<Requirement *> Requirements)
323+
: CG(FILL, std::move(ArgsStorage), std::move(AccStorage),
324+
std::move(SharedPtrStorage), std::move(Requirements)),
325+
MPattern(std::move(Pattern)), MPtr((Requirement *)Ptr) {}
326+
Requirement *getReqToFill() { return MPtr; }
327+
};
328+
329+
// The class which represents "update host" command group.
330+
class CGUpdateHost : public CG {
331+
Requirement *MPtr;
332+
333+
public:
334+
CGUpdateHost(void *Ptr, std::vector<std::vector<char>> ArgsStorage,
335+
std::vector<detail::AccessorImplPtr> AccStorage,
336+
std::vector<std::shared_ptr<void>> SharedPtrStorage,
337+
std::vector<Requirement *> Requirements)
338+
: CG(UPDATE_HOST, std::move(ArgsStorage), std::move(AccStorage),
339+
std::move(SharedPtrStorage), std::move(Requirements)),
340+
MPtr((Requirement *)Ptr) {}
341+
342+
Requirement *getReqToUpdate() { return MPtr; }
343+
};
344+
345+
} // namespace cl
346+
} // namespace sycl
347+
} // namespace detail

sycl/include/CL/sycl/handler.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,11 @@
66
//
77
//===----------------------------------------------------------------------===//
88

9+
#ifdef SCHEDULER_20
10+
#include <CL/sycl/handler2.hpp>
11+
12+
#else
13+
914
#pragma once
1015

1116
#include <CL/__spirv/spirv_vars.hpp>
@@ -728,3 +733,4 @@ class handler {
728733
};
729734
} // namespace sycl
730735
} // namespace cl
736+
#endif // SCHEDULER_20

0 commit comments

Comments
 (0)