|
| 1 | +//==-------------- CG.hpp - SYCL standard header file ----------------------==// |
| 2 | +// |
| 3 | +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | +// See https://llvm.org/LICENSE.txt for license information. |
| 5 | +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | +// |
| 7 | +//===----------------------------------------------------------------------===// |
| 8 | + |
| 9 | +#pragma once |
| 10 | + |
| 11 | +#include <CL/sycl/detail/accessor_impl.hpp> |
| 12 | +#include <CL/sycl/detail/helpers.hpp> |
| 13 | +#include <CL/sycl/detail/kernel_desc.hpp> |
| 14 | +#include <CL/sycl/id.hpp> |
| 15 | +#include <CL/sycl/kernel.hpp> |
| 16 | +#include <CL/sycl/nd_item.hpp> |
| 17 | +#include <CL/sycl/range.hpp> |
| 18 | + |
| 19 | +#include <memory> |
| 20 | +#include <string> |
| 21 | +#include <type_traits> |
| 22 | +#include <vector> |
| 23 | + |
| 24 | +namespace cl { |
| 25 | +namespace sycl { |
| 26 | +namespace detail { |
| 27 | + |
| 28 | +using namespace cl; |
| 29 | + |
| 30 | +// The structure represents kernel argument. |
| 31 | +class ArgDesc { |
| 32 | +public: |
| 33 | + ArgDesc(sycl::detail::kernel_param_kind_t Type, void *Ptr, int Size, |
| 34 | + int Index) |
| 35 | + : MType(Type), MPtr(Ptr), MSize(Size), MIndex(Index) {} |
| 36 | + |
| 37 | + sycl::detail::kernel_param_kind_t MType; |
| 38 | + void *MPtr; |
| 39 | + int MSize; |
| 40 | + int MIndex; |
| 41 | +}; |
| 42 | + |
| 43 | +// The structure represents NDRange - global, local sizes, global offset and |
| 44 | +// number of dimensions. |
| 45 | +class NDRDescT { |
| 46 | + // The method initializes all sizes for dimensions greater than the passed one |
| 47 | + // to the default values, so they will not affect execution. |
| 48 | + template <int Dims_> void setNDRangeLeftover() { |
| 49 | + for (int I = Dims_; I < 3; ++I) { |
| 50 | + GlobalSize[I] = 1; |
| 51 | + LocalSize[I] = 1; |
| 52 | + GlobalOffset[I] = 0; |
| 53 | + } |
| 54 | + } |
| 55 | + |
| 56 | +public: |
| 57 | + NDRDescT() = default; |
| 58 | + |
| 59 | + template <int Dims_> void set(sycl::range<Dims_> NumWorkItems) { |
| 60 | + for (int I = 0; I < Dims_; ++I) { |
| 61 | + GlobalSize[I] = NumWorkItems[I]; |
| 62 | + LocalSize[I] = 1; |
| 63 | + GlobalOffset[I] = 0; |
| 64 | + } |
| 65 | + |
| 66 | + setNDRangeLeftover<Dims_>(); |
| 67 | + Dims = Dims_; |
| 68 | + } |
| 69 | + |
| 70 | + template <int Dims_> void set(sycl::nd_range<Dims_> ExecutionRange) { |
| 71 | + for (int I = 0; I < Dims_; ++I) { |
| 72 | + GlobalSize[I] = ExecutionRange.get_global_range()[I]; |
| 73 | + LocalSize[I] = ExecutionRange.get_local_range()[I]; |
| 74 | + GlobalOffset[I] = ExecutionRange.get_offset()[I]; |
| 75 | + } |
| 76 | + setNDRangeLeftover<Dims_>(); |
| 77 | + Dims = Dims_; |
| 78 | + } |
| 79 | + |
| 80 | + sycl::range<3> GlobalSize; |
| 81 | + sycl::range<3> LocalSize; |
| 82 | + sycl::id<3> GlobalOffset; |
| 83 | + size_t Dims; |
| 84 | +}; |
| 85 | + |
| 86 | +// The pure virtual class aimed to store lambda/functors of any type. |
| 87 | +class HostKernelBase { |
| 88 | +public: |
| 89 | + // The method executes lambda stored using NDRange passed. |
| 90 | + virtual void call(const NDRDescT &NDRDesc) = 0; |
| 91 | + // Return pointer to the lambda object. |
| 92 | + // Used to extract captured variables. |
| 93 | + virtual char *getPtr() = 0; |
| 94 | + virtual ~HostKernelBase() = default; |
| 95 | +}; |
| 96 | + |
| 97 | +// Class which stores specific lambda object. |
| 98 | +template <class KernelType, class KernelArgType, int Dims> |
| 99 | +class HostKernel : public HostKernelBase { |
| 100 | + using IDBuilder = sycl::detail::Builder; |
| 101 | + KernelType MKernel; |
| 102 | + |
| 103 | +public: |
| 104 | + HostKernel(KernelType Kernel) : MKernel(Kernel) {} |
| 105 | + void call(const NDRDescT &NDRDesc) override { runOnHost(NDRDesc); } |
| 106 | + |
| 107 | + char *getPtr() override { return reinterpret_cast<char *>(&MKernel); } |
| 108 | + |
| 109 | + template <class ArgT = KernelArgType> |
| 110 | + typename std::enable_if<std::is_same<ArgT, void>::value>::type |
| 111 | + runOnHost(const NDRDescT &NDRDesc) { |
| 112 | + MKernel(); |
| 113 | + } |
| 114 | + |
| 115 | + template <class ArgT = KernelArgType> |
| 116 | + typename std::enable_if<std::is_same<ArgT, sycl::id<Dims>>::value>::type |
| 117 | + runOnHost(const NDRDescT &NDRDesc) { |
| 118 | + size_t XYZ[3] = {0}; |
| 119 | + sycl::id<Dims> ID; |
| 120 | + for (; XYZ[2] < NDRDesc.GlobalSize[2]; ++XYZ[2]) { |
| 121 | + XYZ[1] = 0; |
| 122 | + for (; XYZ[1] < NDRDesc.GlobalSize[1]; ++XYZ[1]) { |
| 123 | + XYZ[0] = 0; |
| 124 | + for (; XYZ[0] < NDRDesc.GlobalSize[0]; ++XYZ[0]) { |
| 125 | + for (int I = 0; I < Dims; ++I) |
| 126 | + ID[I] = XYZ[I]; |
| 127 | + MKernel(ID); |
| 128 | + } |
| 129 | + } |
| 130 | + } |
| 131 | + } |
| 132 | + |
| 133 | + template <class ArgT = KernelArgType> |
| 134 | + typename std::enable_if< |
| 135 | + (std::is_same<ArgT, item<Dims, /*Offset=*/false>>::value || |
| 136 | + std::is_same<ArgT, item<Dims, /*Offset=*/true>>::value)>::type |
| 137 | + runOnHost(const NDRDescT &NDRDesc) { |
| 138 | + size_t XYZ[3] = {0}; |
| 139 | + sycl::id<Dims> ID; |
| 140 | + sycl::range<Dims> Range; |
| 141 | + for (int I = 0; I < Dims; ++I) |
| 142 | + Range[I] = NDRDesc.GlobalSize[I]; |
| 143 | + |
| 144 | + for (; XYZ[2] < NDRDesc.GlobalSize[2]; ++XYZ[2]) { |
| 145 | + XYZ[1] = 0; |
| 146 | + for (; XYZ[1] < NDRDesc.GlobalSize[1]; ++XYZ[1]) { |
| 147 | + XYZ[0] = 0; |
| 148 | + for (; XYZ[0] < NDRDesc.GlobalSize[0]; ++XYZ[0]) { |
| 149 | + for (int I = 0; I < Dims; ++I) |
| 150 | + ID[I] = XYZ[I]; |
| 151 | + |
| 152 | + sycl::item<Dims, /*Offset=*/false> Item = |
| 153 | + IDBuilder::createItem<Dims, false>(Range, ID); |
| 154 | + MKernel(Item); |
| 155 | + } |
| 156 | + } |
| 157 | + } |
| 158 | + } |
| 159 | + |
| 160 | + template <class ArgT = KernelArgType> |
| 161 | + typename std::enable_if<std::is_same<ArgT, nd_item<Dims>>::value>::type |
| 162 | + runOnHost(const NDRDescT &NDRDesc) { |
| 163 | + // TODO add offset logic |
| 164 | + |
| 165 | + sycl::id<3> GroupSize; |
| 166 | + for (int I = 0; I < 3; ++I) { |
| 167 | + GroupSize[I] = NDRDesc.GlobalSize[I] / NDRDesc.LocalSize[I]; |
| 168 | + } |
| 169 | + |
| 170 | + sycl::range<Dims> GlobalSize; |
| 171 | + sycl::range<Dims> LocalSize; |
| 172 | + sycl::id<Dims> GlobalOffset; |
| 173 | + for (int I = 0; I < Dims; ++I) { |
| 174 | + GlobalOffset[I] = NDRDesc.GlobalOffset[I]; |
| 175 | + LocalSize[I] = NDRDesc.LocalSize[I]; |
| 176 | + GlobalSize[I] = NDRDesc.GlobalSize[I]; |
| 177 | + } |
| 178 | + |
| 179 | + sycl::id<Dims> GlobalID; |
| 180 | + sycl::id<Dims> LocalID; |
| 181 | + |
| 182 | + size_t GroupXYZ[3] = {0}; |
| 183 | + sycl::id<Dims> GroupID; |
| 184 | + for (; GroupXYZ[2] < GroupSize[2]; ++GroupXYZ[2]) { |
| 185 | + GroupXYZ[1] = 0; |
| 186 | + for (; GroupXYZ[1] < GroupSize[1]; ++GroupXYZ[1]) { |
| 187 | + GroupXYZ[0] = 0; |
| 188 | + for (; GroupXYZ[0] < GroupSize[0]; ++GroupXYZ[0]) { |
| 189 | + for (int I = 0; I < Dims; ++I) |
| 190 | + GroupID[I] = GroupXYZ[I]; |
| 191 | + |
| 192 | + sycl::group<Dims> Group = |
| 193 | + IDBuilder::createGroup<Dims>(GlobalSize, LocalSize, GroupID); |
| 194 | + size_t LocalXYZ[3] = {0}; |
| 195 | + for (; LocalXYZ[2] < NDRDesc.LocalSize[2]; ++LocalXYZ[2]) { |
| 196 | + LocalXYZ[1] = 0; |
| 197 | + for (; LocalXYZ[1] < NDRDesc.LocalSize[1]; ++LocalXYZ[1]) { |
| 198 | + LocalXYZ[0] = 0; |
| 199 | + for (; LocalXYZ[0] < NDRDesc.LocalSize[0]; ++LocalXYZ[0]) { |
| 200 | + |
| 201 | + for (int I = 0; I < Dims; ++I) { |
| 202 | + GlobalID[I] = GroupXYZ[I] * LocalSize[I] + LocalXYZ[I]; |
| 203 | + LocalID[I] = LocalXYZ[I]; |
| 204 | + } |
| 205 | + const sycl::item<Dims, /*Offset=*/true> GlobalItem = |
| 206 | + IDBuilder::createItem<Dims, true>(GlobalSize, GlobalID, |
| 207 | + GlobalOffset); |
| 208 | + const sycl::item<Dims, /*Offset=*/false> LocalItem = |
| 209 | + IDBuilder::createItem<Dims, false>(LocalSize, LocalID); |
| 210 | + const sycl::nd_item<Dims> NDItem = |
| 211 | + IDBuilder::createNDItem<Dims>(GlobalItem, LocalItem, Group); |
| 212 | + MKernel(NDItem); |
| 213 | + } |
| 214 | + } |
| 215 | + } |
| 216 | + } |
| 217 | + } |
| 218 | + } |
| 219 | + } |
| 220 | + ~HostKernel() = default; |
| 221 | +}; |
| 222 | + |
| 223 | +// The base class for all types of command groups. |
| 224 | +class CG { |
| 225 | +public: |
| 226 | + // Type of the command group. |
| 227 | + enum CGTYPE { |
| 228 | + KERNEL, |
| 229 | + COPY_ACC_TO_PTR, |
| 230 | + COPY_PTR_TO_ACC, |
| 231 | + COPY_ACC_TO_ACC, |
| 232 | + FILL, |
| 233 | + UPDATE_HOST |
| 234 | + }; |
| 235 | + |
| 236 | + CG(CGTYPE Type, std::vector<std::vector<char>> ArgsStorage, |
| 237 | + std::vector<detail::AccessorImplPtr> AccStorage, |
| 238 | + std::vector<std::shared_ptr<void>> SharedPtrStorage, |
| 239 | + std::vector<Requirement *> Requirements) |
| 240 | + : MType(Type), MArgsStorage(std::move(ArgsStorage)), |
| 241 | + MAccStorage(std::move(AccStorage)), |
| 242 | + MSharedPtrStorage(std::move(SharedPtrStorage)), |
| 243 | + MRequirements(std::move(Requirements)) {} |
| 244 | + |
| 245 | + CG(CG &&CommandGroup) = default; |
| 246 | + |
| 247 | + std::vector<Requirement *> getRequirements() const { return MRequirements; } |
| 248 | + |
| 249 | + CGTYPE getType() { return MType; } |
| 250 | + |
| 251 | +private: |
| 252 | + CGTYPE MType; |
| 253 | + // The following storages needed to ensure that arguments won't die while |
| 254 | + // we are using them. |
| 255 | + // Storage for standard layout arguments. |
| 256 | + std::vector<std::vector<char>> MArgsStorage; |
| 257 | + // Storage for accessors. |
| 258 | + std::vector<detail::AccessorImplPtr> MAccStorage; |
| 259 | + // Storage for shared_ptrs. |
| 260 | + std::vector<std::shared_ptr<void>> MSharedPtrStorage; |
| 261 | + // List of requirements that specify which memory is needed for the command |
| 262 | + // group to be executed. |
| 263 | + std::vector<Requirement *> MRequirements; |
| 264 | +}; |
| 265 | + |
| 266 | +// The class which represents "execute kernel" command group. |
| 267 | +class CGExecKernel : public CG { |
| 268 | +public: |
| 269 | + NDRDescT MNDRDesc; |
| 270 | + std::unique_ptr<HostKernelBase> MHostKernel; |
| 271 | + std::shared_ptr<detail::kernel_impl> MSyclKernel; |
| 272 | + std::vector<ArgDesc> MArgs; |
| 273 | + std::string MKernelName; |
| 274 | + detail::OSModuleHandle MOSModuleHandle; |
| 275 | + |
| 276 | + CGExecKernel(NDRDescT NDRDesc, std::unique_ptr<HostKernelBase> HKernel, |
| 277 | + std::shared_ptr<detail::kernel_impl> SyclKernel, |
| 278 | + std::vector<std::vector<char>> ArgsStorage, |
| 279 | + std::vector<detail::AccessorImplPtr> AccStorage, |
| 280 | + std::vector<std::shared_ptr<void>> SharedPtrStorage, |
| 281 | + std::vector<Requirement *> Requirements, |
| 282 | + std::vector<ArgDesc> Args, std::string KernelName, |
| 283 | + detail::OSModuleHandle OSModuleHandle) |
| 284 | + : CG(KERNEL, std::move(ArgsStorage), std::move(AccStorage), |
| 285 | + std::move(SharedPtrStorage), std::move(Requirements)), |
| 286 | + MNDRDesc(std::move(NDRDesc)), MHostKernel(std::move(HKernel)), |
| 287 | + MSyclKernel(std::move(SyclKernel)), MArgs(std::move(Args)), |
| 288 | + MKernelName(std::move(KernelName)), MOSModuleHandle(OSModuleHandle) {} |
| 289 | + |
| 290 | + std::vector<ArgDesc> getArguments() const { return MArgs; } |
| 291 | + std::string getKernelName() const { return MKernelName; } |
| 292 | +}; |
| 293 | + |
| 294 | +// The class which represents "copy" command group. |
| 295 | +class CGCopy : public CG { |
| 296 | + void *MSrc; |
| 297 | + void *MDst; |
| 298 | + |
| 299 | +public: |
| 300 | + CGCopy(CGTYPE CopyType, void *Src, void *Dst, |
| 301 | + std::vector<std::vector<char>> ArgsStorage, |
| 302 | + std::vector<detail::AccessorImplPtr> AccStorage, |
| 303 | + std::vector<std::shared_ptr<void>> SharedPtrStorage, |
| 304 | + std::vector<Requirement *> Requirements) |
| 305 | + : CG(CopyType, std::move(ArgsStorage), std::move(AccStorage), |
| 306 | + std::move(SharedPtrStorage), std::move(Requirements)), |
| 307 | + MSrc(Src), MDst(Dst) {} |
| 308 | + void *getSrc() { return MSrc; } |
| 309 | + void *getDst() { return MDst; } |
| 310 | +}; |
| 311 | + |
| 312 | +// The class which represents "fill" command group. |
| 313 | +class CGFill : public CG { |
| 314 | +public: |
| 315 | + std::vector<char> MPattern; |
| 316 | + Requirement *MPtr; |
| 317 | + |
| 318 | + CGFill(std::vector<char> Pattern, void *Ptr, |
| 319 | + std::vector<std::vector<char>> ArgsStorage, |
| 320 | + std::vector<detail::AccessorImplPtr> AccStorage, |
| 321 | + std::vector<std::shared_ptr<void>> SharedPtrStorage, |
| 322 | + std::vector<Requirement *> Requirements) |
| 323 | + : CG(FILL, std::move(ArgsStorage), std::move(AccStorage), |
| 324 | + std::move(SharedPtrStorage), std::move(Requirements)), |
| 325 | + MPattern(std::move(Pattern)), MPtr((Requirement *)Ptr) {} |
| 326 | + Requirement *getReqToFill() { return MPtr; } |
| 327 | +}; |
| 328 | + |
| 329 | +// The class which represents "update host" command group. |
| 330 | +class CGUpdateHost : public CG { |
| 331 | + Requirement *MPtr; |
| 332 | + |
| 333 | +public: |
| 334 | + CGUpdateHost(void *Ptr, std::vector<std::vector<char>> ArgsStorage, |
| 335 | + std::vector<detail::AccessorImplPtr> AccStorage, |
| 336 | + std::vector<std::shared_ptr<void>> SharedPtrStorage, |
| 337 | + std::vector<Requirement *> Requirements) |
| 338 | + : CG(UPDATE_HOST, std::move(ArgsStorage), std::move(AccStorage), |
| 339 | + std::move(SharedPtrStorage), std::move(Requirements)), |
| 340 | + MPtr((Requirement *)Ptr) {} |
| 341 | + |
| 342 | + Requirement *getReqToUpdate() { return MPtr; } |
| 343 | +}; |
| 344 | + |
| 345 | +} // namespace cl |
| 346 | +} // namespace sycl |
| 347 | +} // namespace detail |
0 commit comments