[SYCL][NFC] Replace csd alias with detail namespace #885

Merged · 1 commit · Dec 1, 2019
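
This NFC cleanup removes the `namespace csd = cl::sycl::detail;` alias from the public headers and spells out `detail::` at every use; the touched declarations are also re-wrapped in clang-format style, which accounts for the line-splitting churn in the diff below.

The sketch below (simplified stand-ins, not the real headers) illustrates the pattern being removed: an alias declared at `cl::sycl` scope leaks the non-standard name `csd` into every translation unit that includes the header, whereas `detail::` already resolves to `cl::sycl::detail` from inside `cl::sycl`.

```cpp
// Simplified stand-ins, not the real SYCL headers.
namespace cl {
namespace sycl {
namespace detail {
struct auto_name; // placeholder for the real cl::sycl::detail::auto_name
} // namespace detail

// Before: a header-scope alias visible to every including translation unit.
namespace csd = cl::sycl::detail;
template <typename KernelName = csd::auto_name, typename KernelType>
void single_task_before(KernelType KernelFunc);

// After: qualify through detail:: directly; no extra name is exported.
template <typename KernelName = detail::auto_name, typename KernelType>
void single_task_after(KernelType KernelFunc);

} // namespace sycl
} // namespace cl

int main() {} // declarations only; compiles without a SYCL runtime
```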
91 changes: 55 additions & 36 deletions sycl/include/CL/sycl/handler.hpp
@@ -62,8 +62,6 @@ class __copyAcc2Acc;
namespace cl {
namespace sycl {

-namespace csd = cl::sycl::detail;

// Forward declaration

template <typename T, int Dimensions, typename AllocatorT> class buffer;
@@ -106,7 +104,7 @@ template <typename Name, typename Type> struct get_kernel_name_t {
};

/// Specialization for the case when \c Name is undefined.
-template <typename Type> struct get_kernel_name_t<csd::auto_name, Type> {
+template <typename Type> struct get_kernel_name_t<detail::auto_name, Type> {
using name = Type;
};

@@ -409,9 +407,9 @@ class handler {
break;
case detail::CG::PREFETCH_USM:
CommandGroup.reset(new detail::CGPrefetchUSM(
-MDstPtr, MLength, std::move(MArgsStorage),
-std::move(MAccStorage), std::move(MSharedPtrStorage),
-std::move(MRequirements), std::move(MEvents)));
+MDstPtr, MLength, std::move(MArgsStorage), std::move(MAccStorage),
+std::move(MSharedPtrStorage), std::move(MRequirements),
+std::move(MEvents)));
break;
case detail::CG::NONE:
throw runtime_error("Command group submitted without a kernel or a "
@@ -666,7 +664,7 @@ class handler {
extractArgsAndReqsFromLambda(MHostKernel->getPtr(), KI::getNumParams(),
&KI::getParamDesc(0));
MKernelName = KI::getName();
-MOSModuleHandle = csd::OSUtil::getOSModuleHandle(KI::getName());
+MOSModuleHandle = detail::OSUtil::getOSModuleHandle(KI::getName());
} else {
// In case w/o the integration header it is necessary to process
// accessors from the list(which are associated with this handler) as
@@ -676,9 +674,10 @@
}

// single_task version with a kernel represented as a lambda.
-template <typename KernelName = csd::auto_name, typename KernelType>
+template <typename KernelName = detail::auto_name, typename KernelType>
void single_task(KernelType KernelFunc) {
-using NameT = typename csd::get_kernel_name_t<KernelName, KernelType>::name;
+using NameT =
+typename detail::get_kernel_name_t<KernelName, KernelType>::name;
#ifdef __SYCL_DEVICE_ONLY__
kernel_single_task<NameT>(KernelFunc);
#else
@@ -691,9 +690,11 @@

// parallel_for version with a kernel represented as a lambda + range that
// specifies global size only.
-template <typename KernelName = csd::auto_name, typename KernelType, int Dims>
+template <typename KernelName = detail::auto_name, typename KernelType,
+int Dims>
void parallel_for(range<Dims> NumWorkItems, KernelType KernelFunc) {
-using NameT = typename csd::get_kernel_name_t<KernelName, KernelType>::name;
+using NameT =
+typename detail::get_kernel_name_t<KernelName, KernelType>::name;
#ifdef __SYCL_DEVICE_ONLY__
kernel_parallel_for<NameT, KernelType, Dims>(KernelFunc);
#else
@@ -708,17 +709,18 @@
MNDRDesc.set(range<1>{1});

MArgs = std::move(MAssociatedAccesors);
-MHostKernel.reset(
-new detail::HostKernel<FuncT, void, 1>(std::move(Func)));
+MHostKernel.reset(new detail::HostKernel<FuncT, void, 1>(std::move(Func)));
MCGType = detail::CG::RUN_ON_HOST_INTEL;
}

// parallel_for version with a kernel represented as a lambda + range and
// offset that specify global size and global offset correspondingly.
-template <typename KernelName = csd::auto_name, typename KernelType, int Dims>
+template <typename KernelName = detail::auto_name, typename KernelType,
+int Dims>
void parallel_for(range<Dims> NumWorkItems, id<Dims> WorkItemOffset,
KernelType KernelFunc) {
-using NameT = typename csd::get_kernel_name_t<KernelName, KernelType>::name;
+using NameT =
+typename detail::get_kernel_name_t<KernelName, KernelType>::name;
#ifdef __SYCL_DEVICE_ONLY__
kernel_parallel_for<NameT, KernelType, Dims>(KernelFunc);
#else
@@ -730,9 +732,11 @@

// parallel_for version with a kernel represented as a lambda + nd_range that
// specifies global, local sizes and offset.
-template <typename KernelName = csd::auto_name, typename KernelType, int Dims>
+template <typename KernelName = detail::auto_name, typename KernelType,
+int Dims>
void parallel_for(nd_range<Dims> ExecutionRange, KernelType KernelFunc) {
-using NameT = typename csd::get_kernel_name_t<KernelName, KernelType>::name;
+using NameT =
+typename detail::get_kernel_name_t<KernelName, KernelType>::name;
#ifdef __SYCL_DEVICE_ONLY__
kernel_parallel_for<NameT, KernelType, Dims>(KernelFunc);
#else
@@ -742,10 +746,12 @@
#endif
}

-template <typename KernelName = csd::auto_name, typename KernelType, int Dims>
+template <typename KernelName = detail::auto_name, typename KernelType,
+int Dims>
void parallel_for_work_group(range<Dims> NumWorkGroups,
KernelType KernelFunc) {
-using NameT = typename csd::get_kernel_name_t<KernelName, KernelType>::name;
+using NameT =
+typename detail::get_kernel_name_t<KernelName, KernelType>::name;
#ifdef __SYCL_DEVICE_ONLY__
kernel_parallel_for_work_group<NameT, KernelType, Dims>(KernelFunc);
#else
@@ -755,11 +761,13 @@
#endif // __SYCL_DEVICE_ONLY__
}

-template <typename KernelName = csd::auto_name, typename KernelType, int Dims>
+template <typename KernelName = detail::auto_name, typename KernelType,
+int Dims>
void parallel_for_work_group(range<Dims> NumWorkGroups,
range<Dims> WorkGroupSize,
KernelType KernelFunc) {
-using NameT = typename csd::get_kernel_name_t<KernelName, KernelType>::name;
+using NameT =
+typename detail::get_kernel_name_t<KernelName, KernelType>::name;
#ifdef __SYCL_DEVICE_ONLY__
kernel_parallel_for_work_group<NameT, KernelType, Dims>(KernelFunc);
#else
@@ -823,9 +831,10 @@ class handler {
// single_task version which takes two "kernels". One is a lambda which is
// used if device, queue is bound to, is host device. Second is a sycl::kernel
// which is used otherwise.
-template <typename KernelName = csd::auto_name, typename KernelType>
+template <typename KernelName = detail::auto_name, typename KernelType>
void single_task(kernel SyclKernel, KernelType KernelFunc) {
-using NameT = typename csd::get_kernel_name_t<KernelName, KernelType>::name;
+using NameT =
+typename detail::get_kernel_name_t<KernelName, KernelType>::name;
#ifdef __SYCL_DEVICE_ONLY__
kernel_single_task<NameT>(KernelFunc);
#else
@@ -842,10 +851,12 @@
// parallel_for version which takes two "kernels". One is a lambda which is
// used if device, queue is bound to, is host device. Second is a sycl::kernel
// which is used otherwise. range argument specifies global size.
-template <typename KernelName = csd::auto_name, typename KernelType, int Dims>
+template <typename KernelName = detail::auto_name, typename KernelType,
+int Dims>
void parallel_for(kernel SyclKernel, range<Dims> NumWorkItems,
KernelType KernelFunc) {
-using NameT = typename csd::get_kernel_name_t<KernelName, KernelType>::name;
+using NameT =
+typename detail::get_kernel_name_t<KernelName, KernelType>::name;
#ifdef __SYCL_DEVICE_ONLY__
kernel_parallel_for<NameT, KernelType, Dims>(KernelFunc);
#else
@@ -862,10 +873,12 @@
// parallel_for version which takes two "kernels". One is a lambda which is
// used if device, queue is bound to, is host device. Second is a sycl::kernel
// which is used otherwise. range and id specify global size and offset.
-template <typename KernelName = csd::auto_name, typename KernelType, int Dims>
+template <typename KernelName = detail::auto_name, typename KernelType,
+int Dims>
void parallel_for(kernel SyclKernel, range<Dims> NumWorkItems,
id<Dims> WorkItemOffset, KernelType KernelFunc) {
-using NameT = typename csd::get_kernel_name_t<KernelName, KernelType>::name;
+using NameT =
+typename detail::get_kernel_name_t<KernelName, KernelType>::name;
#ifdef __SYCL_DEVICE_ONLY__
kernel_parallel_for<NameT, KernelType, Dims>(KernelFunc);
#else
@@ -882,10 +895,12 @@
// parallel_for version which takes two "kernels". One is a lambda which is
// used if device, queue is bound to, is host device. Second is a sycl::kernel
// which is used otherwise. nd_range specifies global, local size and offset.
-template <typename KernelName = csd::auto_name, typename KernelType, int Dims>
+template <typename KernelName = detail::auto_name, typename KernelType,
+int Dims>
void parallel_for(kernel SyclKernel, nd_range<Dims> NDRange,
KernelType KernelFunc) {
-using NameT = typename csd::get_kernel_name_t<KernelName, KernelType>::name;
+using NameT =
+typename detail::get_kernel_name_t<KernelName, KernelType>::name;
#ifdef __SYCL_DEVICE_ONLY__
kernel_parallel_for<NameT, KernelType, Dims>(KernelFunc);
#else
@@ -905,10 +920,12 @@
/// of the kernel. The same source kernel can be compiled multiple times
/// yielding multiple kernel class objects accessible via the \c program class
/// interface.
-template <typename KernelName = csd::auto_name, typename KernelType, int Dims>
+template <typename KernelName = detail::auto_name, typename KernelType,
+int Dims>
void parallel_for_work_group(kernel SyclKernel, range<Dims> NumWorkGroups,
KernelType KernelFunc) {
-using NameT = typename csd::get_kernel_name_t<KernelName, KernelType>::name;
+using NameT =
+typename detail::get_kernel_name_t<KernelName, KernelType>::name;
#ifdef __SYCL_DEVICE_ONLY__
kernel_parallel_for_work_group<NameT, KernelType, Dims>(KernelFunc);
#else
@@ -921,11 +938,13 @@

/// Two-kernel version of the \c parallel_for_work_group with group and local
/// range.
-template <typename KernelName = csd::auto_name, typename KernelType, int Dims>
+template <typename KernelName = detail::auto_name, typename KernelType,
+int Dims>
void parallel_for_work_group(kernel SyclKernel, range<Dims> NumWorkGroups,
range<Dims> WorkGroupSize,
KernelType KernelFunc) {
-using NameT = typename csd::get_kernel_name_t<KernelName, KernelType>::name;
+using NameT =
+typename detail::get_kernel_name_t<KernelName, KernelType>::name;
#ifdef __SYCL_DEVICE_ONLY__
kernel_parallel_for_work_group<NameT, KernelType, Dims>(KernelFunc);
#else
@@ -1083,7 +1102,7 @@ class handler {
// Shapes can be 1, 2 or 3 dimensional rectangles.
template <int Dims_Src, int Dims_Dst>
static bool IsCopyingRectRegionAvailable(const range<Dims_Src> Src,
-const range<Dims_Dst> Dst) {
+const range<Dims_Dst> Dst) {
if (Dims_Src > Dims_Dst)
return false;
for (size_t I = 0; I < Dims_Src; ++I)
@@ -1092,7 +1111,7 @@
return true;
}

-// copy memory pointed by accessor to the memory pointed by another accessor
+// copy memory pointed by accessor to the memory pointed by another accessor
template <
typename T_Src, int Dims_Src, access::mode AccessMode_Src,
access::target AccessTarget_Src, typename T_Dst, int Dims_Dst,
@@ -1209,7 +1228,7 @@ class handler {
}

// Copy memory from the source to the destination.
-void memcpy(void* Dest, const void* Src, size_t Count) {
+void memcpy(void *Dest, const void *Src, size_t Count) {
MSrcPtr = const_cast<void *>(Src);
MDstPtr = Dest;
MLength = Count;
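
Since the default kernel name is the same type as before (now written `detail::auto_name` rather than `csd::auto_name`), call sites are unaffected. A minimal sketch of a typical call site, assuming a SYCL 1.2.1-style runtime such as this repository's:

```cpp
#include <CL/sycl.hpp>

int main() {
  cl::sycl::queue Q;

  // Explicit kernel name; omitting <class scale_kernel> would fall back
  // to the detail::auto_name default touched by this patch.
  Q.submit([&](cl::sycl::handler &cgh) {
    cgh.parallel_for<class scale_kernel>(cl::sycl::range<1>{16},
                                         [=](cl::sycl::id<1>) {});
  });
  Q.wait();
  return 0;
}
```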
51 changes: 29 additions & 22 deletions sycl/include/CL/sycl/ordered_queue.hpp
@@ -29,34 +29,40 @@ class ordered_queue {
explicit ordered_queue(const property_list &propList = {})
: ordered_queue(default_selector(), async_handler{}, propList) {}

-ordered_queue(const async_handler &asyncHandler, const property_list &propList = {})
+ordered_queue(const async_handler &asyncHandler,
+const property_list &propList = {})
: ordered_queue(default_selector(), asyncHandler, propList) {}

ordered_queue(const device_selector &deviceSelector,
-const property_list &propList = {})
-: ordered_queue(deviceSelector.select_device(), async_handler{}, propList) {}
+const property_list &propList = {})
+: ordered_queue(deviceSelector.select_device(), async_handler{},
+propList) {}

ordered_queue(const device_selector &deviceSelector,
-const async_handler &asyncHandler, const property_list &propList = {})
+const async_handler &asyncHandler,
+const property_list &propList = {})
: ordered_queue(deviceSelector.select_device(), asyncHandler, propList) {}

ordered_queue(const device &syclDevice, const property_list &propList = {})
: ordered_queue(syclDevice, async_handler{}, propList) {}

ordered_queue(const device &syclDevice, const async_handler &asyncHandler,
-const property_list &propList = {});
+const property_list &propList = {});

-ordered_queue(const context &syclContext, const device_selector &deviceSelector,
-const property_list &propList = {})
+ordered_queue(const context &syclContext,
+const device_selector &deviceSelector,
+const property_list &propList = {})
: ordered_queue(syclContext, deviceSelector,
-detail::getSyclObjImpl(syclContext)->get_async_handler(),
-propList) {}
+detail::getSyclObjImpl(syclContext)->get_async_handler(),
+propList) {}

-ordered_queue(const context &syclContext, const device_selector &deviceSelector,
-const async_handler &asyncHandler, const property_list &propList = {});
+ordered_queue(const context &syclContext,
+const device_selector &deviceSelector,
+const async_handler &asyncHandler,
+const property_list &propList = {});

ordered_queue(cl_command_queue cl_Queue, const context &syclContext,
-const async_handler &asyncHandler = {});
+const async_handler &asyncHandler = {});

ordered_queue(const ordered_queue &rhs) = default;

@@ -104,22 +110,20 @@ class ordered_queue {
return impl->get_property<propertyT>();
}

-event memset(void* ptr, int value, size_t count) {
+event memset(void *ptr, int value, size_t count) {
return impl->memset(impl, ptr, value, count);
}

-event memcpy(void* dest, const void* src, size_t count) {
+event memcpy(void *dest, const void *src, size_t count) {
return impl->memcpy(impl, dest, src, count);
}

-event prefetch(const void* Ptr, size_t Count) {
-return submit([=](handler &cgh) {
-cgh.prefetch(Ptr, Count);
-});
+event prefetch(const void *Ptr, size_t Count) {
+return submit([=](handler &cgh) { cgh.prefetch(Ptr, Count); });
}

// single_task version with a kernel represented as a lambda.
-template <typename KernelName = csd::auto_name, typename KernelType>
+template <typename KernelName = detail::auto_name, typename KernelType>
void single_task(KernelType KernelFunc) {
submit([&](handler &cgh) {
cgh.template single_task<KernelName, KernelType>(KernelFunc);
@@ -128,7 +132,8 @@

// parallel_for version with a kernel represented as a lambda + range that
// specifies global size only.
-template <typename KernelName = csd::auto_name, typename KernelType, int Dims>
+template <typename KernelName = detail::auto_name, typename KernelType,
+int Dims>
void parallel_for(range<Dims> NumWorkItems, KernelType KernelFunc) {
// By-value or By-reference for this?
submit([&](handler &cgh) {
@@ -139,7 +144,8 @@

// parallel_for version with a kernel represented as a lambda + range and
// offset that specify global size and global offset correspondingly.
-template <typename KernelName = csd::auto_name, typename KernelType, int Dims>
+template <typename KernelName = detail::auto_name, typename KernelType,
+int Dims>
void parallel_for(range<Dims> NumWorkItems, id<Dims> WorkItemOffset,
KernelType KernelFunc) {
submit([&](handler &cgh) {
@@ -150,7 +156,8 @@

// parallel_for version with a kernel represented as a lambda + nd_range that
// specifies global, local sizes and offset.
-template <typename KernelName = csd::auto_name, typename KernelType, int Dims>
+template <typename KernelName = detail::auto_name, typename KernelType,
+int Dims>
void parallel_for(nd_range<Dims> ExecutionRange, KernelType KernelFunc) {
submit([&](handler &cgh) {
cgh.template parallel_for<KernelName, KernelType, Dims>(ExecutionRange,
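
The `ordered_queue` changes are the same alias replacement plus clang-format re-wrapping. A usage sketch of the in-order semantics this class provides (hedged: `ordered_queue` was an Intel extension of this era; `submit` appears in the header above, while `wait()` is assumed to mirror the regular `queue` interface):

```cpp
#include <CL/sycl.hpp>

int main() {
  cl::sycl::ordered_queue Q; // default-selected device

  // Submissions execute in submission order, so step_two cannot start
  // before step_one has finished.
  Q.submit([&](cl::sycl::handler &cgh) {
    cgh.single_task<class step_one>([]() {});
  });
  Q.submit([&](cl::sycl::handler &cgh) {
    cgh.parallel_for<class step_two>(cl::sycl::range<1>{64},
                                     [](cl::sycl::id<1>) {});
  });
  Q.wait(); // assumed queue-like member
  return 0;
}
```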