Skip to content

Commit

Permalink
[SYCL] Untie PI functions from OpenCL (#1717)
Browse files Browse the repository at this point in the history
* Introduce pi_buffer_region type and use it instead of cl_buffer_region
* Introduce pi_fp_capabilities enum and use it in PI functions
* Fix type of mem advice parameter in piextUSMEnqueueMemAdvise
* Use pi_event_info instead of cl_event_info
* Extend pi_device_info with subgroup properties
* Extend pi_device_info with enum values to query subgroup information
and IL version; these values are going to be used by the Level Zero plugin

Signed-off-by: Artur Gainullin <artur.gainullin@intel.com>
  • Loading branch information
bader authored May 20, 2020
2 parents ebace77 + 20ec9f0 commit 358ae27
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 19 deletions.
34 changes: 29 additions & 5 deletions sycl/include/CL/sycl/detail/pi.h
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,7 @@ typedef enum {
PI_DEVICE_INFO_BUILT_IN_KERNELS = CL_DEVICE_BUILT_IN_KERNELS,
PI_DEVICE_INFO_PLATFORM = CL_DEVICE_PLATFORM,
PI_DEVICE_INFO_REFERENCE_COUNT = CL_DEVICE_REFERENCE_COUNT,
PI_DEVICE_INFO_IL_VERSION = CL_DEVICE_IL_VERSION_KHR,
PI_DEVICE_INFO_NAME = CL_DEVICE_NAME,
PI_DEVICE_INFO_VENDOR = CL_DEVICE_VENDOR,
PI_DEVICE_INFO_DRIVER_VERSION = CL_DRIVER_VERSION,
Expand All @@ -241,6 +242,10 @@ typedef enum {
PI_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN =
CL_DEVICE_PARTITION_AFFINITY_DOMAIN,
PI_DEVICE_INFO_PARTITION_TYPE = CL_DEVICE_PARTITION_TYPE,
PI_DEVICE_INFO_MAX_NUM_SUB_GROUPS = CL_DEVICE_MAX_NUM_SUB_GROUPS,
PI_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS =
CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS,
PI_DEVICE_INFO_SUB_GROUP_SIZES_INTEL = CL_DEVICE_SUB_GROUP_SIZES_INTEL,
PI_DEVICE_INFO_USM_HOST_SUPPORT = CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL,
PI_DEVICE_INFO_USM_DEVICE_SUPPORT = CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL,
PI_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT =
Expand Down Expand Up @@ -299,6 +304,16 @@ typedef enum {
PI_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE = CL_KERNEL_PRIVATE_MEM_SIZE
} _pi_kernel_group_info;

// Floating-point capability flags exposed through PI. Each enumerator takes
// the numeric value of the matching OpenCL CL_FP_* constant, so the values
// stay interchangeable with the OpenCL ones. Plugins combine the flags with
// bitwise OR to build the answer for the PI_DEVICE_INFO_SINGLE_FP_CONFIG and
// PI_DEVICE_INFO_DOUBLE_FP_CONFIG device queries.
typedef enum {
PI_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT = CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT,
PI_FP_ROUND_TO_NEAREST = CL_FP_ROUND_TO_NEAREST,
PI_FP_ROUND_TO_ZERO = CL_FP_ROUND_TO_ZERO,
PI_FP_ROUND_TO_INF = CL_FP_ROUND_TO_INF,
PI_FP_INF_NAN = CL_FP_INF_NAN,
PI_FP_DENORM = CL_FP_DENORM,
PI_FP_FMA = CL_FP_FMA
} _pi_fp_capabilities;

typedef enum {
PI_IMAGE_INFO_FORMAT = CL_IMAGE_FORMAT,
PI_IMAGE_INFO_ELEMENT_SIZE = CL_IMAGE_ELEMENT_SIZE,
Expand Down Expand Up @@ -512,6 +527,7 @@ using pi_image_info = _pi_image_info;
using pi_kernel_info = _pi_kernel_info;
using pi_kernel_group_info = _pi_kernel_group_info;
using pi_kernel_sub_group_info = _pi_kernel_sub_group_info;
using pi_fp_capabilities = _pi_fp_capabilities;
using pi_event_info = _pi_event_info;
using pi_command_type = _pi_command_type;
using pi_mem_type = _pi_mem_type;
Expand Down Expand Up @@ -678,6 +694,13 @@ struct pi_device_binary_struct {
};
using pi_device_binary = pi_device_binary_struct *;

// pi_buffer_region structure repeats cl_buffer_region. It describes the
// sub-range of a parent buffer and is what piMemBufferPartition receives
// (by pointer) as its buffer_create_info argument.
struct pi_buffer_region_struct {
// Start of the region inside the parent buffer.
// NOTE(review): presumably a byte offset, as in cl_buffer_region - confirm.
size_t origin;
// Size of the region; the SYCL runtime fills this with a byte count.
size_t size;
};
// NOTE: pi_buffer_region is a POINTER to the structure, not the structure
// itself. Declare a pi_buffer_region_struct when a value is needed.
using pi_buffer_region = pi_buffer_region_struct *;

// Offload binaries descriptor version supported by this library.
static const uint16_t PI_DEVICE_BINARIES_VERSION = 1;

Expand Down Expand Up @@ -1118,10 +1141,10 @@ __SYCL_EXPORT pi_result piKernelSetExecInfo(pi_kernel kernel,
//
__SYCL_EXPORT pi_result piEventCreate(pi_context context, pi_event *ret_event);

__SYCL_EXPORT pi_result piEventGetInfo(
pi_event event,
cl_event_info param_name, // TODO: untie from OpenCL
size_t param_value_size, void *param_value, size_t *param_value_size_ret);
__SYCL_EXPORT pi_result piEventGetInfo(pi_event event, pi_event_info param_name,
size_t param_value_size,
void *param_value,
size_t *param_value_size_ret);

__SYCL_EXPORT pi_result piEventGetProfilingInfo(pi_event event,
pi_profiling_info param_name,
Expand Down Expand Up @@ -1439,7 +1462,8 @@ __SYCL_EXPORT pi_result piextUSMEnqueuePrefetch(
// USM memadvise API to govern behavior of automatic migration mechanisms
__SYCL_EXPORT pi_result piextUSMEnqueueMemAdvise(pi_queue queue,
const void *ptr, size_t length,
int advice, pi_event *event);
pi_mem_advice advice,
pi_event *event);

/// API to query information about USM allocated pointers
/// Valid Queries:
Expand Down
10 changes: 6 additions & 4 deletions sycl/include/CL/sycl/info/info_desc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,18 +117,20 @@ enum class device : cl_device_info {
partition_affinity_domains = CL_DEVICE_PARTITION_AFFINITY_DOMAIN,
partition_type_affinity_domain = CL_DEVICE_PARTITION_TYPE,
reference_count = CL_DEVICE_REFERENCE_COUNT,
il_version =
CL_DEVICE_IL_VERSION_KHR, // Same as CL_DEVICE_IL_VERSION for >=OpenCL 2.1
max_num_sub_groups = CL_DEVICE_MAX_NUM_SUB_GROUPS,
sub_group_independent_forward_progress =
CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS,
sub_group_sizes = CL_DEVICE_SUB_GROUP_SIZES_INTEL,
partition_type_property,
kernel_kernel_pipe_support,
// USM
usm_device_allocations = PI_USM_DEVICE_SUPPORT,
usm_host_allocations = PI_USM_HOST_SUPPORT,
usm_shared_allocations = PI_USM_SINGLE_SHARED_SUPPORT,
usm_device_allocations = PI_USM_DEVICE_SUPPORT,
usm_host_allocations = PI_USM_HOST_SUPPORT,
usm_shared_allocations = PI_USM_SINGLE_SHARED_SUPPORT,
usm_restricted_shared_allocations = PI_USM_CROSS_SHARED_SUPPORT,
usm_system_allocator = PI_USM_SYSTEM_SHARED_SUPPORT
usm_system_allocator = PI_USM_SYSTEM_SHARED_SUPPORT
};

enum class device_type : pi_uint64 {
Expand Down
14 changes: 7 additions & 7 deletions sycl/plugins/cuda/pi_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1029,15 +1029,15 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name,
}
case PI_DEVICE_INFO_SINGLE_FP_CONFIG: {
// TODO: is this config consistent across all NVIDIA GPUs?
auto config = CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST |
CL_FP_ROUND_TO_ZERO | CL_FP_ROUND_TO_INF | CL_FP_FMA |
CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT;
auto config = PI_FP_DENORM | PI_FP_INF_NAN | PI_FP_ROUND_TO_NEAREST |
PI_FP_ROUND_TO_ZERO | PI_FP_ROUND_TO_INF | PI_FP_FMA |
PI_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT;
return getInfo(param_value_size, param_value, param_value_size_ret, config);
}
case PI_DEVICE_INFO_DOUBLE_FP_CONFIG: {
// TODO: is this config consistent across all NVIDIA GPUs?
auto config = CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST |
CL_FP_ROUND_TO_ZERO | CL_FP_ROUND_TO_INF | CL_FP_FMA;
auto config = PI_FP_DENORM | PI_FP_INF_NAN | PI_FP_ROUND_TO_NEAREST |
PI_FP_ROUND_TO_ZERO | PI_FP_ROUND_TO_INF | PI_FP_FMA;
return getInfo(param_value_size, param_value, param_value_size_ret, config);
}
case PI_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE: {
Expand Down Expand Up @@ -1674,7 +1674,7 @@ pi_result cuda_piMemBufferPartition(pi_mem parent_buffer, pi_mem_flags flags,
assert(memObj != nullptr);

const auto bufferRegion =
*reinterpret_cast<const cl_buffer_region *>(buffer_create_info);
*reinterpret_cast<const pi_buffer_region>(buffer_create_info);
assert((bufferRegion.size != 0u) && "PI_INVALID_BUFFER_SIZE");

assert((bufferRegion.origin <= (bufferRegion.origin + bufferRegion.size)) &&
Expand Down Expand Up @@ -3596,7 +3596,7 @@ pi_result cuda_piextUSMEnqueuePrefetch(pi_queue queue, const void *ptr,

/// USM: memadvise API to govern behavior of automatic migration mechanisms
pi_result cuda_piextUSMEnqueueMemAdvise(pi_queue queue, const void *ptr,
size_t length, int advice,
size_t length, pi_mem_advice advice,
pi_event *event) {
assert(queue != nullptr);
assert(ptr != nullptr);
Expand Down
3 changes: 2 additions & 1 deletion sycl/plugins/opencl/pi_opencl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -939,7 +939,8 @@ pi_result piextUSMEnqueuePrefetch(pi_queue queue, const void *ptr, size_t size,
/// \param event is the event that represents this operation
// USM memadvise API to govern behavior of automatic migration mechanisms
pi_result piextUSMEnqueueMemAdvise(pi_queue queue, const void *ptr,
size_t length, int advice, pi_event *event) {
size_t length, pi_mem_advice advice,
pi_event *event) {

return cast<pi_result>(
clEnqueueMarkerWithWaitList(cast<cl_command_queue>(queue), 0, nullptr,
Expand Down
3 changes: 1 addition & 2 deletions sycl/source/detail/memory_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -189,8 +189,7 @@ void *MemoryManager::allocateMemSubBuffer(ContextImplPtr TargetContext,
SizeInBytes *= Range[I];

RT::PiResult Error = PI_SUCCESS;
// TODO replace with pi_buffer_region
cl_buffer_region Region{Offset, SizeInBytes};
pi_buffer_region_struct Region{Offset, SizeInBytes};
RT::PiMem NewMem;
const detail::plugin &Plugin = TargetContext->getPlugin();
Error = Plugin.call_nocheck<PiApiKind::piMemBufferPartition>(
Expand Down

0 comments on commit 358ae27

Please sign in to comment.