Skip to content

Commit 06324e2

Browse files
committed
Add buffer location property
The SYCL runtime calls clEnqueueWriteBuffer before clSetKernelArg, which causes the buffer to be allocated on the device before the correct memory location for it is known. As a result, when the kernel is later invoked, the memory has to be copied from the device's default global memory to the buffer location specified in the kernel. This is an additional memory copy operation. This extension does not interfere with the other way of setting the buffer location (i.e. through clSetKernelArg). This property exists for integration with the SYCL runtime and is not intended for pure OpenCL users. If an OpenCL user wishes to use this property, they must make sure that the buffer location passed into clCreateBufferWithPropertiesINTEL matches the one defined in the kernel function interface; otherwise the extra memory copy issue will remain. When resizing reserved allocations, we now have the information needed to allocate the minimum amount of space required according to the property passed in. This is done to align with the OpenCL docs and headers in: KhronosGroup/OpenCL-Headers#193 KhronosGroup/OpenCL-Docs#746
1 parent a5f34eb commit 06324e2

File tree

3 files changed

+61
-4
lines changed

3 files changed

+61
-4
lines changed

src/acl_mem.cpp

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -416,6 +416,7 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL(
416416
cl_bool context_has_device_with_physical_mem;
417417
unsigned int idevice;
418418
cl_uint bank_id = 0;
419+
cl_uint tmp_mem_id = 0;
419420
acl_lock();
420421

421422
#ifdef MEM_DEBUG_MSG
@@ -431,6 +432,9 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL(
431432
}
432433
bank_id = (cl_uint) * (properties + 1);
433434
} break;
435+
case CL_MEM_ALLOC_BUFFER_LOCATION_INTEL: {
436+
tmp_mem_id = (cl_uint) * (properties + 1);
437+
} break;
434438
default: {
435439
UNLOCK_BAIL_INFO(CL_INVALID_DEVICE, context, "Invalid properties");
436440
}
@@ -553,6 +557,7 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL(
553557
UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context,
554558
"Could not allocate a cl_mem object");
555559
}
560+
mem->mem_id = tmp_mem_id;
556561

557562
mem->block_allocation = new_block;
558563
mem->block_allocation->mem_obj = mem;
@@ -784,7 +789,6 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL(
784789
mem->context = context;
785790
mem->flags = flags;
786791
mem->size = size;
787-
mem->mem_id = 0;
788792

789793
mem->bank_id = 0;
790794
if (is_SOC_device()) {
@@ -1254,7 +1258,7 @@ CL_API_ENTRY cl_mem CL_API_CALL clCreateSubBufferIntelFPGA(
12541258

12551259
mem->context = context;
12561260
mem->flags = sub_flags;
1257-
mem->mem_id = 0;
1261+
mem->mem_id = buffer->mem_id;
12581262

12591263
if (is_SOC_device()) {
12601264
// HPS DDR is system managed for SoC.
@@ -1372,6 +1376,9 @@ CL_API_ENTRY cl_int CL_API_CALL clGetMemObjectInfoIntelFPGA(
13721376
context = mem->context;
13731377

13741378
switch (param_name) {
1379+
case CL_MEM_ALLOC_BUFFER_LOCATION_INTEL:
1380+
RESULT_UINT(mem->mem_id);
1381+
break;
13751382
case CL_MEM_TYPE:
13761383
RESULT_ENUM(mem->mem_object_type);
13771384
break;
@@ -4417,6 +4424,10 @@ void acl_resize_reserved_allocations_for_device(cl_mem mem,
44174424
unsigned int num_global_mem_systems =
44184425
def.autodiscovery_def.num_global_mem_systems;
44194426

4427+
// The buffer's mem_id may be larger than the number of memory systems
4428+
// reported by autodiscovery, so reserve at least mem_id + 1 entries.
4429+
num_global_mem_systems = std::max(num_global_mem_systems, mem->mem_id + 1);
4430+
44204431
// For the simulation flow we don't know how many memory systems will exist
44214432
// until we load the .aocx, which may not happen until somewhat later.
44224433
// Reserving space is quite cheap, so reserve space for many memory systems.

src/acl_usm.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -255,8 +255,8 @@ clDeviceMemAllocINTEL(cl_context context, cl_device_id device,
255255
cl_int status;
256256

257257
// Use cl_mem for convenience
258-
cl_mem usm_device_buffer =
259-
clCreateBufferIntelFPGA(context, CL_MEM_READ_WRITE, size, NULL, &status);
258+
cl_mem usm_device_buffer = clCreateBufferWithPropertiesINTEL(
259+
context, NULL, CL_MEM_READ_WRITE, size, NULL, &status);
260260
if (status != CL_SUCCESS) {
261261
UNLOCK_BAIL_INFO(status, context, "Failed to allocate device memory");
262262
}

test/acl_mem_test.cpp

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2799,6 +2799,52 @@ TEST(acl_mem, case_205751_overlapping_alloc) {
27992799
CHECK_EQUAL(CL_SUCCESS, clReleaseMemObject(c));
28002800
}
28012801

2802+
TEST(acl_mem, buffer_location_property) {
2803+
ACL_LOCKED(acl_print_debug_msg("begin buffer_location_property\n"));
2804+
// Test assumes more than 1 global memory space
2805+
// Create buffer (a) with CL_MEM_ALLOC_BUFFER_LOCATION_INTEL set to 0, then
2806+
// check that both it and a sub-buffer of it report location 0 when queried.
2807+
cl_mem a;
2808+
cl_int status = CL_SUCCESS;
2809+
size_t total_size = ACL_RANGE_SIZE(
2810+
m_device[0]->def.autodiscovery_def.global_mem_defs[0].range);
2811+
size_t bank_size = total_size / 2;
2812+
size_t small_size = bank_size / 1024; // NOTE(review): not used below
2813+
2814+
cl_mem_properties_intel props[] = {CL_MEM_ALLOC_BUFFER_LOCATION_INTEL, 0, 0};
2815+
a = clCreateBufferWithPropertiesINTEL(m_context, props, 0, bank_size, 0,
2816+
&status);
2817+
ACL_LOCKED(CHECK(acl_mem_is_valid(a)));
2818+
CHECK_EQUAL(CL_SUCCESS, status);
2819+
assert(a);
2820+
CHECK_EQUAL(1, acl_ref_count(a));
2821+
cl_uint read_mem_id = 4; // differs from the expected 0, so the query must overwrite it
2822+
size_t size_ret;
2823+
CHECK_EQUAL(CL_SUCCESS,
2824+
clGetMemObjectInfo(a, CL_MEM_ALLOC_BUFFER_LOCATION_INTEL,
2825+
sizeof(cl_uint), &read_mem_id, &size_ret));
2826+
CHECK_EQUAL(0, read_mem_id);
2827+
2828+
cl_buffer_region test_region = {0, 2};
2829+
cl_mem subbuffer =
2830+
clCreateSubBuffer(a, CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION,
2831+
&test_region, &status);
2832+
ACL_LOCKED(CHECK(acl_mem_is_valid(subbuffer)));
2833+
CHECK_EQUAL(CL_SUCCESS, status);
2834+
assert(subbuffer);
2835+
CHECK_EQUAL(2, acl_ref_count(a)); // creating the sub-buffer raised the parent's count from 1 to 2
2836+
read_mem_id = 4; // reset to the non-zero value before querying the sub-buffer
2837+
CHECK_EQUAL(CL_SUCCESS,
2838+
clGetMemObjectInfo(subbuffer, CL_MEM_ALLOC_BUFFER_LOCATION_INTEL,
2839+
sizeof(cl_uint), &read_mem_id, &size_ret));
2840+
CHECK_EQUAL(0, read_mem_id);
2841+
2842+
ACL_LOCKED(CHECK_EQUAL(acl_bind_buffer_to_device(m_cq->device, a), 1));
2843+
2844+
CHECK_EQUAL(CL_SUCCESS, clReleaseMemObject(subbuffer));
2845+
CHECK_EQUAL(CL_SUCCESS, clReleaseMemObject(a));
2846+
}
2847+
28022848
MT_TEST(acl_mem, map_buf_bad_flags) {
28032849
ACL_LOCKED(acl_print_debug_msg("begin buf_bad_flags\n"));
28042850
cl_int status = CL_SUCCESS;

0 commit comments

Comments
 (0)