Skip to content

Device global copy kernel implementation #269

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 19 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Resolve conflict and improve code style
  • Loading branch information
sherry-yuan authored and intel-liudean committed Feb 3, 2023
commit c047c764a7740d1d319772867dc65da686cda88a
2 changes: 2 additions & 0 deletions include/acl.h
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,8 @@ typedef struct {

bool streaming_control_info_available;
acl_streaming_kernel_control_info streaming_control_info;
unsigned int device_global_address; /* Address of kernel's device global*/
unsigned int device_global_size; /* Size of address space of device global used by this kernel*/
} acl_accel_def_t;

/* An ACL system definition.
Expand Down
6 changes: 4 additions & 2 deletions src/acl_mem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -424,8 +424,10 @@ CL_API_ENTRY cl_int clEnqueueReadGlobalVariableINTEL(
}

// dev_addr_t dev_global_address =
// kernel->dev_bin->get_devdef().autodiscovery_def.?
uintptr_t dev_global_address = 0x4000000;
uintptr_t dev_global_address = kernel->accel_def->device_global_address;
assert(kernel->accel_def->device_global_address == 4096); // TODO: remove when merging
// uintptr_t dev_global_address = 0x4000000;
// TODO: add checks for whether the copy will be out of bounds for the device global
void *dev_global_ptr =
(void *)(dev_global_address + offset * 8); // 1 unit of offset is 8 bits
status = set_kernel_arg_mem_pointer_without_checks(kernel, 0, dev_global_ptr);
Expand Down
3 changes: 3 additions & 0 deletions test/acl_auto_configure_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,9 @@ TEST(auto_configure, simple) {
CHECK_EQUAL(0,
(int)m_device_def.autodiscovery_def.accel[0].max_work_group_size);
CHECK_EQUAL(1, (int)m_device_def.autodiscovery_def.accel[0].is_sycl_compile);
CHECK_EQUAL(4096, (int)m_device_def.autodiscovery_def.accel[0].device_global_address);
CHECK_EQUAL(2048, (int)m_device_def.autodiscovery_def.accel[0].device_global_size);


// Checks for device global entry.
CHECK_EQUAL(2, m_device_def.autodiscovery_def.device_global_mem_defs.size());
Expand Down
33 changes: 19 additions & 14 deletions test/acl_globals_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -198,20 +198,25 @@ static std::vector<acl_accel_def_t> acltest_complex_system_device0_accel = {
{},
{32768, 0, 0},
1},
{14,
ACL_RANGE_FROM_ARRAY(acltest_devicelocal[11]),
acltest_kernels[14],
acltest_laspace_info,
{0, 0, 0},
0,
0,
1,
0,
32768,
3,
{},
{32768, 0, 0},
1},
{14, // id
ACL_RANGE_FROM_ARRAY(acltest_devicelocal[11]), // mem
acltest_kernels[14], // iface
acltest_laspace_info, // local_aspaces
{0, 0, 0}, // compile_work_group_size
0, // is_workgroup_invariant
0, // is_workitem_invariant
1, // num_vector_lanes
0, // profiling_words_to_readback
32768, // max_work_group_size
3, // max_global_work_dim
{}, // printf_format_info
{32768, 0, 0}, // max_work_group_size_arr
1, // uses_global_work_offset
0, // fast_launch_depth
1, // is_sycl_compile
4096, // device_global_address
2048, // device_global_size
},
{1,
ACL_RANGE_FROM_ARRAY(acltest_devicelocal[1]),
acltest_kernels[1],
Expand Down