Skip to content

Commit 6f48e96

Browse files
committed
Set default values for kernel image static part and skip CSR write if no change
1 parent e4e5b10 commit 6f48e96

File tree

3 files changed

+41
-6
lines changed

3 files changed

+41
-6
lines changed

include/acl_kernel_if.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,8 @@ typedef struct {
8686
// CRA address offset for backwards compatibility
8787
unsigned int cra_address_offset = 8;
8888

89+
// Kernel static image cache for tracking changed work dimensions, etc.
90+
std::vector<std::unique_ptr<char[]>> static_img_cache;
8991
// Kernel argument cache for tracking changed arguments
9092
std::vector<std::unique_ptr<char[]>> accel_arg_cache;
9193
} acl_kernel_if;

include/acl_types.h

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -441,7 +441,7 @@ class acl_device_program_info_t {
441441
// don't expect it.
442442
#pragma pack(push, 4)
443443
// These are the bytes written to global memory for a kernel invocation.
444-
typedef struct {
444+
typedef struct acl_dev_kernel_invocation_image {
445445
// The activation_id is the index into the device op queue.
446446
// The value at acl_platform.device_op_queue[activation_id] will be
447447
// updated asynchronously by the HAL, so its address must remain stable.
@@ -485,6 +485,19 @@ typedef struct {
485485
char *arg_value;
486486
size_t arg_value_size;
487487

488+
// Define constructor to initialize the invocation image to default values
489+
// The default values are hard-coded for now.
490+
acl_dev_kernel_invocation_image()
491+
: activation_id(0), accel_id(0), work_dim(1), work_group_size(1),
492+
padding(0), arg_value(NULL), arg_value_size(0) {
493+
for (unsigned i = 0; i < 3; ++i) {
494+
global_work_size[i] = 1;
495+
num_groups[i] = 1;
496+
local_work_size[i] = 1;
497+
global_work_offset[i] = 0;
498+
}
499+
}
500+
488501
} acl_dev_kernel_invocation_image_t;
489502

490503
// Invocation image structure that matches the 18.1 CRA layout.

src/acl_kernel_if.cpp

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -879,14 +879,23 @@ int acl_kernel_if_update(const acl_device_def_autodiscovery_t &devdef,
879879
if (kern->num_accel > 0) {
880880
kern->accel_job_ids.resize(kern->num_accel);
881881
kern->accel_invoc_queue_depth.resize(kern->num_accel);
882+
kern->static_img_cache.resize(kern->num_accel);
882883
kern->accel_arg_cache.resize(kern->num_accel);
883884

884885
// Kernel IRQ is a separate thread. Need to use circular buffer to make this
885886
// multithread safe.
886887
kern->accel_queue_front.resize(kern->num_accel);
887888
kern->accel_queue_back.resize(kern->num_accel);
888889

890+
acl_dev_kernel_invocation_image_t default_invocation;
891+
size_t image_size_static =
892+
(size_t)((uintptr_t) & (default_invocation.arg_value) - (uintptr_t) &
893+
(default_invocation.work_dim));
894+
889895
for (unsigned a = 0; a < kern->num_accel; ++a) {
896+
kern->static_img_cache[a] = std::make_unique<char[]>(image_size_static);
897+
memcpy(kern->static_img_cache[a].get(),
898+
(char *)(&(default_invocation.work_dim)), image_size_static);
890899
unsigned int max_same_accel_launches =
891900
devdef.accel[a].fast_launch_depth + 1;
892901
// +1, because fast launch depth does not account for the running kernel
@@ -1129,9 +1138,8 @@ void acl_kernel_if_launch_kernel_on_custom_sof(
11291138
offset = (unsigned int)(KERNEL_OFFSET_INVOCATION_IMAGE +
11301139
kern->cra_address_offset);
11311140
image_p = (uintptr_t) & (image->work_dim);
1132-
image_size_static =
1133-
(size_t)((uintptr_t) & (image->arg_value) - (uintptr_t) &
1134-
(image->work_dim));
1141+
image_size_static = (size_t)(
1142+
(uintptr_t) & (image->arg_value) - (uintptr_t) & (image->work_dim));
11351143
}
11361144

11371145
if ((kern->io.debug_verbosity) >= 2) {
@@ -1153,8 +1161,19 @@ void acl_kernel_if_launch_kernel_on_custom_sof(
11531161
// it is in dynamic memory. Only write the static part of the invocation
11541162
// image if this kernel uses CRA control.
11551163
if (!kern->streaming_control_signal_names[accel_id]) {
1156-
acl_kernel_cra_write_block(kern, accel_id, offset, (unsigned int *)image_p,
1157-
image_size_static);
1164+
if (kern->csr_version == CSR_VERSION_ID_18_1) {
1165+
// Just write everything for older CSR version
1166+
acl_kernel_cra_write_block(kern, accel_id, offset,
1167+
(unsigned int *)image_p, image_size_static);
1168+
} else {
1169+
char *img_cache_ptr = kern->static_img_cache[accel_id].get();
1170+
assert(img_cache_ptr && "kernel image cache not initialized!");
1171+
if (memcmp(img_cache_ptr, (char *)image_p, image_size_static) != 0) {
1172+
acl_kernel_cra_write_block(kern, accel_id, offset,
1173+
(unsigned int *)image_p, image_size_static);
1174+
memcpy(img_cache_ptr, (char *)image_p, image_size_static);
1175+
}
1176+
}
11581177
}
11591178

11601179
bool accel_has_agent_args = false;
@@ -1692,6 +1711,7 @@ void acl_kernel_if_close(acl_kernel_if *kern) {
16921711
kern->accel_invoc_queue_depth.clear();
16931712
kern->accel_queue_front.clear();
16941713
kern->accel_queue_back.clear();
1714+
kern->static_img_cache.clear();
16951715
kern->accel_arg_cache.clear();
16961716
kern->autorun_profiling_kernel_id = -1;
16971717
}

0 commit comments

Comments
 (0)