@@ -879,14 +879,23 @@ int acl_kernel_if_update(const acl_device_def_autodiscovery_t &devdef,
879
879
if (kern->num_accel > 0 ) {
880
880
kern->accel_job_ids .resize (kern->num_accel );
881
881
kern->accel_invoc_queue_depth .resize (kern->num_accel );
882
+ kern->static_img_cache .resize (kern->num_accel );
882
883
kern->accel_arg_cache .resize (kern->num_accel );
883
884
884
885
// Kernel IRQ is a separate thread. Need to use circular buffer to make this
885
886
// multithread safe.
886
887
kern->accel_queue_front .resize (kern->num_accel );
887
888
kern->accel_queue_back .resize (kern->num_accel );
888
889
890
+ acl_dev_kernel_invocation_image_t default_invocation;
891
+ size_t image_size_static =
892
+ (size_t )((uintptr_t ) & (default_invocation.arg_value ) - (uintptr_t ) &
893
+ (default_invocation.work_dim ));
894
+
889
895
for (unsigned a = 0 ; a < kern->num_accel ; ++a) {
896
+ kern->static_img_cache [a] = std::make_unique<char []>(image_size_static);
897
+ memcpy (kern->static_img_cache [a].get (),
898
+ (char *)(&(default_invocation.work_dim )), image_size_static);
890
899
unsigned int max_same_accel_launches =
891
900
devdef.accel [a].fast_launch_depth + 1 ;
892
901
// +1, because fast launch depth does not account for the running kernel
@@ -1129,9 +1138,8 @@ void acl_kernel_if_launch_kernel_on_custom_sof(
1129
1138
offset = (unsigned int )(KERNEL_OFFSET_INVOCATION_IMAGE +
1130
1139
kern->cra_address_offset );
1131
1140
image_p = (uintptr_t ) & (image->work_dim );
1132
- image_size_static =
1133
- (size_t )((uintptr_t ) & (image->arg_value ) - (uintptr_t ) &
1134
- (image->work_dim ));
1141
+ image_size_static = (size_t )(
1142
+ (uintptr_t ) & (image->arg_value ) - (uintptr_t ) & (image->work_dim ));
1135
1143
}
1136
1144
1137
1145
if ((kern->io .debug_verbosity ) >= 2 ) {
@@ -1153,8 +1161,19 @@ void acl_kernel_if_launch_kernel_on_custom_sof(
1153
1161
// it is in dynamic memory. Only write the static part of the invocation
1154
1162
// image if this kernel uses CRA control.
1155
1163
if (!kern->streaming_control_signal_names [accel_id]) {
1156
- acl_kernel_cra_write_block (kern, accel_id, offset, (unsigned int *)image_p,
1157
- image_size_static);
1164
+ if (kern->csr_version == CSR_VERSION_ID_18_1) {
1165
+ // Just write everything for older CSR version
1166
+ acl_kernel_cra_write_block (kern, accel_id, offset,
1167
+ (unsigned int *)image_p, image_size_static);
1168
+ } else {
1169
+ char *img_cache_ptr = kern->static_img_cache [accel_id].get ();
1170
+ assert (img_cache_ptr && " kernel image cache not initialized!" );
1171
+ if (memcmp (img_cache_ptr, (char *)image_p, image_size_static) != 0 ) {
1172
+ acl_kernel_cra_write_block (kern, accel_id, offset,
1173
+ (unsigned int *)image_p, image_size_static);
1174
+ memcpy (img_cache_ptr, (char *)image_p, image_size_static);
1175
+ }
1176
+ }
1158
1177
}
1159
1178
1160
1179
bool accel_has_agent_args = false ;
@@ -1692,6 +1711,7 @@ void acl_kernel_if_close(acl_kernel_if *kern) {
1692
1711
kern->accel_invoc_queue_depth .clear ();
1693
1712
kern->accel_queue_front .clear ();
1694
1713
kern->accel_queue_back .clear ();
1714
+ kern->static_img_cache .clear ();
1695
1715
kern->accel_arg_cache .clear ();
1696
1716
kern->autorun_profiling_kernel_id = -1 ;
1697
1717
}
0 commit comments