@@ -224,7 +224,9 @@ int c_dbcsr_acc_dev_mem_allocate(void** dev_mem, size_t nbytes) {
224
224
# endif
225
225
clReleaseMemObject (buffer );
226
226
# if defined(CL_VERSION_2_0 )
227
- /*if (NULL != ptr)*/ clSVMFree (context , ptr );
227
+ if (0 != c_dbcsr_acc_opencl_config .device [tid ].svm_interop /*&& (NULL != ptr)*/ ) {
228
+ clSVMFree (context , ptr );
229
+ }
228
230
# endif
229
231
result = EXIT_FAILURE ;
230
232
}
@@ -271,8 +273,10 @@ int c_dbcsr_acc_dev_mem_deallocate(void* dev_mem) {
271
273
}
272
274
# endif
273
275
# if defined(CL_VERSION_2_0 )
274
- assert (NULL != c_dbcsr_acc_opencl_config .device [tid ].context );
275
- clSVMFree (c_dbcsr_acc_opencl_config .device [tid ].context , ptr ); /*if (NULL != ptr)*/
276
+ if (0 != c_dbcsr_acc_opencl_config .device [tid ].svm_interop /*&& (NULL != ptr)*/ ) {
277
+ assert (NULL != c_dbcsr_acc_opencl_config .device [tid ].context );
278
+ clSVMFree (c_dbcsr_acc_opencl_config .device [tid ].context , ptr );
279
+ }
276
280
# endif
277
281
}
278
282
# if defined(__DBCSR_ACC ) && defined(ACC_OPENCL_PROFILE )
@@ -363,8 +367,36 @@ int c_dbcsr_acc_memcpy_d2d(const void* devmem_src, void* devmem_dst, size_t nbyt
363
367
# endif
364
368
assert ((NULL != devmem_src || 0 == nbytes ) && (NULL != devmem_dst || 0 == nbytes ) && NULL != stream );
365
369
if (NULL != devmem_src && NULL != devmem_dst && 0 != nbytes ) {
366
- result = clEnqueueCopyBuffer (* ACC_OPENCL_STREAM (stream ), * ACC_OPENCL_MEM (devmem_src ), * ACC_OPENCL_MEM (devmem_dst ),
367
- 0 /*src_offset*/ , 0 /*dst_offset*/ , nbytes , 0 , NULL , NULL );
370
+ const cl_mem * const src = ACC_OPENCL_MEM (devmem_src ), * const dst = ACC_OPENCL_MEM (devmem_dst );
371
+ assert (NULL != * src && NULL != * dst );
372
+ if (* src != * dst ) {
373
+ const cl_command_queue queue = * ACC_OPENCL_STREAM (stream );
374
+ if (0 == (2 & c_dbcsr_acc_opencl_config .devcopy )) {
375
+ result = clEnqueueCopyBuffer (queue , * src , * dst , 0 /*src_offset*/ , 0 /*dst_offset*/ , nbytes , 0 , NULL , NULL );
376
+ }
377
+ else {
378
+ static volatile int lock ; /* creating cl_kernel and clSetKernelArg must be synchronized */
379
+ static cl_kernel kernel = NULL ;
380
+ LIBXSMM_ATOMIC_ACQUIRE (& lock , LIBXSMM_SYNC_NPAUSE , LIBXSMM_ATOMIC_RELAXED );
381
+ if (NULL == kernel ) { /* generate kernel */
382
+ const char source [] = "kernel void memcpy_d2d(global uchar *restrict src, global uchar *restrict dst) {\n"
383
+ " const size_t i = get_global_id(0);\n"
384
+ " dst[i] = src[i];\n"
385
+ "}\n" ;
386
+ result = c_dbcsr_acc_opencl_kernel (source , "memcpy_d2d" /*kernel_name*/ , NULL /*build_params*/ , NULL /*build_options*/ ,
387
+ NULL /*try_build_options*/ , NULL /*try_ok*/ , NULL /*extnames*/ , 0 /*num_exts*/ , & kernel );
388
+ }
389
+ if (EXIT_SUCCESS == result ) {
390
+ assert (NULL != kernel );
391
+ ACC_OPENCL_CHECK (clSetKernelArg (kernel , 0 , sizeof (cl_mem ), src ), "set src argument of memcpy_d2d kernel" , result );
392
+ ACC_OPENCL_CHECK (clSetKernelArg (kernel , 1 , sizeof (cl_mem ), dst ), "set dst argument of memcpy_d2d kernel" , result );
393
+ ACC_OPENCL_CHECK (clEnqueueNDRangeKernel (
394
+ queue , kernel , 1 /*work_dim*/ , NULL /*offset*/ , & nbytes , NULL /*local_work_size*/ , 0 , NULL , NULL ),
395
+ "launch memcpy_d2d kernel" , result );
396
+ }
397
+ LIBXSMM_ATOMIC_RELEASE (& lock , LIBXSMM_ATOMIC_RELAXED );
398
+ }
399
+ }
368
400
}
369
401
# if defined(__DBCSR_ACC ) && defined(ACC_OPENCL_PROFILE )
370
402
c_dbcsr_timestop (& routine_handle );
@@ -385,7 +417,7 @@ int c_dbcsr_acc_memset_zero(void* dev_mem, size_t offset, size_t nbytes, void* s
385
417
if (0 != nbytes ) {
386
418
const cl_command_queue queue = * ACC_OPENCL_STREAM (stream );
387
419
const cl_mem * const buffer = ACC_OPENCL_MEM (dev_mem );
388
- if (0 == c_dbcsr_acc_opencl_config .nullify ) {
420
+ if (0 == ( 1 & c_dbcsr_acc_opencl_config .devcopy ) ) {
389
421
static const cl_uchar pattern = 0 ; /* fill with zeros */
390
422
result = clEnqueueFillBuffer (queue , * buffer , & pattern , sizeof (pattern ), offset , nbytes , 0 , NULL , NULL );
391
423
}
0 commit comments