@@ -1651,8 +1651,15 @@ pi_result cuda_piMemBufferCreate(pi_context context, pi_mem_flags flags,
16511651 cuMemHostRegister (host_ptr, size, CU_MEMHOSTREGISTER_DEVICEMAP));
16521652 retErr = PI_CHECK_ERROR (cuMemHostGetDevicePointer (&ptr, host_ptr, 0 ));
16531653 allocMode = _pi_mem::mem_::buffer_mem_::alloc_mode::use_host_ptr;
1654+ } else if (flags & PI_MEM_FLAGS_HOST_PTR_ALLOC) {
1655+ retErr = PI_CHECK_ERROR (cuMemAllocHost (&host_ptr, size));
1656+ retErr = PI_CHECK_ERROR (cuMemHostGetDevicePointer (&ptr, host_ptr, 0 ));
1657+ allocMode = _pi_mem::mem_::buffer_mem_::alloc_mode::alloc_host_ptr;
16541658 } else {
16551659 retErr = PI_CHECK_ERROR (cuMemAlloc (&ptr, size));
1660+ if (flags & PI_MEM_FLAGS_HOST_PTR_COPY) {
1661+ allocMode = _pi_mem::mem_::buffer_mem_::alloc_mode::copy_in;
1662+ }
16561663 }
16571664
16581665 if (retErr == PI_SUCCESS) {
@@ -1715,13 +1722,16 @@ pi_result cuda_piMemRelease(pi_mem memObj) {
17151722
17161723 if (memObj->mem_type_ == _pi_mem::mem_type::buffer) {
17171724 switch (uniqueMemObj->mem_ .buffer_mem_ .allocMode_ ) {
1725+ case _pi_mem::mem_::buffer_mem_::alloc_mode::copy_in:
17181726 case _pi_mem::mem_::buffer_mem_::alloc_mode::classic:
17191727 ret = PI_CHECK_ERROR (cuMemFree (uniqueMemObj->mem_ .buffer_mem_ .ptr_ ));
17201728 break ;
17211729 case _pi_mem::mem_::buffer_mem_::alloc_mode::use_host_ptr:
17221730 ret = PI_CHECK_ERROR (
17231731 cuMemHostUnregister (uniqueMemObj->mem_ .buffer_mem_ .hostPtr_ ));
17241732 break ;
1733+ case _pi_mem::mem_::buffer_mem_::alloc_mode::alloc_host_ptr:
1734+ ret = PI_CHECK_ERROR (cuMemFreeHost (uniqueMemObj->mem_ .buffer_mem_ .hostPtr_ ));
17251735 };
17261736 } else if (memObj->mem_type_ == _pi_mem::mem_type::surface) {
17271737 ret = PI_CHECK_ERROR (
0 commit comments