From 584869eb05a0e6e208d1cbc60e156f8d21f8d5f2 Mon Sep 17 00:00:00 2001
From: Artur Gainullin
Date: Mon, 30 Sep 2024 23:38:52 -0700
Subject: [PATCH] Change urProgramCreateWithBinary signature in adapters

---
 source/adapters/cuda/program.cpp              | 20 ++++----
 source/adapters/cuda/ur_interface_loader.cpp  |  1 -
 source/adapters/hip/program.cpp               | 20 ++++----
 source/adapters/hip/ur_interface_loader.cpp   |  1 -
 source/adapters/level_zero/program.cpp        | 46 ++++---------------
 source/adapters/native_cpu/program.cpp        | 15 ++++--
 .../native_cpu/ur_interface_loader.cpp        |  1 -
 source/adapters/opencl/program.cpp            | 32 +++++++------
 .../adapters/opencl/ur_interface_loader.cpp   |  1 -
 test/adapters/cuda/kernel_tests.cpp           | 42 +++++++++--------
 10 files changed, 85 insertions(+), 94 deletions(-)

diff --git a/source/adapters/cuda/program.cpp b/source/adapters/cuda/program.cpp
index e4ba25df69..1bfd2ca363 100644
--- a/source/adapters/cuda/program.cpp
+++ b/source/adapters/cuda/program.cpp
@@ -483,23 +483,23 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetNativeHandle(
 }
 
 UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary(
-    ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size,
-    const uint8_t *pBinary, const ur_program_properties_t *pProperties,
+    ur_context_handle_t hContext, uint32_t numDevices,
+    ur_device_handle_t *phDevices, size_t *pLengths, const uint8_t **ppBinaries,
+    const ur_program_properties_t *pProperties,
     ur_program_handle_t *phProgram) {
+  if (numDevices > 1)
+    return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
 
-  UR_CHECK_ERROR(
-      createProgram(hContext, hDevice, size, pBinary, pProperties, phProgram));
+  if (numDevices == 0)
+    return UR_RESULT_ERROR_INVALID_DEVICE;
+
+  UR_CHECK_ERROR(createProgram(hContext, phDevices[0], pLengths[0],
+                               ppBinaries[0], pProperties, phProgram));
   (*phProgram)->BinaryType = UR_PROGRAM_BINARY_TYPE_COMPILED_OBJECT;
 
   return UR_RESULT_SUCCESS;
 }
 
-UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinaryExp(
-    ur_context_handle_t, uint32_t, ur_device_handle_t *, size_t *,
-    const uint8_t **, const ur_program_properties_t *, ur_program_handle_t *) {
-  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
-}
-
 // This entry point is only used for native specialization constants (SPIR-V),
 // and the CUDA plugin is AOT only so this entry point is not supported.
 UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstants(
diff --git a/source/adapters/cuda/ur_interface_loader.cpp b/source/adapters/cuda/ur_interface_loader.cpp
index 9e1780ca6b..bb3fb9aee5 100644
--- a/source/adapters/cuda/ur_interface_loader.cpp
+++ b/source/adapters/cuda/ur_interface_loader.cpp
@@ -442,7 +442,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable(
   pDdiTable->pfnBuildExp = urProgramBuildExp;
   pDdiTable->pfnCompileExp = urProgramCompileExp;
   pDdiTable->pfnLinkExp = urProgramLinkExp;
-  pDdiTable->pfnCreateWithBinaryExp = urProgramCreateWithBinaryExp;
 
   return UR_RESULT_SUCCESS;
 }
diff --git a/source/adapters/hip/program.cpp b/source/adapters/hip/program.cpp
index c1ae3d8a42..5bbbebf7b8 100644
--- a/source/adapters/hip/program.cpp
+++ b/source/adapters/hip/program.cpp
@@ -480,9 +480,19 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetNativeHandle(
 ///
 /// Note: Only supports one device
 UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary(
-    ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size,
-    const uint8_t *pBinary, const ur_program_properties_t *pProperties,
+    ur_context_handle_t hContext, uint32_t numDevices,
+    ur_device_handle_t *phDevices, size_t *pLengths, const uint8_t **ppBinaries,
+    const ur_program_properties_t *pProperties,
     ur_program_handle_t *phProgram) {
+  if (numDevices > 1)
+    return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+
+  if (numDevices == 0)
+    return UR_RESULT_ERROR_INVALID_DEVICE;
+
+  auto hDevice = phDevices[0];
+  auto pBinary = ppBinaries[0];
+  auto size = pLengths[0];
   UR_ASSERT(std::find(hContext->getDevices().begin(),
                       hContext->getDevices().end(),
                       hDevice) != hContext->getDevices().end(),
@@ -522,12 +532,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary(
   return UR_RESULT_SUCCESS;
 }
 
-UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinaryExp(
-    ur_context_handle_t, uint32_t, ur_device_handle_t *, size_t *,
-    const uint8_t **, const ur_program_properties_t *, ur_program_handle_t *) {
-  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
-}
-
 // This entry point is only used for native specialization constants (SPIR-V),
 // and the HIP plugin is AOT only so this entry point is not supported.
 UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstants(
diff --git a/source/adapters/hip/ur_interface_loader.cpp b/source/adapters/hip/ur_interface_loader.cpp
index 7f5ab21aff..af9b8fa9c3 100644
--- a/source/adapters/hip/ur_interface_loader.cpp
+++ b/source/adapters/hip/ur_interface_loader.cpp
@@ -407,7 +407,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable(
   pDdiTable->pfnBuildExp = urProgramBuildExp;
   pDdiTable->pfnCompileExp = urProgramCompileExp;
   pDdiTable->pfnLinkExp = urProgramLinkExp;
-  pDdiTable->pfnCreateWithBinaryExp = urProgramCreateWithBinaryExp;
 
   return UR_RESULT_SUCCESS;
 }
diff --git a/source/adapters/level_zero/program.cpp b/source/adapters/level_zero/program.cpp
index 24b408209e..b93766ee2c 100644
--- a/source/adapters/level_zero/program.cpp
+++ b/source/adapters/level_zero/program.cpp
@@ -83,42 +83,6 @@ ur_result_t urProgramCreateWithIL(
 }
 
 ur_result_t urProgramCreateWithBinary(
-    ur_context_handle_t Context, ///< [in] handle of the context instance
-    ur_device_handle_t
-        Device,  ///< [in] handle to device associated with binary.
-    size_t Size, ///< [in] size in bytes.
-    const uint8_t *Binary, ///< [in] pointer to binary.
-    const ur_program_properties_t
-        *Properties, ///< [in][optional] pointer to program creation properties.
-    ur_program_handle_t
-        *Program ///< [out] pointer to handle of Program object created.
-) {
-  // In OpenCL, clCreateProgramWithBinary() can be used to load any of the
-  // following: "program executable", "compiled program", or "library of
-  // compiled programs".  In addition, the loaded program can be either
-  // IL (SPIR-v) or native device code.  For now, we assume that
-  // urProgramCreateWithBinary() is only used to load a "program executable"
-  // as native device code.
-  // If we wanted to support all the same cases as OpenCL, we would need to
-  // somehow examine the binary image to distinguish the cases.  Alternatively,
-  // we could change the PI interface and have the caller pass additional
-  // information to distinguish the cases.
-
-  try {
-    ur_program_handle_t_ *UrProgram =
-        new ur_program_handle_t_(ur_program_handle_t_::Native, Context, 1,
-                                 &Device, Properties, &Binary, &Size);
-    *Program = reinterpret_cast<ur_program_handle_t>(UrProgram);
-  } catch (const std::bad_alloc &) {
-    return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY;
-  } catch (...) {
-    return UR_RESULT_ERROR_UNKNOWN;
-  }
-
-  return UR_RESULT_SUCCESS;
-}
-
-ur_result_t urProgramCreateWithBinaryExp(
     ur_context_handle_t hContext, ///< [in] handle of the context instance
     uint32_t numDevices,          ///< [in] number of devices
     ur_device_handle_t
@@ -135,6 +99,16 @@ ur_result_t urProgramCreateWithBinaryExp(
     ur_program_handle_t
         *phProgram ///< [out] pointer to handle of Program object created.
 ) {
+  // In OpenCL, clCreateProgramWithBinary() can be used to load any of the
+  // following: "program executable", "compiled program", or "library of
+  // compiled programs".  In addition, the loaded program can be either
+  // IL (SPIR-v) or native device code.  For now, we assume that
+  // urProgramCreateWithBinary() is only used to load a "program executable"
+  // as native device code.
+  // If we wanted to support all the same cases as OpenCL, we would need to
+  // somehow examine the binary image to distinguish the cases.  Alternatively,
+  // we could change the UR interface and have the caller pass additional
+  // information to distinguish the cases.
   try {
     ur_program_handle_t_ *UrProgram = new ur_program_handle_t_(
         ur_program_handle_t_::Native, hContext, numDevices, phDevices,
diff --git a/source/adapters/native_cpu/program.cpp b/source/adapters/native_cpu/program.cpp
index c338878f9e..9a0454d91a 100644
--- a/source/adapters/native_cpu/program.cpp
+++ b/source/adapters/native_cpu/program.cpp
@@ -54,10 +54,19 @@ deserializeWGMetadata(const ur_program_metadata_t &MetadataElement,
 }
 
 UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary(
-    ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size,
-    const uint8_t *pBinary, const ur_program_properties_t *pProperties,
+    ur_context_handle_t hContext, uint32_t numDevices,
+    ur_device_handle_t *phDevices, size_t *pLengths, const uint8_t **ppBinaries,
+    const ur_program_properties_t *pProperties,
     ur_program_handle_t *phProgram) {
-  std::ignore = size;
+  if (numDevices > 1)
+    return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+
+  if (numDevices == 0)
+    return UR_RESULT_ERROR_INVALID_DEVICE;
+
+  auto hDevice = phDevices[0];
+  auto pBinary = ppBinaries[0];
+  std::ignore = pLengths;
   std::ignore = pProperties;
 
   UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
diff --git a/source/adapters/native_cpu/ur_interface_loader.cpp b/source/adapters/native_cpu/ur_interface_loader.cpp
index 69220121ba..ff6c9d8c0f 100644
--- a/source/adapters/native_cpu/ur_interface_loader.cpp
+++ b/source/adapters/native_cpu/ur_interface_loader.cpp
@@ -425,7 +425,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable(
   pDdiTable->pfnBuildExp = urProgramBuildExp;
   pDdiTable->pfnCompileExp = urProgramCompileExp;
   pDdiTable->pfnLinkExp = urProgramLinkExp;
-  pDdiTable->pfnCreateWithBinaryExp = urProgramCreateWithBinaryExp;
 
   return UR_RESULT_SUCCESS;
 }
diff --git a/source/adapters/opencl/program.cpp b/source/adapters/opencl/program.cpp
index 7d583da87f..fd2e51ec69 100644
--- a/source/adapters/opencl/program.cpp
+++ b/source/adapters/opencl/program.cpp
@@ -116,29 +116,31 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL(
 }
 
 UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary(
-    ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size,
-    const uint8_t *pBinary, const ur_program_properties_t *,
+    ur_context_handle_t hContext, uint32_t numDevices,
+    ur_device_handle_t *phDevices, size_t *pLengths, const uint8_t **ppBinaries,
+    [[maybe_unused]] const ur_program_properties_t *pProperties,
     ur_program_handle_t *phProgram) {
-
-  const cl_device_id Devices[1] = {cl_adapter::cast<cl_device_id>(hDevice)};
-  const size_t Lengths[1] = {size};
-  cl_int BinaryStatus[1];
+  // Convert each UR device handle to its native OpenCL device id.
+  std::vector<cl_device_id> Devices(numDevices);
+  for (uint32_t i = 0; i < numDevices; ++i)
+    Devices[i] = cl_adapter::cast<cl_device_id>(phDevices[i]);
+  // Zero-initialized (CL_SUCCESS) so that entries the runtime does not write
+  // on an early failure are never read as indeterminate values.
+  std::vector<cl_int> BinaryStatus(numDevices);
   cl_int CLResult;
   *phProgram = cl_adapter::cast<ur_program_handle_t>(clCreateProgramWithBinary(
-      cl_adapter::cast<cl_context>(hContext), cl_adapter::cast<cl_uint>(1u),
-      Devices, Lengths, &pBinary, BinaryStatus, &CLResult));
-  CL_RETURN_ON_FAILURE(BinaryStatus[0]);
+      cl_adapter::cast<cl_context>(hContext),
+      cl_adapter::cast<cl_uint>(numDevices), Devices.data(), pLengths,
+      ppBinaries, BinaryStatus.data(), &CLResult));
+  // Every device's binary must have loaded, not just the first one.
+  for (const cl_int Status : BinaryStatus) {
+    CL_RETURN_ON_FAILURE(Status);
+  }
   CL_RETURN_ON_FAILURE(CLResult);
 
   return UR_RESULT_SUCCESS;
 }
 
-UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinaryExp(
-    ur_context_handle_t, uint32_t, ur_device_handle_t *, size_t *,
-    const uint8_t **, const ur_program_properties_t *, ur_program_handle_t *) {
-  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
-}
-
 UR_APIEXPORT ur_result_t UR_APICALL
 urProgramCompile([[maybe_unused]] ur_context_handle_t hContext,
                  ur_program_handle_t hProgram, const char *pOptions) {
diff --git a/source/adapters/opencl/ur_interface_loader.cpp b/source/adapters/opencl/ur_interface_loader.cpp
index 5f1d1d87f2..100bb888cf 100644
--- a/source/adapters/opencl/ur_interface_loader.cpp
+++ b/source/adapters/opencl/ur_interface_loader.cpp
@@ -432,7 +432,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable(
   pDdiTable->pfnBuildExp = urProgramBuildExp;
   pDdiTable->pfnCompileExp = urProgramCompileExp;
   pDdiTable->pfnLinkExp = urProgramLinkExp;
-  pDdiTable->pfnCreateWithBinaryExp = urProgramCreateWithBinaryExp;
 
   return UR_RESULT_SUCCESS;
 }
diff --git a/test/adapters/cuda/kernel_tests.cpp b/test/adapters/cuda/kernel_tests.cpp
index 80ec9146fd..085c03030b 100644
--- a/test/adapters/cuda/kernel_tests.cpp
+++ b/test/adapters/cuda/kernel_tests.cpp
@@ -74,9 +74,10 @@ const char *threeParamsTwoLocal = "\n\
 TEST_P(cudaKernelTest, CreateProgramAndKernel) {
 
     uur::raii::Program program = nullptr;
-    ASSERT_SUCCESS(urProgramCreateWithBinary(
-        context, device, std::strlen(ptxSource), (const uint8_t *)ptxSource,
-        nullptr, program.ptr()));
+    auto Length = std::strlen(ptxSource);
+    ASSERT_SUCCESS(urProgramCreateWithBinary(context, 1, &device, &Length,
+                                             (const uint8_t **)(&ptxSource),
+                                             nullptr, program.ptr()));
     ASSERT_NE(program, nullptr);
     ASSERT_SUCCESS(urProgramBuild(context, program, nullptr));
 
@@ -116,9 +117,10 @@ TEST_P(cudaKernelTest, CreateProgramAndKernelWithMetadata) {
     ur_program_properties_t programProps{UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES,
                                          nullptr, 1, &reqdWorkGroupSizeMDProp};
     uur::raii::Program program = nullptr;
-    ASSERT_SUCCESS(urProgramCreateWithBinary(
-        context, device, std::strlen(ptxSource), (const uint8_t *)ptxSource,
-        &programProps, program.ptr()));
+    auto Length = std::strlen(ptxSource);
+    ASSERT_SUCCESS(urProgramCreateWithBinary(context, 1, &device, &Length,
+                                             (const uint8_t **)(&ptxSource),
+                                             &programProps, program.ptr()));
     ASSERT_NE(program, nullptr);
     ASSERT_SUCCESS(urProgramBuild(context, program, nullptr));
 
@@ -138,9 +140,10 @@ TEST_P(cudaKernelTest, CreateProgramAndKernelWithMetadata) {
 TEST_P(cudaKernelTest, URKernelArgumentSimple) {
 
     uur::raii::Program program = nullptr;
-    ASSERT_SUCCESS(urProgramCreateWithBinary(
-        context, device, std::strlen(ptxSource), (const uint8_t *)ptxSource,
-        nullptr, program.ptr()));
+    auto Length = std::strlen(ptxSource);
+    ASSERT_SUCCESS(urProgramCreateWithBinary(context, 1, &device, &Length,
+                                             (const uint8_t **)(&ptxSource),
+                                             nullptr, program.ptr()));
     ASSERT_NE(program, nullptr);
     ASSERT_SUCCESS(urProgramBuild(context, program, nullptr));
 
@@ -160,9 +163,10 @@ TEST_P(cudaKernelTest, URKernelArgumentSimple) {
 TEST_P(cudaKernelTest, URKernelArgumentSetTwice) {
 
     uur::raii::Program program = nullptr;
-    ASSERT_SUCCESS(urProgramCreateWithBinary(
-        context, device, std::strlen(ptxSource), (const uint8_t *)ptxSource,
-        nullptr, program.ptr()));
+    auto Length = std::strlen(ptxSource);
+    ASSERT_SUCCESS(urProgramCreateWithBinary(context, 1, &device, &Length,
+                                             (const uint8_t **)(&ptxSource),
+                                             nullptr, program.ptr()));
     ASSERT_NE(program, nullptr);
     ASSERT_SUCCESS(urProgramBuild(context, program, nullptr));
 
@@ -189,9 +193,10 @@ TEST_P(cudaKernelTest, URKernelArgumentSetTwice) {
 TEST_P(cudaKernelTest, URKernelDispatch) {
 
     uur::raii::Program program = nullptr;
-    ASSERT_SUCCESS(urProgramCreateWithBinary(
-        context, device, std::strlen(ptxSource), (const uint8_t *)ptxSource,
-        nullptr, program.ptr()));
+    auto Length = std::strlen(ptxSource);
+    ASSERT_SUCCESS(urProgramCreateWithBinary(context, 1, &device, &Length,
+                                             (const uint8_t **)(&ptxSource),
+                                             nullptr, program.ptr()));
     ASSERT_NE(program, nullptr);
     ASSERT_SUCCESS(urProgramBuild(context, program, nullptr));
 
@@ -218,9 +223,10 @@ TEST_P(cudaKernelTest, URKernelDispatch) {
 TEST_P(cudaKernelTest, URKernelDispatchTwo) {
 
     uur::raii::Program program = nullptr;
-    ASSERT_SUCCESS(urProgramCreateWithBinary(
-        context, device, std::strlen(ptxSource), (const uint8_t *)twoParams,
-        nullptr, program.ptr()));
+    auto Length = std::strlen(twoParams);
+    ASSERT_SUCCESS(urProgramCreateWithBinary(context, 1, &device, &Length,
+                                             (const uint8_t **)(&twoParams),
+                                             nullptr, program.ptr()));
     ASSERT_NE(program, nullptr);
     ASSERT_SUCCESS(urProgramBuild(context, program, nullptr));
 