Closed
Description
Describe the bug
The CTS math_builtin_api test fails with the CUDA backend on the latest SYCL builds and the latest CUDA version. See the gdb backtrace below:
(gdb) bt
#0 0x00007fffe4daa282 in ?? () from /lib/x86_64-linux-gnu/libnvidia-ptxjitcompiler.so.1
#1 0x00007fffe4d0c1e4 in ?? () from /lib/x86_64-linux-gnu/libnvidia-ptxjitcompiler.so.1
#2 0x00007fffe4d0c34f in ?? () from /lib/x86_64-linux-gnu/libnvidia-ptxjitcompiler.so.1
#3 0x00007fffe4ce603b in ?? () from /lib/x86_64-linux-gnu/libnvidia-ptxjitcompiler.so.1
#4 0x00007fffe4ce6bca in ?? () from /lib/x86_64-linux-gnu/libnvidia-ptxjitcompiler.so.1
#5 0x00007fffe4eb3f83 in ?? () from /lib/x86_64-linux-gnu/libnvidia-ptxjitcompiler.so.1
#6 0x00007fffe4eb4027 in ?? () from /lib/x86_64-linux-gnu/libnvidia-ptxjitcompiler.so.1
#7 0x00007fffe4baabf4 in ?? () from /lib/x86_64-linux-gnu/libnvidia-ptxjitcompiler.so.1
#8 0x00007fffe4bb3578 in ?? () from /lib/x86_64-linux-gnu/libnvidia-ptxjitcompiler.so.1
#9 0x00007fffe4bb77c2 in ?? () from /lib/x86_64-linux-gnu/libnvidia-ptxjitcompiler.so.1
#10 0x00007fffe4bb8c2c in ?? () from /lib/x86_64-linux-gnu/libnvidia-ptxjitcompiler.so.1
#11 0x00007fffe4bac05c in __cuda_CallJitEntryPoint () from /lib/x86_64-linux-gnu/libnvidia-ptxjitcompiler.so.1
#12 0x00007ffff5cb6942 in ?? () from /lib/x86_64-linux-gnu/libcuda.so.1
#13 0x00007ffff5d0510d in ?? () from /lib/x86_64-linux-gnu/libcuda.so.1
#14 0x00007ffff5a98d7a in ?? () from /lib/x86_64-linux-gnu/libcuda.so.1
#15 0x00007ffff5a47770 in ?? () from /lib/x86_64-linux-gnu/libcuda.so.1
#16 0x00007ffff5b0af88 in ?? () from /lib/x86_64-linux-gnu/libcuda.so.1
#17 0x00007ffff70438a0 in _pi_program::build_program(char const*) ()
from compiler_build/lib/libpi_cuda.so
#18 0x00007ffff7043c28 in cuda_piProgramBuild () from compiler_build/lib/libpi_cuda.so
#19 0x00007ffff772619f in _pi_result cl::sycl::detail::plugin::call_nocheck<(cl::sycl::detail::PiApiKind)41, _pi_program*, int, _pi_device* const*, char const*, decltype(nullptr), decltype(nullptr)>(_pi_program*, int, _pi_device* const*, char const*, decltype(nullptr), decltype(nullptr)) const ()
from compiler_build/lib/libsycl.so.5
#20 0x00007ffff7728eb8 in cl::sycl::detail::ProgramManager::build(std::unique_ptr<_pi_program, _pi_result (*)(_pi_program*)>, std::shared_ptr<cl::sycl::detail::context_impl>, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, _pi_device* const&, std::map<std::pair<cl::sycl::detail::DeviceLibExt, _pi_device*>, _pi_program*, std::less<std::pair<cl::sycl::detail::DeviceLibExt, _pi_device*> >, std::allocator<std::pair<std::pair<cl::sycl::detail::DeviceLibExt, _pi_device*> const, _pi_program*> > >&, unsigned int) ()
from compiler_build/lib/libsycl.so.5
#21 0x00007ffff772da16 in cl::sycl::detail::ProgramManager::getBuiltPIProgram(long, std::shared_ptr<cl::sycl::detail::context_impl> const&, std::shared_ptr<cl::sycl::detail::device_impl> const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, cl::sycl::detail::program_impl const*, bool) ()
from compiler_build/lib/libsycl.so.5
#22 0x00007ffff77342bd in cl::sycl::detail::ProgramManager::getOrCreateKernel(long, std::shared_ptr<cl::sycl::detail::context_impl> const&, std::shared_ptr<cl::sycl::detail::device_impl> const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, cl::sycl::detail::program_impl const*) ()
from compiler_build/lib/libsycl.so.5
#23 0x00007ffff7766a80 in cl::sycl::detail::ExecCGCommand::enqueueImp() ()
from compiler_build/lib/libsycl.so.5
#24 0x00007ffff775ea27 in cl::sycl::detail::Command::enqueue(cl::sycl::detail::EnqueueResultT&, cl::sycl::detail::BlockingT) ()
from compiler_build/lib/libsycl.so.5
#25 0x00007ffff776ab27 in cl::sycl::detail::Scheduler::addCG(std::unique_ptr<cl::sycl::detail::CG, std::default_delete<cl::sycl::detail::CG> >, std::shared_ptr<cl::sycl::detail::queue_impl>) () from compiler_build/lib/libsycl.so.5
#26 0x00007ffff77a5f80 in cl::sycl::handler::finalize() () from compiler_build/lib/libsycl.so.5
#27 0x00007ffff77c6888 in cl::sycl::detail::queue_impl::submit_impl(std::function<void (cl::sycl::handler&)> const&, std::shared_ptr<cl::sycl::detail::queue_impl> const&, cl::sycl::detail::code_location const&) () from compiler_build/lib/libsycl.so.5
#28 0x00007ffff77c6b52 in cl::sycl::queue::submit_impl(std::function<void (cl::sycl::handler&)>, cl::sycl::detail::code_location const&) ()
from compiler_build/lib/libsycl.so.5
#29 0x000000000048589d in cl::sycl::event cl::sycl::queue::submit<check_function<1000000, float, math_builtin_common_base__::math_builtin_common_base::run(sycl_cts::util::logger&)::{lambda()#1}>(sycl_cts::util::logger&, math_builtin_common_base__::math_builtin_common_base::run(sycl_cts::util::logger&)::{lambda()#1}, sycl_cts::resultRef<float>, int, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >)::{lambda(cl::sycl::handler&)#1}>(check_function<1000000, float, math_builtin_common_base__::math_builtin_common_base::run(sycl_cts::util::logger&)::{lambda()#1}>(sycl_cts::util::logger&, math_builtin_common_base__::math_builtin_common_base::run(sycl_cts::util::logger&)::{lambda()#1}, sycl_cts::resultRef<float>, int, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >)::{lambda(cl::sycl::handler&)#1}, cl::sycl::detail::code_location const&) ()
#30 0x0000000000465258 in void check_function<1000000, float, math_builtin_common_base__::math_builtin_common_base::run(sycl_cts::util::logger&)::{lambda()#1}>(sycl_cts::util::logger&, math_builtin_common_base__::math_builtin_common_base::run(sycl_cts::util::logger&)::{lambda()#1}, sycl_cts::resultRef<float>, int, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >) ()
#31 0x000000000045733f in math_builtin_common_base__::math_builtin_common_base::run(sycl_cts::util::logger&) ()
#32 0x00000000018fd283 in sycl_cts::util::executor::run_all() ()
#33 0x00000000018fa1e9 in sycl_cts::util::test_manager::run() ()
#34 0x00000000018e8a50 in main ()
To Reproduce
Note: Because the minimum supported C++ standard was recently raised, the line referenced here (linked as "this line" in the original report) must first be modified to require C++17.
cd SYCL-CTS
mkdir build && cd build
cmake -GNinja -DSYCL_IMPLEMENTATION=Intel_SYCL -DINTEL_SYCL_ROOT=<path to built sycl> -Dopencl_platform_name=nvidia -Dopencl_device_name=opencl_gpu -DCMAKE_BUILD_TYPE=Release -DINTEL_SYCL_FLAGS="-Xsycl-target-backend;--cuda-gpu-arch=sm_50" -DCMAKE_EXE_LINKER_FLAGS=" -Wl,-no-relax " -DINTEL_SYCL_TRIPLE=nvptx64-nvidia-cuda-sycldevice -DSYCL_CTS_ENABLE_OPENCL_INTEROP_TESTS=Off -DSYCL_CTS_ENABLE_DOUBLE_TESTS=On -DSYCL_CTS_ENABLE_HALF_TESTS=On ..
ninja test_math_builtin_api
- Run Test
SYCL_DEVICE_FILTER="cuda:gpu:0" ./bin/test_handler -p nvidia -d opencl_gpu --test math_builtin_common_base
Environment (please complete the following information):
OS version: Ubuntu 20.04.3 LTS
CUDA version:
nvidia-smi
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.57.02 Driver Version: 470.57.02 CUDA Version: 11.4 |
|-------------------------------+----------------------+----------------------+