Closed
Description
Describe the bug
Calling sycl::queue::submit_barrier() on a queue that targets the CUDA backend causes a segmentation fault.
To Reproduce
Compiling and running the following program
#include <CL/sycl.hpp>
// Minimal reproducer for this report: create a queue and submit a barrier to it.
int main() {
// Queue is constructed from a default_selector instance.
auto sycl_queue = sycl::queue(sycl::default_selector());
// Per the backtrace in this report, the crash occurs beneath this call
// (queue::submit_barrier -> ... -> plugin::call_nocheck -> address 0x0).
sycl_queue.submit_barrier();
};
via
clang++ -fsycl-targets=nvptx64-nvidia-cuda-sycldevice -Xsycl-target-backend --cuda-gpu-arch=sm_70 -fsycl -fsycl-unnamed-lambda reduction.cpp && ./a.out
segfaults. The backtrace (note that frame #0 is at the null address) is:
#0 0x0000000000000000 in ?? ()
#1 0x00007ffff71b093c in _pi_result cl::sycl::detail::plugin::call_nocheck<(cl::sycl::detail::PiApiKind)75, _pi_queue*, int, decltype(nullptr), _pi_event**>(_pi_queue*, int, decltype(nullptr), _pi_event**) const () from /opt/sycl/lib/libsycl.so.5
#2 0x00007ffff71ba9ee in cl::sycl::detail::ExecCGCommand::enqueueImp() ()
from /opt/sycl/lib/libsycl.so.5
#3 0x00007ffff71b5a67 in cl::sycl::detail::Command::enqueue(cl::sycl::detail::EnqueueResultT&, cl::sycl::detail::BlockingT) () from /opt/sycl/lib/libsycl.so.5
#4 0x00007ffff71c6cc2 in cl::sycl::detail::Scheduler::GraphProcessor::enqueueCommand(cl::sycl::detail::Command*, cl::sycl::detail::EnqueueResultT&, cl::sycl::detail::BlockingT) ()
from /opt/sycl/lib/libsycl.so.5
#5 0x00007ffff71c1181 in cl::sycl::detail::Scheduler::addCG(std::unique_ptr<cl::sycl::detail::CG, std::default_delete<cl::sycl::detail::CG> >, std::shared_ptr<cl::sycl::detail::queue_impl>) ()
from /opt/sycl/lib/libsycl.so.5
#6 0x00007ffff71fb9b1 in cl::sycl::handler::finalize() () from /opt/sycl/lib/libsycl.so.5
#7 0x00007ffff720f9b0 in cl::sycl::detail::queue_impl::submit_impl(std::function<void (cl::sycl::handler&)> const&, std::shared_ptr<cl::sycl::detail::queue_impl> const&, cl::sycl::detail::code_location const&) () from /opt/sycl/lib/libsycl.so.5
#8 0x00007ffff720fbd6 in cl::sycl::queue::submit_impl(std::function<void (cl::sycl::handler&)>, cl::sycl::detail::code_location const&) () from /opt/sycl/lib/libsycl.so.5
#9 0x000000000040338c in cl::sycl::event cl::sycl::queue::submit<cl::sycl::queue::submit_barrier(cl::sycl::detail::code_location const&)::{lambda(cl::sycl::handler&)#1}>(cl::sycl::queue::submit_barrier(cl::sycl::detail::code_location const&)::{lambda(cl::sycl::handler&)#1}, cl::sycl::detail::code_location const&) ()
#10 0x0000000000402d6b in cl::sycl::queue::submit_barrier(cl::sycl::detail::code_location const&) ()
#11 0x0000000000402802 in main ()
Environment (please complete the following information):
- OS: Linux
- Target device and vendor: NVIDIA CUDA GPU
- DPC++ version:
$ clang++ --version
clang version 12.0.0
Target: x86_64-unknown-linux-gnu
Thread model: posix
InstalledDir: /opt/sycl/bin
- Dependencies version: CUDA 10.2