Closed
Description
When a kernel is enqueued to the CUDA backend from a thread other than the main thread (here, a std::async worker thread), an exception is thrown.
If a host buffer accessor is created just before the kernel enqueue (which presumably ensures the device state is set up), everything completes fine (see the commented-out block in run()).
PI CUDA ERROR:
Value: 201
Name: CUDA_ERROR_INVALID_CONTEXT
Description: invalid device context
Function: guessLocalWorkSize
Source Location: /home/simon/dev/para/gpufem/llvm/sycl/plugins/cuda/pi_cuda.cpp:257
terminate called after throwing an instance of 'cl::sycl::runtime_error'
what(): Enqueue process failed. -59 (CL_INVALID_OPERATION)
#include "CL/sycl.hpp"
#include <future>
#include <iostream>
#include <thread>
class Runner {
using elem_type = cl::sycl::cl_float;
public:
Runner(int array_length) : queue(), buffer(array_length) {
this->array_length = array_length;
}
void reset() {
auto buffer_acc = this->buffer.get_access<sycl::access::mode::discard_write>();
for (int i=0; i<this->array_length; i++) {
buffer_acc[i] = 0;
}
}
void run(){
// calculate node energy
{
//const auto buffer_acc = this->buffer.get_access<sycl::access::mode::read>();
// without this buffer accessor, the exception is thrown:
}
this->queue.submit([&](sycl::handler& cgh){
// Request access to the buffer
auto acc = this->buffer.get_access<sycl::access::mode::read_write>(cgh);
// Enqueue a parallel_for task.
cgh.parallel_for<class test>(
sycl::range<1>(this->array_length),
[=](sycl::id<1> id){
acc[id] += 1.;
}); // End of the kernel function
});
this->queue.wait_and_throw();
}
void print() {
const auto buffer_acc = this->buffer.get_access<sycl::access::mode::read>();
std::cout << "buffer size: " << buffer_acc.get_size() << ": " << buffer_acc[0] << std::endl;
}
private:
int array_length;
cl::sycl::queue queue;
cl::sycl::buffer<elem_type> buffer;
};
int main() {
int num = 10;
auto runner = new Runner(num);
runner->reset();
runner->run();
auto future = std::async(std::launch::async, [&](){runner->run();});
future.wait();
runner->print();
return 0;
}