Closed
Description
Describe the bug
How should the value returned by `sycl::kernel::get_info<info::kernel::num_args>()` be interpreted?
To Reproduce
In my example code, I try to print the kernel's `num_args` information, but the reported value does not seem to match the actual number of arguments.
mykernel-num-args.cc
#include <sycl/sycl.hpp>
using namespace sycl; // (optional) avoids need for "sycl::" before SYCL names
// Forward declare the names of the three kernels used in main().
class MyKernel1;
class MyKernel2;
class MyKernel3;
// Prototypes for the functions (defined below) that supply the values
// assigned to the specialization constants in main().
extern int get_width();
extern int get_height();
// Number of elements in each of the 1D buffers created in main().
constexpr size_t N = 10;
// Declare specialization constants used in our kernels.
constexpr specialization_id<int> width;
constexpr specialization_id<int> height;
// Supplies the value that main() assigns to the `width` specialization
// constant. This sample always uses 0.
int get_width() {
  constexpr int kWidth = 0;
  return kWidth;
}
// Supplies the value that main() assigns to the `height` specialization
// constant. This sample always uses 0.
int get_height() {
  constexpr int kHeight = 0;
  return kHeight;
}
// Maps a SYCL backend enumerator to a short, human-readable name for logging.
//
// @param bk  Backend enumerator to describe.
// @return    A lowercase backend name, or "unknown" for any enumerator that is
//            not listed in the switch below.
std::string getBackendStr(backend bk) {
  switch (bk) {
  case backend::host:
    return "host";
  case backend::opencl:
    // Previously reported as "host" (copy-paste error).
    return "opencl";
  case backend::ext_oneapi_level_zero:
    // Previously reported as "host" (copy-paste error).
    return "level_zero";
  case backend::ext_oneapi_cuda:
    return "cuda";
  case backend::ext_oneapi_hip:
    return "hip";
  case backend::all:
    return "all";
  case backend::ext_intel_esimd_emulator:
    return "intel_esimd_emulator";
  default:
    return "unknown";
  }
}
// Demo driver for the num_args question.
//
// Prints information about the default queue's device, the input kernel
// bundle and each kernel in the executable bundle (including
// info::kernel::num_args), runs three kernels that share that executable
// bundle, and finally checks that c[i] == a[i] + b[i] for every element.
//
// Returns 0 on success and -1 if the result check fails.
int main() {
  queue myQueue;
  auto myContext = myQueue.get_context();
  auto myDev = myQueue.get_device();

  std::cout << " Current Device: " << myDev.get_info<sycl::info::device::name>() << std::endl;
  std::cout << " Current Device vendor id: " << myDev.get_info<sycl::info::device::vendor_id>() << std::endl;
  std::cout << " Current Device object address : " << &myDev << std::endl;

  const bool myDev_on_compile = myDev.has(aspect::online_compiler);
  const bool myDev_on_link = myDev.has(aspect::online_linker);
  std::cout << " Current Device has online compiler is : " << std::boolalpha << myDev_on_compile << std::endl;
  std::cout << " Current Device has online linker is : " << std::boolalpha << myDev_on_link << std::endl;

  // Create three 1D buffers of int, each holding N elements.
  buffer<int, 1> a { range<1> { N } };
  buffer<int, 1> b { range<1> { N } };
  buffer<int, 1> c { range<1> { N } };

  // Get the identifiers for our three kernels and print their names.
  // The loop variable is deliberately not called `kernel_id`, so it does not
  // shadow the sycl::kernel_id type made visible by `using namespace sycl`.
  std::vector<kernel_id> kernelIds = { get_kernel_id<MyKernel1>(), get_kernel_id<MyKernel2>(), get_kernel_id<MyKernel3>() };
  for (const auto &id : kernelIds)
    std::cout << "definition kernel id name : " << id.get_name() << std::endl;

  // Get an input kernel bundle for the whole context (not restricted to
  // kernelIds; the restricted form is kept below for reference).
  // auto inputBundle = get_kernel_bundle<bundle_state::input>(myContext,{myDev},kernelIds);
  auto inputBundle = get_kernel_bundle<bundle_state::input>(myContext);

  // Named bundleBackend so the local does not shadow the sycl::backend type.
  const auto bundleBackend = inputBundle.get_backend();
  std::cout << "bundle backend : " << getBackendStr(bundleBackend) << std::endl;

  auto devices = inputBundle.get_devices();
  for (const auto &device : devices) {
    std::cout << " Bundle Device: " << device.get_info<sycl::info::device::name>() << std::endl;
    std::cout << " Bundle Device vendor id: " << device.get_info<sycl::info::device::vendor_id>() << std::endl;
    std::cout << " Bundle Device object address : " << &device << std::endl;
  }
  // Guard the front() access: indexing an empty vector is undefined behavior.
  const bool sameDevice = !devices.empty() && (myDev == devices.front());
  std::cout << "bundle device equal myDev : " << std::boolalpha << sameDevice << std::endl;

  std::vector<kernel_id> inputKernelIds = inputBundle.get_kernel_ids();
  for (const auto &id : inputKernelIds)
    std::cout << "input kernel id name : " << id.get_name() << std::endl;

  std::vector<kernel_id> builtinKernelIds =
      myDev.get_info<info::device::built_in_kernel_ids>();
  std::cout << "device get kernel id size : " << builtinKernelIds.size() << std::endl;
  for (const auto &id : builtinKernelIds)
    std::cout << "device (" << myDev.get_info<info::device::name>() << ")" << " backend (" << myDev.get_info<info::device::backend_version>() << ") built in kernel id name : " << id.get_name() << std::endl;

  // Set the values of the specialization constants.
  inputBundle.set_specialization_constant<width>(get_width());
  inputBundle.set_specialization_constant<height>(get_height());

  // Build the kernel bundle into an executable form. The values of the
  // specialization constants are compiled in.
  auto exeBundle = build(inputBundle);

  myQueue.submit([&](handler& cgh) {
    // Use the kernel bundle we built in this command group.
    cgh.use_kernel_bundle(exeBundle);
    accessor A { a, cgh, write_only };
    cgh.parallel_for<MyKernel1>(
        range { N }, ([=](item<1> index, kernel_handler kh) {
          // Read the value of the specialization constant.
          int w = kh.get_specialization_constant<width>();
          // ...
          A[index] = w + 3;
        }));
  });

  myQueue.submit([&](handler& cgh) {
    // This command group uses the same kernel bundle.
    cgh.use_kernel_bundle(exeBundle);
    accessor B { b, cgh, write_only };
    cgh.parallel_for<MyKernel2>(
        range { N }, ([=](item<1> index, kernel_handler kh) {
          int h = kh.get_specialization_constant<height>();
          // ...
          B[index] = h + 2;
        }));
  });

  // Print the information the runtime reports for every kernel in the
  // executable bundle.
  // NOTE(review): num_args is whatever the backend reports for the compiled
  // kernel; it presumably counts the lowered kernel arguments rather than the
  // variables captured by the lambda -- confirm against the SYCL
  // specification / backend documentation.
  std::vector<kernel_id> execKernelIds = exeBundle.get_kernel_ids();
  std::cout << "executable kernel size : " << execKernelIds.size() << std::endl;
  for (const auto &id : execKernelIds) {
    std::cout << "exec kernel id name : " << id.get_name() << std::endl;
    // SYCL 2020 declares get_kernel(const kernel_id&) as a non-template
    // member of an executable bundle, so no explicit bundle_state is needed.
    kernel myKernel = exeBundle.get_kernel(id);
    std::cout << "exec kernel id info function_name : " << myKernel.get_info<info::kernel::function_name>() << std::endl;
    std::cout << "exec kernel id info reference count : " << myKernel.get_info<info::kernel::reference_count>() << std::endl;
    std::cout << "exec kernel id info num_args : " << myKernel.get_info<info::kernel::num_args>() << std::endl;
    std::cout << "exec kernel id info attributes : " << myKernel.get_info<info::kernel::attributes>() << std::endl;
  }

  myQueue.submit([&](handler& cgh) {
    // This command group uses the same kernel bundle.
    cgh.use_kernel_bundle(exeBundle);
    // In the kernel a and b are read, but c is written
    accessor A { a, cgh, read_only };
    accessor B { b, cgh, read_only };
    accessor C { c, cgh, write_only };
    // kh is unused here; it is kept so the kernel's signature (and therefore
    // what this reproducer reports) stays exactly as in the original.
    cgh.parallel_for<MyKernel3>(
        range { N }, ([=](item<1> index, kernel_handler kh) {
          C[index] = A[index] + B[index] ;
        }));
  });

  myQueue.wait();

  host_accessor A { a, read_only };
  host_accessor B { b, read_only };
  host_accessor C { c, read_only };
  std::cout << std::endl << "Result:" << std::endl;
  for (size_t i = 0; i < N; i++) {
    // std::cout<<"A A["<<i<<"] = "<<A[i]<<std::endl;
    // std::cout<<"B B["<<i<<"] = "<<B[i]<<std::endl;
    // std::cout<<"C C["<<i<<"] = "<<C[i]<<std::endl;
    // Compare the result to the analytic value
    if (C[i] != A[i] + B[i]) {
      std::cout << "Wrong value " << C[i] << " on element " << i << " " << std::endl;
      // Return instead of exit(-1): same exit status, but the host accessors,
      // buffers and queue are destroyed normally.
      return -1;
    }
  }
  std::cout << "Good computation!" << std::endl;
  return 0;
}
The console output is as follows. Why is `num_args` 5 for '_ZTS9MyKernel1' but 0 for '_ZTS9MyKernel3'? I am confused by this result.
Can you help me?
Environment (please complete the following information):
- OS: Linux
- Target device and vendor: Nvidia GPU
- DPC++ version: [2022-09]
- Dependencies version: cuda 11.2
Additional context
Add any other context about the problem here.