Skip to content

How to understand the sycl::kernel get_info<info::kernel::num_args> ? #7630

Closed
@wangzy0327

Description

@wangzy0327

Describe the bug
How should I understand the value returned by sycl::kernel::get_info<info::kernel::num_args>()?

To Reproduce

In my example code, I try to print the kernel's num_args information, but the value does not seem to match the actual number of arguments.

mykernel-num-args.cc
#include <sycl/sycl.hpp>
using namespace sycl; // (optional) avoids need for "sycl::" before SYCL names

// Forward declare names for our three kernels.
class MyKernel1;
class MyKernel2;
class MyKernel3;

// Providers of the values assigned to the specialization constants in main();
// both are defined further down in this file.
extern int get_width();
extern int get_height();

// Element count used for every buffer and for every kernel range in main().
constexpr size_t N = 10;

// Declare specialization constants used in our kernels.
constexpr specialization_id<int> width;
constexpr specialization_id<int> height;


// Value that main() stores into the `width` specialization constant.
// This sample always supplies 0.
int get_width() {
    constexpr int kWidth = 0;
    return kWidth;
}

// Value that main() stores into the `height` specialization constant.
// This sample always supplies 0.
int get_height() {
    constexpr int kHeight = 0;
    return kHeight;
}

// Maps a SYCL backend enumerator to a short printable name.
//
// bk: the backend to describe.
// Returns the name for the listed enumerators, or "unknown" for any other
// value.
//
// The OpenCL and Level Zero cases previously both returned "host" (a
// copy-paste slip), which made the printed backend name wrong for those two.
std::string getBackendStr(backend bk){
  switch(bk){
    case backend::host:
      return "host";
    case backend::opencl:
      return "opencl";
    case backend::ext_oneapi_level_zero:
      return "level_zero";
    case backend::ext_oneapi_cuda:
      return "cuda";
    case backend::ext_oneapi_hip:
      return "hip";
    case backend::all:
      return "all";
    case backend::ext_intel_esimd_emulator:
      return "intel_esimd_emulator";
    default:
      return "unknown";
  }
}

// Reproducer for the num_args question: builds a kernel bundle that uses two
// specialization constants, submits three kernels to one queue, and prints
// device, bundle and per-kernel introspection data along the way (including
// info::kernel::num_args). Finally checks that c[i] == a[i] + b[i].
int main() {
  // Default-constructed queue; the context and device are taken from it.
  queue myQueue;
  auto myContext = myQueue.get_context();
  auto myDev = myQueue.get_device();

  std::cout << " Current Device: " << myDev.get_info<sycl::info::device::name>() << std::endl;
  std::cout << " Current Device vendor id: " << myDev.get_info<sycl::info::device::vendor_id>()<< std::endl;
  // Address of the local `myDev` object itself (not a backend handle).
  std::cout << " Current Device object address : " << &myDev << std::endl;  

  // Report whether the device advertises the online compiler / linker aspects.
  bool myDev_on_compile = myDev.has(aspect::online_compiler);
  bool myDev_on_link = myDev.has(aspect::online_linker);

  std::cout << " Current Device has online compiler is : " <<std::boolalpha<< myDev_on_compile << std::endl;
  std::cout << " Current Device has online linker is : " <<std::boolalpha<< myDev_on_link << std::endl;


  // Create three 1D buffers of N ints (a and b are inputs of MyKernel3, c is
  // its output).
  buffer<int, 1> a { range<1> { N } };
  buffer<int, 1> b { range<1> { N } };
  buffer<int, 1> c { range<1> { N } };

  // Get the identifiers for our three kernels and print their names.
  // Note: kernelIds is only printed here. The input bundle below is requested
  // for the whole context; the variant restricted to {myDev} and kernelIds is
  // left commented out.
  std::vector<kernel_id> kernelIds = { get_kernel_id<MyKernel1>(), get_kernel_id<MyKernel2>(),get_kernel_id<MyKernel3>() };
  // The loop variable is named `kernel_id`, the same as the sycl::kernel_id
  // type brought in by `using namespace sycl`.
  for(auto kernel_id : kernelIds)
    std::cout<<"definition kernel id name : "<<kernel_id.get_name()<<std::endl;
  // auto inputBundle = get_kernel_bundle<bundle_state::input>(myContext,{myDev},kernelIds);
  auto inputBundle = get_kernel_bundle<bundle_state::input>(myContext);

  auto backend = inputBundle.get_backend();

  std::cout<<"bundle backend : "<<getBackendStr(backend)<<std::endl;

  // Print every device the input bundle is associated with.
  auto devices = inputBundle.get_devices();
  for (auto &device : devices) {
      std::cout << " Bundle Device: " << device.get_info<sycl::info::device::name>() << std::endl;
      std::cout << " Bundle Device vendor id: " << device.get_info<sycl::info::device::vendor_id>()<< std::endl;
      std::cout << " Bundle Device object address : " << &device << std::endl;                    
  }

  // NOTE(review): devices[0] is indexed without checking that `devices` is
  // non-empty.
  std::cout<<"bundle device equal myDev : "<<std::boolalpha<<(myDev == devices[0])<<std::endl;

  // Kernel ids as reported by the input bundle itself.
  std::vector<kernel_id> inputKernelIds = inputBundle.get_kernel_ids();
  for(auto kernel_id : inputKernelIds)
    std::cout<<"input kernel id name : "<<kernel_id.get_name()<<std::endl;

  // Built-in kernel ids reported by the device.
  std::vector<kernel_id> builtinKernelIds =
        myDev.get_info<info::device::built_in_kernel_ids>();
  std::cout<<"device get kernel id size : "<<builtinKernelIds.size()<<std::endl;
  for(auto kernel_id : builtinKernelIds)
    std::cout<<"device ("<<myDev.get_info<info::device::name>()<<")"<<" backend ("<<myDev.get_info<info::device::backend_version>()<<") built in kernel id name : "<<kernel_id.get_name()<<std::endl;

  // Set the values of the specialization constants.
  inputBundle.set_specialization_constant<width>(get_width());
  inputBundle.set_specialization_constant<height>(get_height());


  // Build the kernel bundle into an executable form.  The values of the
  // specialization constants are compiled in.
  auto exeBundle = build(inputBundle);


  // MyKernel1: writes (width + 3) into every element of a.
  myQueue.submit([&](handler& cgh) {
    // Use the kernel bundle we built in this command group.
    cgh.use_kernel_bundle(exeBundle);
    accessor A { a, cgh, write_only };
    cgh.parallel_for<MyKernel1>(
        range { N }, ([=](item<1> index, kernel_handler kh) {
          // Read the value of the specialization constant.
          int w = kh.get_specialization_constant<width>();
          // ...
          A[index] = w + 3;
        }));
  });

  // MyKernel2: writes (height + 2) into every element of b.
  myQueue.submit([&](handler& cgh) {
    // This command group uses the same kernel bundle.
    cgh.use_kernel_bundle(exeBundle);
    accessor B { b, cgh, write_only };
    cgh.parallel_for<MyKernel2>(
        range { N }, ([=](item<1> index, kernel_handler kh) {
          int h = kh.get_specialization_constant<height>();
          // ...
          B[index] = h + 2;
        }));
  });

  // Introspect every kernel in the executable bundle. This runs after
  // MyKernel1 and MyKernel2 have been submitted and before MyKernel3 is
  // submitted.
  std::vector<kernel_id> execKernelIds = exeBundle.get_kernel_ids();
  std::cout<<"executable kernel size : "<<execKernelIds.size()<<std::endl;
  for(auto kernel_id : execKernelIds){
    std::cout<<"exec kernel id name : "<<kernel_id.get_name()<<std::endl;
    kernel myKernel = exeBundle.get_kernel<bundle_state::executable>(kernel_id);
    std::cout<<"exec kernel id info function_name : "<<myKernel.get_info<info::kernel::function_name>()<<std::endl;
    std::cout<<"exec kernel id info reference count : "<<myKernel.get_info<info::kernel::reference_count>()<<std::endl;
    // The value printed here is the one the issue asks about.
    std::cout<<"exec kernel id info num_args : "<<myKernel.get_info<info::kernel::num_args>()<<std::endl;
    std::cout<<"exec kernel id info attributes : "<<myKernel.get_info<info::kernel::attributes>()<<std::endl;
  }
  
  

  // MyKernel3: c = a + b element-wise. The kernel_handler parameter `kh` is
  // declared but not used in this kernel body.
  myQueue.submit([&](handler& cgh) {
    // This command group uses the same kernel bundle.
    cgh.use_kernel_bundle(exeBundle);
    // In the kernel a and b are read, but c is written
    accessor A { a, cgh, read_only };
    accessor B { b, cgh, read_only };
    accessor C { c, cgh, write_only };
    cgh.parallel_for<MyKernel3>(
        range { N }, ([=](item<1> index, kernel_handler kh) {
          C[index] = A[index] + B[index] ;
          
        }));
  });

  // Wait for the submitted command groups before reading results on the host.
  myQueue.wait();

  // Read-only host views of the three buffers for verification.
  host_accessor A { a, read_only };
  host_accessor B { b, read_only };
  host_accessor C { c, read_only };
  std::cout << std::endl << "Result:" << std::endl;
  for (size_t i = 0; i < N; i++) {
    // std::cout<<"A A["<<i<<"] = "<<A[i]<<std::endl;
    // std::cout<<"B B["<<i<<"] = "<<B[i]<<std::endl;
    // std::cout<<"C C["<<i<<"] = "<<C[i]<<std::endl;
    // Compare the result to the analytic value
    if (C[i] != A[i] + B[i] ) {
        std::cout << "Wrong value " << C[i] << " on element " << i << " " << std::endl;
        // Terminates the process on the first mismatch via exit(), not a
        // return from main.
        exit(-1);
    }
  }

  std::cout << "Good computation!" << std::endl;

}

The console prints the following. Why is num_args 5 for '_ZTS9MyKernel1' but 0 for '_ZTS9MyKernel3'? I am confused by this result.

Can you help me?

image

Environment (please complete the following information):

  • OS: Linux
  • Target device and vendor: Nvidia GPU
  • DPC++ version: [2022-09]
  • Dependencies version: cuda 11.2

Additional context
Add any other context about the problem here.

Metadata

Metadata

Assignees

Labels

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions