diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_global.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_global.asciidoc index fd7fdb0fe65fb..da282e3241068 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_global.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_global.asciidoc @@ -197,6 +197,11 @@ class device_global { _T_ is restricted to types that have a trivial destructor. _PropertyListT_ enables properties to be associated with a `device_global`. +[NOTE] +==== +If _T_ is prepended `const` then in the cuda and hip backends the compiler will use the constant address space for the `device_global` instead of the global address space. This allows for optimizations such as using the Nvidia constant cache, and is equivalent to usage of `__constant__` in the CUDA and HIP native programming languages. +==== + When compiling with {cpp} versions before {cpp}20, _T_ must also have a trivial default constructor. In this case, the allocation of type _T_ for a given `device_global` is zero-initialized on a given device prior to the first access to that `device_global` on that device. For the purposes of this definition an access can be a direct access of the `device_global` in kernel code or a copy to or from that `device_global` enqueued to the given device. When compiling with {cpp}20 or later, _T_ must have a constructor that can be `constexpr` evaluated, and the parameters to the `device_global` constructor are forwarded to the _T_ constructor. In this case, the allocation of type _T_ for a given `device_global` is initialized on a given device prior to the first access to that `device_global` on that device. diff --git a/sycl/test-e2e/DeviceGlobal/device_global_const.cpp b/sycl/test-e2e/DeviceGlobal/device_global_const.cpp deleted file mode 100644 index 0e1ed183dd378..0000000000000 --- a/sycl/test-e2e/DeviceGlobal/device_global_const.cpp +++ /dev/null @@ -1,31 +0,0 @@ -// RUN: %{build} -o %t.out -// RUN: %{run} %t.out -// -// The OpenCL GPU backends do not currently support device_global backend -// calls. -// -// UNSUPPORTED: opencl && gpu - -#include "common.hpp" - -device_global DeviceGlobalVar; - -int main() { - queue Q; - - int HostVal = 42; - Q.memcpy(DeviceGlobalVar, &HostVal); - Q.wait(); - int OutVal = 0; - - { - buffer OutBuf(&OutVal, 1); - Q.submit([&](handler &CGH) { - auto OutAcc = OutBuf.get_access(CGH); - CGH.single_task([=]() { OutAcc[0] = DeviceGlobalVar.get(); }); - }); - } - - assert(OutVal == 42 && "Read value does not match."); - return 0; -} diff --git a/sycl/test/check_device_code/device_global_const.cpp b/sycl/test/check_device_code/device_global_const.cpp new file mode 100644 index 0000000000000..a91e40fe54825 --- /dev/null +++ b/sycl/test/check_device_code/device_global_const.cpp @@ -0,0 +1,19 @@ +// RUN: %clangxx -fsycl -fsycl-device-only %if cuda %{ -fsycl-targets=nvptx64-nvidia-cuda %} %if hip-amd %{ -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx90a %} -S -emit-llvm %s -o - | FileCheck %s %if cuda || hip-amd %{ --check-prefixes=CHECK-CONST %} + +// Tests that const T device_global uses const address space for cuda/hip + +#include + +using namespace sycl; +using namespace sycl::ext::oneapi::experimental; + +device_global DeviceGlobalVar; + +int main() { + queue Q; + Q.single_task([]() { + // CHECK-CONST: (ptr addrspace(4) @DeviceGlobalVar + volatile int ReadVal = DeviceGlobalVar; + }); + return 0; +}