diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index a55a6ec00d7c9..de1eee7f233f6 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -378,7 +378,7 @@ outlining job: int foo(int x) { return ++x; } - using namespace cl::sycl; + using namespace sycl; queue Q; buffer a(range<1>{1024}); Q.submit([&](handler& cgh) { @@ -3790,7 +3790,7 @@ cannot be optimized out due to reachability analysis or by any other optimization. This attribute allows to pass name and address of the function to a special -``cl::sycl::intel::get_device_func_ptr`` API call which extracts the device +``sycl::intel::get_device_func_ptr`` API call which extracts the device function pointer for the specified function. .. code-block:: c++ diff --git a/llvm/lib/SYCLLowerIR/LowerWGScope.cpp b/llvm/lib/SYCLLowerIR/LowerWGScope.cpp index 5b0dc95d144f6..f1ad112c331e1 100644 --- a/llvm/lib/SYCLLowerIR/LowerWGScope.cpp +++ b/llvm/lib/SYCLLowerIR/LowerWGScope.cpp @@ -780,7 +780,7 @@ PreservedAnalyses SYCLLowerWGScopePass::run(Function &F, I = I->getNextNode()) { auto *AllocaI = dyn_cast(I); // Allocas marked with "work_item_scope" are those originating from - // cl::sycl::private_memory variables, which must be in private memory. + // sycl::private_memory variables, which must be in private memory. // No shadows/materialization is needed for them because they can be // updated only within PFWIs if (AllocaI && !AllocaI->getMetadata(WI_SCOPE_MD)) diff --git a/sycl/doc/EnvironmentVariables.md b/sycl/doc/EnvironmentVariables.md index 3e0b7316750d1..3a8c5da4adc18 100755 --- a/sycl/doc/EnvironmentVariables.md +++ b/sycl/doc/EnvironmentVariables.md @@ -8,7 +8,7 @@ compiler and runtime. | Environment variable | Values | Description | | -------------------- | ------ | ----------- | | `SYCL_BE` (deprecated) | `PI_OPENCL`, `PI_LEVEL_ZERO`, `PI_CUDA` | Force SYCL RT to consider only devices of the specified backend during the device selection. We are planning to deprecate `SYCL_BE` environment variable in the future. The specific grace period is not decided yet. Please use the new env var `SYCL_DEVICE_FILTER` instead. | -| `SYCL_DEVICE_TYPE` (deprecated) | CPU, GPU, ACC, HOST | Force SYCL to use the specified device type. If unset, default selection rules are applied. If set to any unlisted value, this control has no effect. If the requested device type is not found, a `cl::sycl::runtime_error` exception is thrown. If a non-default device selector is used, a device must satisfy both the selector and this control to be chosen. This control only has effect on devices created with a selector. We are planning to deprecate `SYCL_DEVICE_TYPE` environment variable in the future. The specific grace period is not decided yet. Please use the new env var `SYCL_DEVICE_FILTER` instead. | +| `SYCL_DEVICE_TYPE` (deprecated) | CPU, GPU, ACC, HOST | Force SYCL to use the specified device type. If unset, default selection rules are applied. If set to any unlisted value, this control has no effect. If the requested device type is not found, a `sycl::runtime_error` exception is thrown. If a non-default device selector is used, a device must satisfy both the selector and this control to be chosen. This control only has effect on devices created with a selector. We are planning to deprecate `SYCL_DEVICE_TYPE` environment variable in the future. The specific grace period is not decided yet. Please use the new env var `SYCL_DEVICE_FILTER` instead. 
| | `SYCL_DEVICE_FILTER` | `backend:device_type:device_num` | See Section [`SYCL_DEVICE_FILTER`](#sycl_device_filter) below. | | `SYCL_DEVICE_ALLOWLIST` | See [below](#sycl_device_allowlist) | Filter out devices that do not match the pattern specified. `BackendName` accepts `host`, `opencl`, `level_zero` or `cuda`. `DeviceType` accepts `host`, `cpu`, `gpu` or `acc`. `DeviceVendorId` accepts uint32_t in hex form (`0xXYZW`). `DriverVersion`, `PlatformVersion`, `DeviceName` and `PlatformName` accept regular expression. Special characters, such as parenthesis, must be escaped. DPC++ runtime will select only those devices which satisfy provided values above and regex. More than one device can be specified using the piping symbol "\|".| | `SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING` | Any(\*) | Disables automatic rounding-up of `parallel_for` invocation ranges. | @@ -107,7 +107,7 @@ variables in production code. | `SYCL_DEVICELIB_INHIBIT_NATIVE` | String of device library extensions (separated by a whitespace) | Do not rely on device native support for devicelib extensions listed in this option. | | `SYCL_PROGRAM_COMPILE_OPTIONS` | String of valid OpenCL compile options | Override compile options for all programs. | | `SYCL_PROGRAM_LINK_OPTIONS` | String of valid OpenCL link options | Override link options for all programs. | -| `SYCL_USE_KERNEL_SPV` | Path to the SPIR-V binary | Load device image from the specified file. If runtime is unable to read the file, `cl::sycl::runtime_error` exception is thrown.| +| `SYCL_USE_KERNEL_SPV` | Path to the SPIR-V binary | Load device image from the specified file. If runtime is unable to read the file, `sycl::runtime_error` exception is thrown.| | `SYCL_DUMP_IMAGES` | Any(\*) | Dump device image binaries to file. Control has no effect if `SYCL_USE_KERNEL_SPV` is set. | | `SYCL_HOST_UNIFIED_MEMORY` | Integer | Enforce host unified memory support or lack of it for the execution graph builder. If set to 0, it is enforced as not supported by all devices. If set to 1, it is enforced as supported by all devices. | | `SYCL_CACHE_TRACE` | Any(\*) | If the variable is set, messages are sent to std::cerr when caching events or non-blocking failures happen (e.g. unable to access cache item file). | diff --git a/sycl/doc/FAQ.md b/sycl/doc/FAQ.md index 55d9d666c8f1c..e3653733363f5 100644 --- a/sycl/doc/FAQ.md +++ b/sycl/doc/FAQ.md @@ -128,7 +128,7 @@ C:\Program Files (x86)\Windows Kits\10\include\10.0.18362.0\ucrt\crtdbg.h(607,26 > beyond those explicitly mentioned as usable in kernels in this spec. Replace usage of STD built-ins with SYCL-defined math built-ins. Please, note -that you have to explicitly specify built-in namespace (i.e. `cl::sycl::fmin`). +that you have to explicitly specify built-in namespace (i.e. `sycl::fmin`). The full list of SYCL math built-ins is provided in section 4.13.3 of the specification. 
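The FAQ entry above only states that the built-in namespace must be spelled out. As a quick illustration (not part of the patch, and assuming the `<sycl/sycl.hpp>` header and USM shared allocations purely for brevity), a minimal sketch of a kernel that calls the SYCL math built-in `sycl::fmin` instead of its `std::` counterpart could look like this:

```c++
#include <sycl/sycl.hpp>

int main() {
  sycl::queue Q;
  float *Data = sycl::malloc_shared<float>(4, Q);
  for (int I = 0; I < 4; ++I)
    Data[I] = static_cast<float>(I);

  Q.parallel_for(sycl::range<1>{4}, [=](sycl::id<1> WI) {
     // Use the SYCL-defined built-in with an explicit namespace;
     // std::fmin is not guaranteed to be usable in device code.
     Data[WI[0]] = sycl::fmin(Data[WI[0]], 2.0f);
   }).wait();

  sycl::free(Data, Q);
  return 0;
}
```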
diff --git a/sycl/doc/GetStartedGuide.md b/sycl/doc/GetStartedGuide.md index 168fca5556463..3c393e796b3fc 100644 --- a/sycl/doc/GetStartedGuide.md +++ b/sycl/doc/GetStartedGuide.md @@ -557,29 +557,29 @@ Creating a file `simple-sycl-app.cpp` with the following C++/SYCL code: int main() { // Creating buffer of 4 ints to be used inside the kernel code - cl::sycl::buffer Buffer(4); + sycl::buffer Buffer(4); // Creating SYCL queue - cl::sycl::queue Queue; + sycl::queue Queue; // Size of index space for kernel - cl::sycl::range<1> NumOfWorkItems{Buffer.size()}; + sycl::range<1> NumOfWorkItems{Buffer.size()}; // Submitting command group(work) to queue - Queue.submit([&](cl::sycl::handler &cgh) { + Queue.submit([&](sycl::handler &cgh) { // Getting write only access to the buffer on a device - auto Accessor = Buffer.get_access(cgh); + auto Accessor = Buffer.get_access(cgh); // Executing kernel cgh.parallel_for( - NumOfWorkItems, [=](cl::sycl::id<1> WIid) { + NumOfWorkItems, [=](sycl::id<1> WIid) { // Fill buffer with indexes - Accessor[WIid] = (cl::sycl::cl_int)WIid.get(0); + Accessor[WIid] = (sycl::cl_int)WIid.get(0); }); }); // Getting read only access to the buffer on the host. // Implicit barrier waiting for queue to complete the work. - const auto HostAccessor = Buffer.get_access(); + const auto HostAccessor = Buffer.get_access(); // Check the results bool MismatchFound = false; @@ -704,36 +704,36 @@ SYCL_BE=PI_CUDA ./simple-sycl-app-cuda.exe ``` **NOTE**: DPC++/SYCL developers can specify SYCL device for execution using -device selectors (e.g. `cl::sycl::cpu_selector`, `cl::sycl::gpu_selector`, +device selectors (e.g. `sycl::cpu_selector`, `sycl::gpu_selector`, [Intel FPGA selector(s)](extensions/supported/sycl_ext_intel_fpga_device_selector.md)) as explained in following section [Code the program for a specific GPU](#code-the-program-for-a-specific-gpu). ### Code the program for a specific GPU -To specify OpenCL device SYCL provides the abstract `cl::sycl::device_selector` +To specify OpenCL device SYCL provides the abstract `sycl::device_selector` class which the can be used to define how the runtime should select the best device. -The method `cl::sycl::device_selector::operator()` of the SYCL -`cl::sycl::device_selector` is an abstract member function which takes a +The method `sycl::device_selector::operator()` of the SYCL +`sycl::device_selector` is an abstract member function which takes a reference to a SYCL device and returns an integer score. This abstract member function can be implemented in a derived class to provide a logic for selecting a SYCL device. SYCL runtime uses the device for with the highest score is -returned. Such object can be passed to `cl::sycl::queue` and `cl::sycl::device` +returned. Such object can be passed to `sycl::queue` and `sycl::device` constructors. 
-The example below illustrates how to use `cl::sycl::device_selector` to create +The example below illustrates how to use `sycl::device_selector` to create device and queue objects bound to Intel GPU device: ```c++ #include int main() { - class NEOGPUDeviceSelector : public cl::sycl::device_selector { + class NEOGPUDeviceSelector : public sycl::device_selector { public: - int operator()(const cl::sycl::device &Device) const override { - using namespace cl::sycl::info; + int operator()(const sycl::device &Device) const override { + using namespace sycl::info; const std::string DeviceName = Device.get_info(); const std::string DeviceVendor = Device.get_info(); @@ -744,9 +744,9 @@ int main() { NEOGPUDeviceSelector Selector; try { - cl::sycl::queue Queue(Selector); - cl::sycl::device Device(Selector); - } catch (cl::sycl::invalid_parameter_error &E) { + sycl::queue Queue(Selector); + sycl::device Device(Selector); + } catch (sycl::invalid_parameter_error &E) { std::cout << E.what() << std::endl; } } @@ -757,10 +757,10 @@ The device selector below selects an NVIDIA device only, and won't execute if there is none. ```c++ -class CUDASelector : public cl::sycl::device_selector { +class CUDASelector : public sycl::device_selector { public: - int operator()(const cl::sycl::device &Device) const override { - using namespace cl::sycl::info; + int operator()(const sycl::device &Device) const override { + using namespace sycl::info; const std::string DriverVersion = Device.get_info(); if (Device.is_gpu() && (DriverVersion.find("CUDA") != std::string::npos)) { diff --git a/sycl/doc/MultiTileCardWithLevelZero.md b/sycl/doc/MultiTileCardWithLevelZero.md index f719be41f9340..797191240419a 100644 --- a/sycl/doc/MultiTileCardWithLevelZero.md +++ b/sycl/doc/MultiTileCardWithLevelZero.md @@ -46,8 +46,8 @@ The root-device in such cases can be partitioned to sub-devices, each correspond ``` C++ try { vector SubDevices = RootDevice.create_sub_devices< - cl::sycl::info::partition_property::partition_by_affinity_domain>( - cl::sycl::info::partition_affinity_domain::next_partitionable); + sycl::info::partition_property::partition_by_affinity_domain>( + sycl::info::partition_affinity_domain::next_partitionable); } ``` diff --git a/sycl/doc/design/CompilerAndRuntimeDesign.md b/sycl/doc/design/CompilerAndRuntimeDesign.md index cb85b6bd346fe..0dfb35acbab87 100644 --- a/sycl/doc/design/CompilerAndRuntimeDesign.md +++ b/sycl/doc/design/CompilerAndRuntimeDesign.md @@ -90,7 +90,7 @@ work: int foo(int x) { return ++x; } int bar(int x) { throw std::exception{"CPU code only!"}; } ... -using namespace cl::sycl; +using namespace sycl; queue Q; buffer a{range<1>{1024}}; Q.submit([&](handler& cgh) { @@ -103,17 +103,17 @@ Q.submit([&](handler& cgh) { ``` In this example, the compiler needs to compile the lambda expression passed -to the `cl::sycl::handler::parallel_for` method, as well as the function `foo` +to the `sycl::handler::parallel_for` method, as well as the function `foo` called from the lambda expression for the device. The compiler must also ignore the `bar` function when we compile the "device" part of the single source code, as it's unused inside the device portion of the source code (the contents of the lambda expression passed to the -`cl::sycl::handler::parallel_for` and any function called from this lambda +`sycl::handler::parallel_for` and any function called from this lambda expression). 
The current approach is to use the SYCL kernel attribute in the runtime to -mark code passed to `cl::sycl::handler::parallel_for` as "kernel functions". +mark code passed to `sycl::handler::parallel_for` as "kernel functions". The runtime library can't mark foo as "device" code - this is a compiler job: to traverse all symbols accessible from kernel functions and add them to the "device part" of the code marking them with the new SYCL device attribute. diff --git a/sycl/doc/design/KernelParameterPassing.md b/sycl/doc/design/KernelParameterPassing.md index 706e1e09fee1b..67e7eca02ec6c 100644 --- a/sycl/doc/design/KernelParameterPassing.md +++ b/sycl/doc/design/KernelParameterPassing.md @@ -60,7 +60,7 @@ int main() myQueue.submit([&](handler &cgh) { auto outAcc = outBuf.get_access(cgh); - cgh.parallel_for(num_items, [=](cl::sycl::id<1> index) { + cgh.parallel_for(num_items, [=](sycl::id<1> index) { outAcc[index] = i + s.m; }); }); @@ -192,7 +192,7 @@ are copied into the array within the local capture object. myQueue.submit([&](handler &cgh) { auto outAcc = outBuf.get_access(cgh); - cgh.parallel_for(num_items, [=](cl::sycl::id<1> index) { + cgh.parallel_for(num_items, [=](sycl::id<1> index) { outAcc[index] = array[index.get(0)]; }); }); @@ -264,7 +264,7 @@ of each accessor array element in ascending index value. in_buffer2.get_access(cgh)}; auto outAcc = out_buffer.get_access(cgh); - cgh.parallel_for(num_items, [=](cl::sycl::id<1> index) { + cgh.parallel_for(num_items, [=](sycl::id<1> index) { outAcc[index] = inAcc[0][index] + inAcc[1][index]; }); }); @@ -356,7 +356,7 @@ in a manner similar to other instances of accessor arrays. }; auto outAcc = out_buffer.get_access(cgh); - cgh.parallel_for(num_items, [=](cl::sycl::id<1> index) { + cgh.parallel_for(num_items, [=](sycl::id<1> index) { outAcc[index] = s.m + s.inAcc[0][index] + s.inAcc[1][index]; }); }); diff --git a/sycl/doc/design/LinkedAllocations.md b/sycl/doc/design/LinkedAllocations.md index 3bd97893702ec..3fb1272110d2e 100644 --- a/sycl/doc/design/LinkedAllocations.md +++ b/sycl/doc/design/LinkedAllocations.md @@ -9,21 +9,21 @@ Instead, memory is allocated in each context whenever the SYCL memory object is first accessed there: ``` - cl::sycl::buffer buf{cl::sycl::range<1>(1)}; // No allocation here + sycl::buffer buf{sycl::range<1>(1)}; // No allocation here - cl::sycl::queue q; - q.submit([&](cl::sycl::handler &cgh){ + sycl::queue q; + q.submit([&](sycl::handler &cgh){ // First access to buf in q's context: allocate memory - auto acc = buf.get_access(cgh); + auto acc = buf.get_access(cgh); ... }); // First access to buf on host (assuming q is not host): allocate memory - auto acc = buf.get_access(); + auto acc = buf.get_access(); ``` In the DPCPP execution graph these allocations are represented by allocation -command nodes (`cl::sycl::detail::AllocaCommand`). A finished allocation +command nodes (`sycl::detail::AllocaCommand`). A finished allocation command means that the associated memory object is ready for its first use in that context, but for host allocation commands it might be the case that no actual memory allocation takes place: either because it is possible to reuse the @@ -31,11 +31,11 @@ data pointer provided by the user: ``` int val; - cl::sycl::buffer buf{&val, cl::sycl::range<1>(1)}; + sycl::buffer buf{&val, sycl::range<1>(1)}; // An alloca command is created, but it does not allocate new memory: &val // is reused instead. 
- auto acc = buf.get_access(); + auto acc = buf.get_access(); ``` Or because a mapped host pointer obtained from a native device memory object diff --git a/sycl/doc/design/OptionalDeviceFeatures.md b/sycl/doc/design/OptionalDeviceFeatures.md index 64250ad12f648..eb61ed181be5a 100644 --- a/sycl/doc/design/OptionalDeviceFeatures.md +++ b/sycl/doc/design/OptionalDeviceFeatures.md @@ -403,9 +403,9 @@ name. The format looks like this: ``` !intel_types_that_use_aspects = !{!0, !1, !2} -!0 = !{!"class.cl::sycl::detail::half_impl::half", i32 8} -!1 = !{!"class.cl::sycl::amx_type", i32 9} -!2 = !{!"class.cl::sycl::other_type", i32 8, i32 9} +!0 = !{!"class.sycl::detail::half_impl::half", i32 8} +!1 = !{!"class.sycl::amx_type", i32 9} +!2 = !{!"class.sycl::other_type", i32 8, i32 9} ``` The value of the `!intel_types_that_use_aspects` metadata is a list of unnamed @@ -415,8 +415,8 @@ starts with a string giving the name of the type which is followed by a list of `i32` constants where each constant is a value from `enum class aspect` telling the numerical value of an aspect from the type's `[[sycl_detail::uses_aspects()]]` attribute. In the example above, the type -`cl::sycl::detail::half_impl::half` uses an aspect whose numerical value is -`8` and the type `cl::sycl::other_type` uses two aspects `8` and `9`. +`sycl::detail::half_impl::half` uses an aspect whose numerical value is +`8` and the type `sycl::other_type` uses two aspects `8` and `9`. **NOTE**: The reason we choose this representation is because LLVM IR does not allow metadata to be attached directly to types. This representation works diff --git a/sycl/doc/design/SYCLInstrumentationUsingXPTI.md b/sycl/doc/design/SYCLInstrumentationUsingXPTI.md index 4244b687b5a4a..0724c8112c26e 100644 --- a/sycl/doc/design/SYCLInstrumentationUsingXPTI.md +++ b/sycl/doc/design/SYCLInstrumentationUsingXPTI.md @@ -97,7 +97,7 @@ language constructs (2) The instrumentation that handles capturing the relevant metadata. 1. In order to capture end-user source code information, we have implemented -`cl::sycl::detail::code_location` class that uses the builtin functions +`sycl::detail::code_location` class that uses the builtin functions in the compiler. However, equivalent implementations are unavailable on Windows and separate cross-platform implementation might be used in the future. To mitigate this, the Windows implementation will always report diff --git a/sycl/doc/design/SpecializationConstants.md b/sycl/doc/design/SpecializationConstants.md index 56bc9586dee6f..77cc0a55c1513 100644 --- a/sycl/doc/design/SpecializationConstants.md +++ b/sycl/doc/design/SpecializationConstants.md @@ -34,10 +34,10 @@ struct A { struct POD { A a[2]; - // FIXME: cl::sycl::vec class is not a POD type in our implementation by some + // FIXME: sycl::vec class is not a POD type in our implementation by some // reason, but there are no limitations for vector types from spec constatns // design point of view. 
- cl::sycl::vec b; + sycl::vec b; }; class MyInt32Const; @@ -55,16 +55,16 @@ class MyPODConst; sycl::ONEAPI::experimental::spec_constant i32 = p.set_spec_constant(rt_val); - cl::sycl::ONEAPI::experimental::spec_constant pod = + sycl::ONEAPI::experimental::spec_constant pod = p.set_spec_constant(gold); p.build_with_kernel_type(); sycl::buffer buf(vec.data(), vec.size()); sycl::buffer buf(vec_pod.data(), vec_pod.size()); - q.submit([&](cl::sycl::handler &cgh) { - auto acc = buf.get_access(cgh); - auto acc_pod = buf.get_access(cgh); + q.submit([&](sycl::handler &cgh) { + auto acc = buf.get_access(cgh); + auto acc_pod = buf.get_access(cgh); cgh.single_task( p.get_kernel(), [=]() { @@ -169,7 +169,7 @@ struct A { sycl::ONEAPI::experimental::spec_constant i32 = p.set_spec_constant(rt_val); -cl::sycl::ONEAPI::experimental::spec_constant pod = +sycl::ONEAPI::experimental::spec_constant pod = p.set_spec_constant(gold); // ... i32.get(); @@ -335,7 +335,7 @@ struct A { sycl::ONEAPI::experimental::spec_constant i32 = p.set_spec_constant(rt_val); -cl::sycl::ONEAPI::experimental::spec_constant pod = +sycl::ONEAPI::experimental::spec_constant pod = p.set_spec_constant(gold); // ... i32.get(); @@ -428,7 +428,7 @@ specialization constant: ``` // user code: class MyIn32Constant; -cl::sycl::ONEAPI::experimental::spec_constant i32(0); +sycl::ONEAPI::experimental::spec_constant i32(0); // integration header: template <> struct sycl::detail::SpecConstantInfo<::MyInt32Const> { static constexpr const char* getName() { diff --git a/sycl/doc/design/fpga_io_pipes_design.rst index e017e0c49ba46..5c67729bbf4ef 100644 --- a/sycl/doc/design/fpga_io_pipes_design.rst +++ b/sycl/doc/design/fpga_io_pipes_design.rst @@ -13,9 +13,9 @@ Requirements static constexpr unsigned id = ID; }; using ethernet_read_pipe = - cl::sycl::intel::kernel_readable_io_pipe, int, 0>; + sycl::intel::kernel_readable_io_pipe, int, 0>; using ethernet_write_pipe = - cl::sycl::intel::kernel_writeable_io_pipe, int, 0>; + sycl::intel::kernel_writeable_io_pipe, int, 0>; } Thus, the user interacts only with vendor-defined pipe objects. diff --git a/sycl/doc/developer/ContributeToDPCPP.md index 4db87010da6e6..d761cca73359b 100644 --- a/sycl/doc/developer/ContributeToDPCPP.md +++ b/sycl/doc/developer/ContributeToDPCPP.md @@ -57,7 +57,7 @@ end-to-end or SYCL-CTS tests. #### General guidelines -- Use `sycl::` namespace instead of `cl::sycl::` +- Use the `sycl::` namespace instead of the deprecated `cl::sycl::` - Add a helpful comment describing what the test does at the beginning and other comments throughout the test as necessary. @@ -85,7 +85,7 @@ end-to-end or SYCL-CTS tests. ```C++ `#include "Inputs/sycl.hpp"` sycl::queue q; - q.submit([&](cl::sycl::handler &h) { + q.submit([&](sycl::handler &h) { h.single_task( { //code }); }); ``` diff --git a/sycl/doc/extensions/supported/C-CXX-StandardLibrary.rst index 1712a40e60251..f63ca735ef0e0 100644 --- a/sycl/doc/extensions/supported/C-CXX-StandardLibrary.rst +++ b/sycl/doc/extensions/supported/C-CXX-StandardLibrary.rst @@ -121,18 +121,18 @@ Example of usage void simple_vadd(const std::array& VA, const std::array& VB, std::array& VC) { // ...
- cl::sycl::range<1> numOfItems{N}; - cl::sycl::buffer bufferA(VA.data(), numOfItems); - cl::sycl::buffer bufferB(VB.data(), numOfItems); - cl::sycl::buffer bufferC(VC.data(), numOfItems); + sycl::range<1> numOfItems{N}; + sycl::buffer bufferA(VA.data(), numOfItems); + sycl::buffer bufferB(VB.data(), numOfItems); + sycl::buffer bufferC(VC.data(), numOfItems); - deviceQueue.submit([&](cl::sycl::handler& cgh) { + deviceQueue.submit([&](sycl::handler& cgh) { auto accessorA = bufferA.template get_access(cgh); auto accessorB = bufferB.template get_access(cgh); auto accessorC = bufferC.template get_access(cgh); cgh.parallel_for>(numOfItems, - [=](cl::sycl::id<1> wiID) { + [=](sycl::id<1> wiID) { accessorC[wiID] = accessorA[wiID] + accessorB[wiID]; assert(accessorC[wiID] > 0 && "Invalid value"); }); @@ -146,14 +146,14 @@ Example of usage #include void device_sin_test() { - cl::sycl::queue deviceQueue; - cl::sycl::range<1> numOfItems{1}; + sycl::queue deviceQueue; + sycl::range<1> numOfItems{1}; float result_f = -1.f; double result_d = -1.d; { - cl::sycl::buffer buffer1(&result_f, numOfItems); - cl::sycl::buffer buffer2(&result_d, numOfItems); - deviceQueue.submit([&](cl::sycl::handler &cgh) { + sycl::buffer buffer1(&result_f, numOfItems); + sycl::buffer buffer2(&result_d, numOfItems); + deviceQueue.submit([&](sycl::handler &cgh) { auto res_access1 = buffer1.get_access(cgh); auto res_access2 = buffer2.get_access(cgh); cgh.single_task([=]() { diff --git a/sycl/doc/extensions/supported/sycl_ext_intel_dataflow_pipes.asciidoc b/sycl/doc/extensions/supported/sycl_ext_intel_dataflow_pipes.asciidoc index 885cf2a780487..867b3b23a7956 100644 --- a/sycl/doc/extensions/supported/sycl_ext_intel_dataflow_pipes.asciidoc +++ b/sycl/doc/extensions/supported/sycl_ext_intel_dataflow_pipes.asciidoc @@ -310,7 +310,7 @@ When there are accesses to a pipe from different work-items or host threads, the A pipe can be accessed (read from or written to) from both device code and SYCL host code. Host-accessible pipes are unidirectional from both the host and device perspectives. A kernel cannot both read from and write to a pipe, that the host program also reads from or writes to. Similarly, the host program cannot read from and write to the same pipe type. A consequence of this rule is that loop-back host pipes are not possible using the same pipe - the host program cannot write to and also read from a pipe. The compiler, linker, and/or runtime are required to emit an error if any of these conditions are violated. -A pipe accessed by the host can communicate with a kernel in exactly one program executing on one device. If two instances of a kernel are launched to different devices, or if a kernel is compiled into more than one program object and both are enqueued, then the runtime is required to throw a synchronous `cl::sycl::kernel_error` exception. The intent of this restriction is that accesses to a pipe on the host provide a point to point link with a kernel executing on a specific device without ambiguity, arbitration, broadcasts, or synchronization across devices. +A pipe accessed by the host can communicate with a kernel in exactly one program executing on one device. If two instances of a kernel are launched to different devices, or if a kernel is compiled into more than one program object and both are enqueued, then the runtime is required to throw a synchronous `sycl::kernel_error` exception. 
The intent of this restriction is that accesses to a pipe on the host provide a point to point link with a kernel executing on a specific device without ambiguity, arbitration, broadcasts, or synchronization across devices. The data lifetime rules for pipes apply also to host accessible pipes. Specifically: data in a pipe exists within an instance of a program object on a device (programming bitstream lifetime on FPGA devices). Invocation of a kernel from a different program object might destroy all data stored in pipes within the program object associated with the previous kernel(s) executed on the device, and also might destroy any data in pipes being accessed by the host that were communicating with kernel(s) in the program object. diff --git a/sycl/doc/extensions/supported/sycl_ext_intel_fpga_device_selector.md b/sycl/doc/extensions/supported/sycl_ext_intel_fpga_device_selector.md index cd33044cb502b..29fcbd05a0501 100644 --- a/sycl/doc/extensions/supported/sycl_ext_intel_fpga_device_selector.md +++ b/sycl/doc/extensions/supported/sycl_ext_intel_fpga_device_selector.md @@ -13,7 +13,7 @@ one FPGA board installed in their system (one device per platform). #include ... // force FPGA hardware device -cl::sycl::queue deviceQueue{cl::sycl::ext::intel::fpga_selector{}}; +sycl::queue deviceQueue{sycl::ext::intel::fpga_selector{}}; ... ``` @@ -22,7 +22,7 @@ cl::sycl::queue deviceQueue{cl::sycl::ext::intel::fpga_selector{}}; #include ... // force FPGA emulation device -cl::sycl::queue deviceQueue{cl::sycl::ext::intel::fpga_emulator_selector{}}; +sycl::queue deviceQueue{sycl::ext::intel::fpga_emulator_selector{}}; ... ``` diff --git a/sycl/doc/extensions/supported/sycl_ext_intel_fpga_lsu.md b/sycl/doc/extensions/supported/sycl_ext_intel_fpga_lsu.md index 512a7a381288b..6922a2e589191 100644 --- a/sycl/doc/extensions/supported/sycl_ext_intel_fpga_lsu.md +++ b/sycl/doc/extensions/supported/sycl_ext_intel_fpga_lsu.md @@ -4,38 +4,38 @@ The Intel FPGA `lsu` class is implemented in `sycl/ext/intel/fpga_lsu.hpp` which is included in `sycl/ext/intel/fpga_extensions.hpp`. -The class `cl::sycl::ext::intel::lsu` allows users to explicitly request that the +The class `sycl::ext::intel::lsu` allows users to explicitly request that the implementation of a global memory access is configured in a certain way. The class has two member functions, `load()` and `store()` which allow loading from and storing to a `multi_ptr`, respectively, and is templated on the following 4 optional paremeters: -1. **`cl::sycl::ext::intel::burst_coalesce`, where `B` is a boolean**: request, +1. **`sycl::ext::intel::burst_coalesce`, where `B` is a boolean**: request, to the extent possible, that a dynamic burst coalescer be implemented when `load` or `store` are called. The default value of this parameter is `false`. -2. **`cl::sycl::ext::intel::cache`, where `N` is an integer greater or equal to +2. **`sycl::ext::intel::cache`, where `N` is an integer greater or equal to 0**: request, to the extent possible, that a read-only cache of the specified size in bytes be implemented when when `load` is called. It is not allowed to use that parameter for `store`. The default value of this parameter is `0`. -3. **`cl::sycl::ext::intel::statically_coalesce`, where `B` is a boolean**: +3. **`sycl::ext::intel::statically_coalesce`, where `B` is a boolean**: request, to the extent possible, that `load` or `store` accesses, is allowed to be statically coalesced with other memory accesses at compile time. 
The default value of this parameter is `true`. -4. **`cl::sycl::ext::intel::prefetch`, where `B` is a boolean**: request, to the +4. **`sycl::ext::intel::prefetch`, where `B` is a boolean**: request, to the extent possible, that a prefetcher be implemented when `load` is called. It is not allowed to use that parameter for `store`. The default value of this parameter is `false`. Currently, not every combination of parameters is allowed due to limitations in the backend. The following rules apply: -1. For `store`, `cl::sycl::ext::intel::cache` must be `0` and -`cl::sycl::ext::intel::prefetch` must be `false`. -2. For `load`, if `cl::sycl::ext::intel::cache` is set to a value greater than `0`, -then `cl::sycl::ext::intel::burst_coalesce` must be set to `true`. -3. For `load`, exactly one of `cl::sycl::ext::intel::prefetch` and -`cl::sycl::ext::intel::burst_coalesce` is allowed to be `true`. -4. For `load`, exactly one of `cl::sycl::ext::intel::prefetch` and -`cl::sycl::ext::intel::cache` is allowed to be `true`. +1. For `store`, `sycl::ext::intel::cache` must be `0` and +`sycl::ext::intel::prefetch` must be `false`. +2. For `load`, if `sycl::ext::intel::cache` is set to a value greater than `0`, +then `sycl::ext::intel::burst_coalesce` must be set to `true`. +3. For `load`, exactly one of `sycl::ext::intel::prefetch` and +`sycl::ext::intel::burst_coalesce` is allowed to be `true`. +4. For `load`, exactly one of `sycl::ext::intel::prefetch` and +`sycl::ext::intel::cache` is allowed to be `true`. ## Implementation @@ -83,31 +83,31 @@ public: ```c++ #include ... -cl::sycl::buffer output_buffer(output_data, 1); -cl::sycl::buffer input_buffer(input_data, 1); +sycl::buffer output_buffer(output_data, 1); +sycl::buffer input_buffer(input_data, 1); -Queue.submit([&](cl::sycl::handler &cgh) { - auto output_accessor = output_buffer.get_access(cgh); - auto input_accessor = input_buffer.get_access(cgh); +Queue.submit([&](sycl::handler &cgh) { + auto output_accessor = output_buffer.get_access(cgh); + auto input_accessor = input_buffer.get_access(cgh); cgh.single_task([=] { auto input_ptr = input_accessor.get_pointer(); auto output_ptr = output_accessor.get_pointer(); using PrefetchingLSU = - cl::sycl::ext::intel::lsu, - cl::sycl::ext::intel::statically_coalesce>; + sycl::ext::intel::lsu, + sycl::ext::intel::statically_coalesce>; using BurstCoalescedLSU = - cl::sycl::ext::intel::lsu, - cl::sycl::ext::intel::statically_coalesce>; + sycl::ext::intel::lsu, + sycl::ext::intel::statically_coalesce>; using CachingLSU = - cl::sycl::ext::intel::lsu, - cl::sycl::ext::intel::cache<1024>, - cl::sycl::ext::intel::statically_coalesce>; + sycl::ext::intel::lsu, + sycl::ext::intel::cache<1024>, + sycl::ext::intel::statically_coalesce>; - using PipelinedLSU = cl::sycl::ext::intel::lsu<>; + using PipelinedLSU = sycl::ext::intel::lsu<>; int X = PrefetchingLSU::load(input_ptr); // int X = input_ptr[0] int Y = CachingLSU::load(input_ptr + 1); // int Y = input_ptr[1] diff --git a/sycl/doc/extensions/supported/sycl_ext_intel_fpga_reg.md b/sycl/doc/extensions/supported/sycl_ext_intel_fpga_reg.md index 62756297b1949..f5df083362aa8 100644 --- a/sycl/doc/extensions/supported/sycl_ext_intel_fpga_reg.md +++ b/sycl/doc/extensions/supported/sycl_ext_intel_fpga_reg.md @@ -25,7 +25,7 @@ The implementation is a wrapper class to map fpga_reg function call to a Clang b #include ... // force at least one register on data path -int a = cl::sycl::intel::fpga_reg(a[k]) + b[k]; +int a = sycl::intel::fpga_reg(a[k]) + b[k]; ... 
``` diff --git a/sycl/doc/extensions/supported/sycl_ext_oneapi_dot_accumulate.asciidoc b/sycl/doc/extensions/supported/sycl_ext_oneapi_dot_accumulate.asciidoc index de7622f6121ca..e77fce3b5071d 100644 --- a/sycl/doc/extensions/supported/sycl_ext_oneapi_dot_accumulate.asciidoc +++ b/sycl/doc/extensions/supported/sycl_ext_oneapi_dot_accumulate.asciidoc @@ -68,7 +68,7 @@ The extension is always enabled. The dot product functionality may be emulated i === Add to Section 4.13.6 - Geometric Functions -Additionally, the following additional functions are available in the namespace `cl::sycl::intel` on the host and device. +Additionally, the following additional functions are available in the namespace `sycl::intel` on the host and device. [cols="4a,4",options="header"] |==== diff --git a/sycl/doc/extensions/supported/sycl_ext_oneapi_enqueue_barrier.asciidoc b/sycl/doc/extensions/supported/sycl_ext_oneapi_enqueue_barrier.asciidoc index 389b8c1e09589..cfd0941ec9634 100644 --- a/sycl/doc/extensions/supported/sycl_ext_oneapi_enqueue_barrier.asciidoc +++ b/sycl/doc/extensions/supported/sycl_ext_oneapi_enqueue_barrier.asciidoc @@ -109,21 +109,21 @@ CG4 doesn't execute until all previous command groups submitted to the same queu [source,c++,NoName,linenums] ---- ... -Queue.submit([&](cl::sycl::handler& cgh) { +Queue.submit([&](sycl::handler& cgh) { // CG1 }); -Queue.submit([&](cl::sycl::handler& cgh) { +Queue.submit([&](sycl::handler& cgh) { // CG2 }); -Queue.submit([&](cl::sycl::handler& cgh) { +Queue.submit([&](sycl::handler& cgh) { // CG3 }); -Queue.submit([&](cl::sycl::handler& cgh) { +Queue.submit([&](sycl::handler& cgh) { cgh.ext_oneapi_barrier(); }); -Queue.submit([&](cl::sycl::handler& cgh) { +Queue.submit([&](sycl::handler& cgh) { // CG4 }); ... @@ -134,19 +134,19 @@ Queue.submit([&](cl::sycl::handler& cgh) { [source,c++,NoName,linenums] ---- ... -Queue.submit([&](cl::sycl::handler& cgh) { +Queue.submit([&](sycl::handler& cgh) { // CG1 }); -Queue.submit([&](cl::sycl::handler& cgh) { +Queue.submit([&](sycl::handler& cgh) { // CG2 }); -Queue.submit([&](cl::sycl::handler& cgh) { +Queue.submit([&](sycl::handler& cgh) { // CG3 }); Queue.ext_oneapi_submit_barrier(); -Queue.submit([&](cl::sycl::handler& cgh) { +Queue.submit([&](sycl::handler& cgh) { // CG4 }); ... @@ -162,19 +162,19 @@ CG3 requires CG1 (in Queue1) and CG2 (in Queue2) to have completed before it (CG [source,c++,NoName,linenums] ---- ... -auto event_barrier1 = Queue1.submit([&](cl::sycl::handler& cgh) { +auto event_barrier1 = Queue1.submit([&](sycl::handler& cgh) { // CG1 }); -auto event_barrier2 = Queue2.submit([&](cl::sycl::handler& cgh) { +auto event_barrier2 = Queue2.submit([&](sycl::handler& cgh) { // CG2 }); -Queue3.submit([&](cl::sycl::handler& cgh) { +Queue3.submit([&](sycl::handler& cgh) { cgh.ext_oneapi_barrier( std::vector{event_barrier1, event_barrier2} ); }); -Queue3.submit([&](cl::sycl::handler& cgh) { +Queue3.submit([&](sycl::handler& cgh) { // CG3 }); ... @@ -185,17 +185,17 @@ Queue3.submit([&](cl::sycl::handler& cgh) { [source,c++,NoName,linenums] ---- ... -auto event_barrier1 = Queue1.submit([&](cl::sycl::handler& cgh) { +auto event_barrier1 = Queue1.submit([&](sycl::handler& cgh) { // CG1 }); -auto event_barrier2 = Queue2.submit([&](cl::sycl::handler& cgh) { +auto event_barrier2 = Queue2.submit([&](sycl::handler& cgh) { // CG2 }); Queue3.ext_oneapi_submit_barrier( std::vector{event_barrier1, event_barrier2} ); -Queue3.submit([&](cl::sycl::handler& cgh) { +Queue3.submit([&](sycl::handler& cgh) { // CG3 }); ... 
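As a small, self-contained companion to the extension examples above (an illustrative sketch, not part of the patch; it assumes the `<sycl/sycl.hpp>` header and uses USM shared memory only to keep it short), the queue-level barrier can be used with the plain `sycl::` spelling on a single out-of-order queue like this:

```c++
#include <sycl/sycl.hpp>

int main() {
  sycl::queue Q; // out-of-order by default
  int *Data = sycl::malloc_shared<int>(1, Q);
  *Data = 0;

  Q.single_task([=] { *Data = 42; }); // CG1
  // Every command group submitted after this barrier waits for CG1.
  Q.ext_oneapi_submit_barrier();
  Q.single_task([=] { *Data += 1; }).wait(); // CG2, observes CG1's write

  int Result = *Data; // expected: 43
  sycl::free(Data, Q);
  return Result == 43 ? 0 : 1;
}
```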
diff --git a/sycl/include/sycl/detail/image_impl.hpp b/sycl/include/sycl/detail/image_impl.hpp index ffc5a0efedae9..4a828950a6383 100644 --- a/sycl/include/sycl/detail/image_impl.hpp +++ b/sycl/include/sycl/detail/image_impl.hpp @@ -75,7 +75,7 @@ class __SYCL_EXPORT image_impl final : public SYCLMemObjT { template using EnableIfPitchT = typename detail::enable_if_t>; static_assert(Dimensions >= 1 || Dimensions <= 3, - "Dimensions of cl::sycl::image can be 1, 2 or 3"); + "Dimensions of sycl::image can be 1, 2 or 3"); void setPitches() { size_t WHD[3] = {1, 1, 1}; // Width, Height, Depth. diff --git a/sycl/include/sycl/detail/usm_impl.hpp b/sycl/include/sycl/detail/usm_impl.hpp index 4265d4df0453d..f7b50d48285e6 100644 --- a/sycl/include/sycl/detail/usm_impl.hpp +++ b/sycl/include/sycl/detail/usm_impl.hpp @@ -18,12 +18,11 @@ namespace usm { __SYCL_EXPORT void *alignedAlloc(size_t Alignment, size_t Bytes, const context &Ctxt, const device &Dev, - cl::sycl::usm::alloc Kind, + sycl::usm::alloc Kind, const code_location &CL); __SYCL_EXPORT void *alignedAllocHost(size_t Alignment, size_t Bytes, - const context &Ctxt, - cl::sycl::usm::alloc Kind, + const context &Ctxt, sycl::usm::alloc Kind, const code_location &CL); __SYCL_EXPORT void free(void *Ptr, const context &Ctxt, diff --git a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp index ea2305614a3fb..4de7ebb92dec7 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp @@ -73,7 +73,7 @@ uint32_t to_uint32_t(sycl::marray x, size_t start) { // argument has 'long' type which is 64-bit wide by the OpenCL C spec. However, // by C++ spec long is just at least 32-bit wide, so, you need to ensure (by // performing a cast, for example) that if you use %ld specifier, you pass -// 64-bit argument to the cl::sycl::experimental::printf +// 64-bit argument to the sycl::experimental::printf // // - OpenCL spec defines several additional features, like, for example, 'v' // modifier which allows to print OpenCL vectors: note that these features are @@ -100,10 +100,9 @@ inline __SYCL_ALWAYS_INLINE T> tanh(T x) __NOEXC { #if defined(__NVPTX__) - using _ocl_T = cl::sycl::detail::ConvertToOpenCLType_t; - _ocl_T arg1 = cl::sycl::detail::convertDataToType(x); - return cl::sycl::detail::convertDataToType<_ocl_T, T>( - __clc_native_tanh(arg1)); + using _ocl_T = sycl::detail::ConvertToOpenCLType_t; + _ocl_T arg1 = sycl::detail::convertDataToType(x); + return sycl::detail::convertDataToType<_ocl_T, T>(__clc_native_tanh(arg1)); #else return __sycl_std::__invoke_tanh(x); #endif @@ -115,10 +114,9 @@ inline __SYCL_ALWAYS_INLINE sycl::detail::enable_if_t::value, T> exp2(T x) __NOEXC { #if defined(__NVPTX__) - using _ocl_T = cl::sycl::detail::ConvertToOpenCLType_t; - _ocl_T arg1 = cl::sycl::detail::convertDataToType(x); - return cl::sycl::detail::convertDataToType<_ocl_T, T>( - __clc_native_exp2(arg1)); + using _ocl_T = sycl::detail::ConvertToOpenCLType_t; + _ocl_T arg1 = sycl::detail::convertDataToType(x); + return sycl::detail::convertDataToType<_ocl_T, T>(__clc_native_exp2(arg1)); #else return __sycl_std::__invoke_exp2(x); #endif diff --git a/sycl/include/sycl/interop_handle.hpp b/sycl/include/sycl/interop_handle.hpp index 6f0ee92000d56..fb78f184ed414 100644 --- a/sycl/include/sycl/interop_handle.hpp +++ b/sycl/include/sycl/interop_handle.hpp @@ -46,7 +46,7 @@ class interop_handle { /// command group, and returns the underlying OpenCL 
memory object that is /// used by the SYCL runtime. If the accessor passed as parameter is not part /// of the command group requirements (e.g. it is an unregistered placeholder - /// accessor), the exception `cl::sycl::invalid_object` is thrown + /// accessor), the exception `sycl::invalid_object` is thrown /// asynchronously. template (size_t param_value_size, void *param_value, int getAttribute(pi_device device, CUdevice_attribute attribute) { int value; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( cuDeviceGetAttribute(&value, attribute, device->get()) == CUDA_SUCCESS); return value; } @@ -568,7 +568,7 @@ pi_result _pi_event::record() { try { eventId_ = queue_->get_next_event_id(); if (eventId_ == 0) { - cl::sycl::detail::pi::die( + sycl::detail::pi::die( "Unrecoverable program state reached in event identifier overflow"); } result = PI_CHECK_ERROR(cuEventRecord(evEnd_, stream_)); @@ -721,7 +721,7 @@ pi_result _pi_program::build_program(const char *build_options) { /// Note: Another alternative is to add kernel names as metadata, like with /// reqd_work_group_size. std::string getKernelNames(pi_program) { - cl::sycl::detail::pi::die("getKernelNames not implemented"); + sycl::detail::pi::die("getKernelNames not implemented"); return {}; } @@ -781,7 +781,7 @@ template class ReleaseGuard { // CUDA error for which it is unclear if the function that reported it // succeeded or not. Either way, the state of the program is compromised // and likely unrecoverable. - cl::sycl::detail::pi::die( + sycl::detail::pi::die( "Unrecoverable program state reached in cuda_piMemRelease"); } } @@ -939,7 +939,7 @@ pi_result cuda_piPlatformGetInfo(pi_platform platform, default: __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); } - cl::sycl::detail::pi::die("Platform info request not implemented"); + sycl::detail::pi::die("Platform info request not implemented"); return {}; } @@ -1004,7 +1004,7 @@ pi_result cuda_piContextGetInfo(pi_context context, pi_context_info param_name, } case PI_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: { int major = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, context->get_device()->get()) == CUDA_SUCCESS); @@ -1055,10 +1055,10 @@ pi_result cuda_piextDeviceSelectBinary(pi_device device, (void)device; if (!binaries) { - cl::sycl::detail::pi::die("No list of device images provided"); + sycl::detail::pi::die("No list of device images provided"); } if (num_binaries < 1) { - cl::sycl::detail::pi::die("No binary images in the list"); + sycl::detail::pi::die("No binary images in the list"); } // Look for an image for the NVPTX64 target, and return the first one that is @@ -1120,11 +1120,11 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, } case PI_DEVICE_INFO_MAX_COMPUTE_UNITS: { int compute_units = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( cuDeviceGetAttribute(&compute_units, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(compute_units >= 0); + sycl::detail::pi::assertion(compute_units >= 0); return getInfo(param_value_size, param_value, param_value_size_ret, pi_uint32(compute_units)); } @@ -1136,20 +1136,20 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, size_t return_sizes[max_work_item_dimensions]; int max_x = 0, max_y = 0, max_z = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( 
cuDeviceGetAttribute(&max_x, CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X, device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(max_x >= 0); + sycl::detail::pi::assertion(max_x >= 0); - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( cuDeviceGetAttribute(&max_y, CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y, device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(max_y >= 0); + sycl::detail::pi::assertion(max_y >= 0); - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( cuDeviceGetAttribute(&max_z, CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z, device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(max_z >= 0); + sycl::detail::pi::assertion(max_z >= 0); return_sizes[0] = size_t(max_x); return_sizes[1] = size_t(max_y); @@ -1161,20 +1161,20 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, case PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D: { size_t return_sizes[max_work_item_dimensions]; int max_x = 0, max_y = 0, max_z = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( cuDeviceGetAttribute(&max_x, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X, device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(max_x >= 0); + sycl::detail::pi::assertion(max_x >= 0); - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( cuDeviceGetAttribute(&max_y, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y, device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(max_y >= 0); + sycl::detail::pi::assertion(max_y >= 0); - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( cuDeviceGetAttribute(&max_z, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z, device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(max_z >= 0); + sycl::detail::pi::assertion(max_z >= 0); return_sizes[0] = size_t(max_x); return_sizes[1] = size_t(max_y); @@ -1185,12 +1185,12 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, case PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE: { int max_work_group_size = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( cuDeviceGetAttribute(&max_work_group_size, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(max_work_group_size >= 0); + sycl::detail::pi::assertion(max_work_group_size >= 0); return getInfo(param_value_size, param_value, param_value_size_ret, size_t(max_work_group_size)); @@ -1240,12 +1240,12 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, case PI_DEVICE_INFO_MAX_NUM_SUB_GROUPS: { // Number of sub-groups = max block size / warp size + possible remainder int max_threads = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( cuDeviceGetAttribute(&max_threads, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, device->get()) == CUDA_SUCCESS); int warpSize = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( cuDeviceGetAttribute(&warpSize, CU_DEVICE_ATTRIBUTE_WARP_SIZE, device->get()) == CUDA_SUCCESS); int maxWarps = (max_threads + warpSize - 1) / warpSize; @@ -1256,7 +1256,7 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, // Volta provides independent thread scheduling // TODO: Revisit for previous generation GPUs int major = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device->get()) == CUDA_SUCCESS); @@ -1266,7 +1266,7 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, case 
PI_DEVICE_INFO_ATOMIC_64: { int major = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device->get()) == CUDA_SUCCESS); @@ -1284,7 +1284,7 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, } case PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: { int major = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device->get()) == CUDA_SUCCESS); @@ -1299,7 +1299,7 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, } case PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16: { int major = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device->get()) == CUDA_SUCCESS); @@ -1311,7 +1311,7 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, case PI_DEVICE_INFO_SUB_GROUP_SIZES_INTEL: { // NVIDIA devices only support one sub-group size (the warp size) int warpSize = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( cuDeviceGetAttribute(&warpSize, CU_DEVICE_ATTRIBUTE_WARP_SIZE, device->get()) == CUDA_SUCCESS); size_t sizes[1] = {static_cast(warpSize)}; @@ -1320,10 +1320,10 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, } case PI_DEVICE_INFO_MAX_CLOCK_FREQUENCY: { int clock_freq = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( cuDeviceGetAttribute(&clock_freq, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(clock_freq >= 0); + sycl::detail::pi::assertion(clock_freq >= 0); return getInfo(param_value_size, param_value, param_value_size_ret, pi_uint32(clock_freq) / 1000u); } @@ -1339,8 +1339,8 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, // CL_DEVICE_TYPE_CUSTOM. size_t global = 0; - cl::sycl::detail::pi::assertion(cuDeviceTotalMem(&global, device->get()) == - CUDA_SUCCESS); + sycl::detail::pi::assertion(cuDeviceTotalMem(&global, device->get()) == + CUDA_SUCCESS); auto quarter_global = static_cast(global / 4u); @@ -1356,7 +1356,7 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, if (std::getenv("SYCL_PI_CUDA_ENABLE_IMAGE_SUPPORT") != nullptr) { enabled = PI_TRUE; } else { - cl::sycl::detail::pi::cuPrint( + sycl::detail::pi::cuPrint( "Images are not fully supported by the CUDA BE, their support is " "disabled by default. Their partial support can be activated by " "setting SYCL_PI_CUDA_ENABLE_IMAGE_SUPPORT environment variable at " @@ -1381,17 +1381,17 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, case PI_DEVICE_INFO_IMAGE2D_MAX_HEIGHT: { // Take the smaller of maximum surface and maximum texture height. 
int tex_height = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( cuDeviceGetAttribute(&tex_height, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT, device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(tex_height >= 0); + sycl::detail::pi::assertion(tex_height >= 0); int surf_height = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( cuDeviceGetAttribute(&surf_height, CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT, device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(surf_height >= 0); + sycl::detail::pi::assertion(surf_height >= 0); int min = std::min(tex_height, surf_height); @@ -1400,17 +1400,17 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, case PI_DEVICE_INFO_IMAGE2D_MAX_WIDTH: { // Take the smaller of maximum surface and maximum texture width. int tex_width = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( cuDeviceGetAttribute(&tex_width, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH, device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(tex_width >= 0); + sycl::detail::pi::assertion(tex_width >= 0); int surf_width = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( cuDeviceGetAttribute(&surf_width, CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH, device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(surf_width >= 0); + sycl::detail::pi::assertion(surf_width >= 0); int min = std::min(tex_width, surf_width); @@ -1419,17 +1419,17 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, case PI_DEVICE_INFO_IMAGE3D_MAX_HEIGHT: { // Take the smaller of maximum surface and maximum texture height. int tex_height = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( cuDeviceGetAttribute(&tex_height, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT, device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(tex_height >= 0); + sycl::detail::pi::assertion(tex_height >= 0); int surf_height = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( cuDeviceGetAttribute(&surf_height, CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT, device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(surf_height >= 0); + sycl::detail::pi::assertion(surf_height >= 0); int min = std::min(tex_height, surf_height); @@ -1438,17 +1438,17 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, case PI_DEVICE_INFO_IMAGE3D_MAX_WIDTH: { // Take the smaller of maximum surface and maximum texture width. int tex_width = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( cuDeviceGetAttribute(&tex_width, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH, device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(tex_width >= 0); + sycl::detail::pi::assertion(tex_width >= 0); int surf_width = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( cuDeviceGetAttribute(&surf_width, CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH, device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(surf_width >= 0); + sycl::detail::pi::assertion(surf_width >= 0); int min = std::min(tex_width, surf_width); @@ -1457,17 +1457,17 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, case PI_DEVICE_INFO_IMAGE3D_MAX_DEPTH: { // Take the smaller of maximum surface and maximum texture depth. 
int tex_depth = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( cuDeviceGetAttribute(&tex_depth, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH, device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(tex_depth >= 0); + sycl::detail::pi::assertion(tex_depth >= 0); int surf_depth = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( cuDeviceGetAttribute(&surf_depth, CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH, device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(surf_depth >= 0); + sycl::detail::pi::assertion(surf_depth >= 0); int min = std::min(tex_depth, surf_depth); @@ -1476,17 +1476,17 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, case PI_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE: { // Take the smaller of maximum surface and maximum texture width. int tex_width = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( cuDeviceGetAttribute(&tex_width, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH, device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(tex_width >= 0); + sycl::detail::pi::assertion(tex_width >= 0); int surf_width = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( cuDeviceGetAttribute(&surf_width, CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH, device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(surf_width >= 0); + sycl::detail::pi::assertion(surf_width >= 0); int min = std::min(tex_width, surf_width); @@ -1510,7 +1510,7 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, } case PI_DEVICE_INFO_MEM_BASE_ADDR_ALIGN: { int mem_base_addr_align = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( cuDeviceGetAttribute(&mem_base_addr_align, CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT, device->get()) == CUDA_SUCCESS); @@ -1548,10 +1548,10 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, } case PI_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE: { int cache_size = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( cuDeviceGetAttribute(&cache_size, CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(cache_size >= 0); + sycl::detail::pi::assertion(cache_size >= 0); // The L2 cache is global to the GPU. return getInfo(param_value_size, param_value, param_value_size_ret, pi_uint64(cache_size)); @@ -1559,18 +1559,18 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, case PI_DEVICE_INFO_GLOBAL_MEM_SIZE: { size_t bytes = 0; // Runtime API has easy access to this value, driver API info is scarse. 
- cl::sycl::detail::pi::assertion(cuDeviceTotalMem(&bytes, device->get()) == - CUDA_SUCCESS); + sycl::detail::pi::assertion(cuDeviceTotalMem(&bytes, device->get()) == + CUDA_SUCCESS); return getInfo(param_value_size, param_value, param_value_size_ret, pi_uint64{bytes}); } case PI_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE: { int constant_memory = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( cuDeviceGetAttribute(&constant_memory, CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY, device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(constant_memory >= 0); + sycl::detail::pi::assertion(constant_memory >= 0); return getInfo(param_value_size, param_value, param_value_size_ret, pi_uint64(constant_memory)); @@ -1590,32 +1590,31 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, // CUDA has its own definition of "local memory", which maps to OpenCL's // "private memory". int local_mem_size = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( cuDeviceGetAttribute(&local_mem_size, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(local_mem_size >= 0); + sycl::detail::pi::assertion(local_mem_size >= 0); return getInfo(param_value_size, param_value, param_value_size_ret, pi_uint64(local_mem_size)); } case PI_DEVICE_INFO_ERROR_CORRECTION_SUPPORT: { int ecc_enabled = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( cuDeviceGetAttribute(&ecc_enabled, CU_DEVICE_ATTRIBUTE_ECC_ENABLED, device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion((ecc_enabled == 0) | (ecc_enabled == 1)); + sycl::detail::pi::assertion((ecc_enabled == 0) | (ecc_enabled == 1)); auto result = static_cast(ecc_enabled); return getInfo(param_value_size, param_value, param_value_size_ret, result); } case PI_DEVICE_INFO_HOST_UNIFIED_MEMORY: { int is_integrated = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( cuDeviceGetAttribute(&is_integrated, CU_DEVICE_ATTRIBUTE_INTEGRATED, device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion((is_integrated == 0) | - (is_integrated == 1)); + sycl::detail::pi::assertion((is_integrated == 0) | (is_integrated == 1)); auto result = static_cast(is_integrated); return getInfo(param_value_size, param_value, param_value_size_ret, result); } @@ -1675,9 +1674,8 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, case PI_DEVICE_INFO_NAME: { static constexpr size_t MAX_DEVICE_NAME_LENGTH = 256u; char name[MAX_DEVICE_NAME_LENGTH]; - cl::sycl::detail::pi::assertion( - cuDeviceGetName(name, MAX_DEVICE_NAME_LENGTH, device->get()) == - CUDA_SUCCESS); + sycl::detail::pi::assertion(cuDeviceGetName(name, MAX_DEVICE_NAME_LENGTH, + device->get()) == CUDA_SUCCESS); return getInfoArray(strlen(name) + 1, param_value_size, param_value, param_value_size_ret, name); } @@ -1713,11 +1711,11 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, int major = 0; int minor = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, device->get()) == CUDA_SUCCESS); @@ -1907,7 +1905,7 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, default: __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); } - 
cl::sycl::detail::pi::die("Device info request not implemented"); + sycl::detail::pi::die("Device info request not implemented"); return {}; } @@ -2031,7 +2029,7 @@ pi_result cuda_piContextCreate(const pi_context_properties *properties, break; default: // Unknown property. - cl::sycl::detail::pi::die( + sycl::detail::pi::die( "Unknown piContextCreate property in property list"); return PI_ERROR_INVALID_VALUE; } @@ -2306,7 +2304,7 @@ pi_result cuda_piMemRelease(pi_mem memObj) { // error for which it is unclear if the function that reported it succeeded // or not. Either way, the state of the program is compromised and likely // unrecoverable. - cl::sycl::detail::pi::die( + sycl::detail::pi::die( "Unrecoverable program state reached in cuda_piMemRelease"); } @@ -2382,7 +2380,7 @@ pi_result cuda_piMemBufferPartition(pi_mem parent_buffer, pi_mem_flags flags, } pi_result cuda_piMemGetInfo(pi_mem, pi_mem_info, size_t, void *, size_t *) { - cl::sycl::detail::pi::die("cuda_piMemGetInfo not implemented"); + sycl::detail::pi::die("cuda_piMemGetInfo not implemented"); } /// Gets the native CUDA handle of a PI mem object @@ -2412,7 +2410,7 @@ pi_result cuda_piextMemCreateWithNativeHandle(pi_native_handle nativeHandle, pi_context context, bool ownNativeHandle, pi_mem *mem) { - cl::sycl::detail::pi::die( + sycl::detail::pi::die( "Creation of PI mem from native handle not implemented"); return {}; } @@ -2488,7 +2486,7 @@ pi_result cuda_piQueueGetInfo(pi_queue command_queue, pi_queue_info param_name, default: __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); } - cl::sycl::detail::pi::die("Queue info request not implemented"); + sycl::detail::pi::die("Queue info request not implemented"); return {}; } @@ -2605,7 +2603,7 @@ pi_result cuda_piextQueueCreateWithNativeHandle(pi_native_handle nativeHandle, else if (flags == CU_STREAM_NON_BLOCKING) properties = __SYCL_PI_CUDA_SYNC_WITH_DEFAULT; else - cl::sycl::detail::pi::die("Unknown cuda stream"); + sycl::detail::pi::die("Unknown cuda stream"); std::vector computeCuStreams(1, cuStream); std::vector transferCuStreams(0); @@ -2826,7 +2824,7 @@ pi_result cuda_piextKernelSetArgMemObj(pi_kernel kernel, pi_uint32 arg_index, arrayDesc.Format != CU_AD_FORMAT_SIGNED_INT32 && arrayDesc.Format != CU_AD_FORMAT_HALF && arrayDesc.Format != CU_AD_FORMAT_FLOAT) { - cl::sycl::detail::pi::die( + sycl::detail::pi::die( "PI CUDA kernels only support images with channel types int32, " "uint32, float, and half."); } @@ -2870,7 +2868,7 @@ pi_result cuda_piKernelGetGroupInfo(pi_kernel kernel, pi_device device, switch (param_name) { case PI_KERNEL_GROUP_INFO_WORK_GROUP_SIZE: { int max_threads = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( cuFuncGetAttribute(&max_threads, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, kernel->get()) == CUDA_SUCCESS); @@ -2894,7 +2892,7 @@ pi_result cuda_piKernelGetGroupInfo(pi_kernel kernel, pi_device device, case PI_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE: { // OpenCL LOCAL == CUDA SHARED int bytes = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( cuFuncGetAttribute(&bytes, CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, kernel->get()) == CUDA_SUCCESS); return getInfo(param_value_size, param_value, param_value_size_ret, @@ -2903,7 +2901,7 @@ pi_result cuda_piKernelGetGroupInfo(pi_kernel kernel, pi_device device, case PI_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: { // Work groups should be multiples of the warp size int warpSize = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( 
cuDeviceGetAttribute(&warpSize, CU_DEVICE_ATTRIBUTE_WARP_SIZE, device->get()) == CUDA_SUCCESS); return getInfo(param_value_size, param_value, param_value_size_ret, @@ -2912,7 +2910,7 @@ pi_result cuda_piKernelGetGroupInfo(pi_kernel kernel, pi_device device, case PI_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE: { // OpenCL PRIVATE == CUDA LOCAL int bytes = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( cuFuncGetAttribute(&bytes, CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES, kernel->get()) == CUDA_SUCCESS); return getInfo(param_value_size, param_value, param_value_size_ret, @@ -2920,7 +2918,7 @@ pi_result cuda_piKernelGetGroupInfo(pi_kernel kernel, pi_device device, } case PI_KERNEL_GROUP_INFO_NUM_REGS: { int numRegs = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( cuFuncGetAttribute(&numRegs, CU_FUNC_ATTRIBUTE_NUM_REGS, kernel->get()) == CUDA_SUCCESS); return getInfo(param_value_size, param_value, param_value_size_ret, @@ -3087,7 +3085,7 @@ pi_result cuda_piEnqueueKernelLaunch( pi_result cuda_piEnqueueNativeKernel(pi_queue, void (*)(void *), void *, size_t, pi_uint32, const pi_mem *, const void **, pi_uint32, const pi_event *, pi_event *) { - cl::sycl::detail::pi::die("Not implemented in CUDA backend"); + sycl::detail::pi::die("Not implemented in CUDA backend"); return {}; } @@ -3113,7 +3111,7 @@ pi_result cuda_piMemImageCreate(pi_context context, pi_mem_flags flags, // TODO: check SYCL CTS and spec. May also have to support BGRA if (image_format->image_channel_order != pi_image_channel_order::PI_IMAGE_CHANNEL_ORDER_RGBA) { - cl::sycl::detail::pi::die( + sycl::detail::pi::die( "cuda_piMemImageCreate only supports RGBA channel order"); } @@ -3174,7 +3172,7 @@ pi_result cuda_piMemImageCreate(pi_context context, pi_mem_flags flags, pixel_type_size_bytes = 4; break; default: - cl::sycl::detail::pi::die( + sycl::detail::pi::die( "cuda_piMemImageCreate given unsupported image_channel_data_type"); } @@ -3257,7 +3255,7 @@ pi_result cuda_piMemImageCreate(pi_context context, pi_mem_flags flags, /// \TODO Not implemented pi_result cuda_piMemImageGetInfo(pi_mem, pi_image_info, size_t, void *, size_t *) { - cl::sycl::detail::pi::die("cuda_piMemImageGetInfo not implemented"); + sycl::detail::pi::die("cuda_piMemImageGetInfo not implemented"); return {}; } @@ -3273,8 +3271,7 @@ pi_result cuda_piMemRetain(pi_mem mem) { /// pi_result cuda_piclProgramCreateWithSource(pi_context, pi_uint32, const char **, const size_t *, pi_program *) { - cl::sycl::detail::pi::cuPrint( - "cuda_piclProgramCreateWithSource not implemented"); + sycl::detail::pi::cuPrint("cuda_piclProgramCreateWithSource not implemented"); return PI_ERROR_INVALID_OPERATION; } @@ -3308,7 +3305,7 @@ pi_result cuda_piProgramBuild(pi_program program, pi_uint32 num_devices, /// \TODO Not implemented pi_result cuda_piProgramCreate(pi_context, const void *, size_t, pi_program *) { - cl::sycl::detail::pi::die("cuda_piProgramCreate not implemented"); + sycl::detail::pi::die("cuda_piProgramCreate not implemented"); return {}; } @@ -3388,7 +3385,7 @@ pi_result cuda_piProgramGetInfo(pi_program program, pi_program_info param_name, default: __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); } - cl::sycl::detail::pi::die("Program info request not implemented"); + sycl::detail::pi::die("Program info request not implemented"); return {}; } @@ -3511,7 +3508,7 @@ pi_result cuda_piProgramGetBuildInfo(pi_program program, pi_device device, default: __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); } - cl::sycl::detail::pi::die("Program Build info 
request not implemented"); + sycl::detail::pi::die("Program Build info request not implemented"); return {}; } @@ -3577,7 +3574,7 @@ pi_result cuda_piextProgramGetNativeHandle(pi_program program, /// \return TBD pi_result cuda_piextProgramCreateWithNativeHandle(pi_native_handle, pi_context, bool, pi_program *) { - cl::sycl::detail::pi::die( + sycl::detail::pi::die( "Creation of PI program from native handle not implemented"); return {}; } @@ -3631,7 +3628,7 @@ pi_result cuda_piKernelGetSubGroupInfo( case PI_KERNEL_MAX_SUB_GROUP_SIZE: { // Sub-group size is equivalent to warp size int warpSize = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( cuDeviceGetAttribute(&warpSize, CU_DEVICE_ATTRIBUTE_WARP_SIZE, device->get()) == CUDA_SUCCESS); return getInfo(param_value_size, param_value, param_value_size_ret, @@ -3640,7 +3637,7 @@ pi_result cuda_piKernelGetSubGroupInfo( case PI_KERNEL_MAX_NUM_SUB_GROUPS: { // Number of sub-groups = max block size / warp size + possible remainder int max_threads = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( cuFuncGetAttribute(&max_threads, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, kernel->get()) == CUDA_SUCCESS); @@ -3707,8 +3704,7 @@ pi_result cuda_piextProgramSetSpecializationConstant(pi_program, pi_uint32, size_t, const void *) { // This entry point is only used for native specialization constants (SPIR-V), // and the CUDA plugin is AOT only so this entry point is not supported. - cl::sycl::detail::pi::die( - "Native specialization constants are not supported"); + sycl::detail::pi::die("Native specialization constants are not supported"); return {}; } @@ -3723,7 +3719,7 @@ pi_result cuda_piextKernelSetArgPointer(pi_kernel kernel, pi_uint32 arg_index, // Events // pi_result cuda_piEventCreate(pi_context, pi_event *) { - cl::sycl::detail::pi::die("PI Event Create not implemented in CUDA backend"); + sycl::detail::pi::die("PI Event Create not implemented in CUDA backend"); } pi_result cuda_piEventGetInfo(pi_event event, pi_event_info param_name, @@ -3784,17 +3780,17 @@ pi_result cuda_piEventGetProfilingInfo(pi_event event, default: __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); } - cl::sycl::detail::pi::die("Event Profiling info request not implemented"); + sycl::detail::pi::die("Event Profiling info request not implemented"); return {}; } pi_result cuda_piEventSetCallback(pi_event, pi_int32, pfn_notify, void *) { - cl::sycl::detail::pi::die("Event Callback not implemented in CUDA backend"); + sycl::detail::pi::die("Event Callback not implemented in CUDA backend"); return PI_SUCCESS; } pi_result cuda_piEventSetStatus(pi_event, pi_int32) { - cl::sycl::detail::pi::die("Event Set Status not implemented in CUDA backend"); + sycl::detail::pi::die("Event Set Status not implemented in CUDA backend"); return PI_ERROR_INVALID_VALUE; } @@ -3803,7 +3799,7 @@ pi_result cuda_piEventRetain(pi_event event) { const auto refCount = event->increment_reference_count(); - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( refCount != 0, "Reference count overflow detected in cuda_piEventRetain."); @@ -3815,7 +3811,7 @@ pi_result cuda_piEventRelease(pi_event event) { // double delete or someone is messing with the ref count. // either way, cannot safely proceed. 
- cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( event->get_reference_count() != 0, "Reference count overflow detected in cuda_piEventRelease."); @@ -4068,7 +4064,7 @@ pi_result cuda_piSamplerRelease(pi_sampler sampler) { // double delete or someone is messing with the ref count. // either way, cannot safely proceed. - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( sampler->get_reference_count() != 0, "Reference count overflow detected in cuda_piSamplerRelease."); @@ -4447,7 +4443,7 @@ static size_t imageElementByteSize(CUDA_ARRAY_DESCRIPTOR array_desc) { case CU_AD_FORMAT_FLOAT: return 4; default: - cl::sycl::detail::pi::die("Invalid image format."); + sycl::detail::pi::die("Invalid image format."); return 0; } } @@ -4728,7 +4724,7 @@ pi_result cuda_piEnqueueMemImageCopy(pi_queue command_queue, pi_mem src_image, pi_result cuda_piEnqueueMemImageFill(pi_queue, pi_mem, const void *, const size_t *, const size_t *, pi_uint32, const pi_event *, pi_event *) { - cl::sycl::detail::pi::die("cuda_piEnqueueMemImageFill not implemented"); + sycl::detail::pi::die("cuda_piEnqueueMemImageFill not implemented"); return {}; } @@ -5161,7 +5157,7 @@ pi_result cuda_piextUSMEnqueueMemAdvise(pi_queue queue, const void *ptr, queue->get_context()->get_device()->get())); break; default: - cl::sycl::detail::pi::die("Unknown advice"); + sycl::detail::pi::die("Unknown advice"); } if (event) { result = event_ptr->record(); diff --git a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp index 40d1939895ed5..84725166b0d1e 100644 --- a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp +++ b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp @@ -1117,8 +1117,8 @@ _pi_mem::~_pi_mem() { Status = CmDevice->DestroySurface(SurfacePtr.RegularImgPtr); } - cl::sycl::detail::pi::assertion(Status == cm_support::CM_SUCCESS && - "Surface Deletion Failure from CM_EMU"); + sycl::detail::pi::assertion(Status == cm_support::CM_SUCCESS && + "Surface Deletion Failure from CM_EMU"); for (auto mapit = Mappings.begin(); mapit != Mappings.end();) { mapit = Mappings.erase(mapit); diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index c3409e52ab0ef..eedb654eed77a 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -316,7 +316,7 @@ pi_result getInfo(size_t param_value_size, void *param_value, int getAttribute(pi_device device, hipDeviceAttribute_t attribute) { int value; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( hipDeviceGetAttribute(&value, attribute, device->get()) == hipSuccess); return value; } @@ -606,7 +606,7 @@ pi_result _pi_event::record() { try { eventId_ = queue_->get_next_event_id(); if (eventId_ == 0) { - cl::sycl::detail::pi::die( + sycl::detail::pi::die( "Unrecoverable program state reached in event identifier overflow"); } result = PI_CHECK_ERROR(hipEventRecord(evEnd_, stream_)); @@ -714,7 +714,7 @@ pi_result _pi_program::build_program(const char *build_options) { /// query to PI and use hipModuleGetFunction to check for a kernel. std::string getKernelNames(pi_program program) { (void)program; - cl::sycl::detail::pi::die("getKernelNames not implemented"); + sycl::detail::pi::die("getKernelNames not implemented"); return {}; } @@ -774,7 +774,7 @@ template class ReleaseGuard { // HIP error for which it is unclear if the function that reported it // succeeded or not. Either way, the state of the program is compromised // and likely unrecoverable. 
- cl::sycl::detail::pi::die( + sycl::detail::pi::die( "Unrecoverable program state reached in hip_piMemRelease"); } } @@ -912,7 +912,7 @@ pi_result hip_piPlatformGetInfo(pi_platform platform, default: __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); } - cl::sycl::detail::pi::die("Platform info request not implemented"); + sycl::detail::pi::die("Platform info request not implemented"); return {}; } @@ -1016,10 +1016,10 @@ pi_result hip_piextDeviceSelectBinary(pi_device device, pi_uint32 *selected_binary) { (void)device; if (!binaries) { - cl::sycl::detail::pi::die("No list of device images provided"); + sycl::detail::pi::die("No list of device images provided"); } if (num_binaries < 1) { - cl::sycl::detail::pi::die("No binary images in the list"); + sycl::detail::pi::die("No binary images in the list"); } // Look for an image for the HIP target, and return the first one that is @@ -1100,11 +1100,11 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name, } case PI_DEVICE_INFO_MAX_COMPUTE_UNITS: { int compute_units = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( hipDeviceGetAttribute(&compute_units, hipDeviceAttributeMultiprocessorCount, device->get()) == hipSuccess); - cl::sycl::detail::pi::assertion(compute_units >= 0); + sycl::detail::pi::assertion(compute_units >= 0); return getInfo(param_value_size, param_value, param_value_size_ret, pi_uint32(compute_units)); } @@ -1116,20 +1116,20 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name, size_t return_sizes[max_work_item_dimensions]; int max_x = 0, max_y = 0, max_z = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( hipDeviceGetAttribute(&max_x, hipDeviceAttributeMaxBlockDimX, device->get()) == hipSuccess); - cl::sycl::detail::pi::assertion(max_x >= 0); + sycl::detail::pi::assertion(max_x >= 0); - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( hipDeviceGetAttribute(&max_y, hipDeviceAttributeMaxBlockDimY, device->get()) == hipSuccess); - cl::sycl::detail::pi::assertion(max_y >= 0); + sycl::detail::pi::assertion(max_y >= 0); - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( hipDeviceGetAttribute(&max_z, hipDeviceAttributeMaxBlockDimZ, device->get()) == hipSuccess); - cl::sycl::detail::pi::assertion(max_z >= 0); + sycl::detail::pi::assertion(max_z >= 0); return_sizes[0] = size_t(max_x); return_sizes[1] = size_t(max_y); @@ -1141,20 +1141,20 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name, case PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D: { size_t return_sizes[max_work_item_dimensions]; int max_x = 0, max_y = 0, max_z = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( hipDeviceGetAttribute(&max_x, hipDeviceAttributeMaxGridDimX, device->get()) == hipSuccess); - cl::sycl::detail::pi::assertion(max_x >= 0); + sycl::detail::pi::assertion(max_x >= 0); - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( hipDeviceGetAttribute(&max_y, hipDeviceAttributeMaxGridDimY, device->get()) == hipSuccess); - cl::sycl::detail::pi::assertion(max_y >= 0); + sycl::detail::pi::assertion(max_y >= 0); - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( hipDeviceGetAttribute(&max_z, hipDeviceAttributeMaxGridDimZ, device->get()) == hipSuccess); - cl::sycl::detail::pi::assertion(max_z >= 0); + sycl::detail::pi::assertion(max_z >= 0); return_sizes[0] = size_t(max_x); return_sizes[1] = size_t(max_y); @@ -1165,12 +1165,12 @@ pi_result hip_piDeviceGetInfo(pi_device device, 
pi_device_info param_name, case PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE: { int max_work_group_size = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( hipDeviceGetAttribute(&max_work_group_size, hipDeviceAttributeMaxThreadsPerBlock, device->get()) == hipSuccess); - cl::sycl::detail::pi::assertion(max_work_group_size >= 0); + sycl::detail::pi::assertion(max_work_group_size >= 0); return getInfo(param_value_size, param_value, param_value_size_ret, size_t(max_work_group_size)); @@ -1220,12 +1220,12 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name, case PI_DEVICE_INFO_MAX_NUM_SUB_GROUPS: { // Number of sub-groups = max block size / warp size + possible remainder int max_threads = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( hipDeviceGetAttribute(&max_threads, hipDeviceAttributeMaxThreadsPerBlock, device->get()) == hipSuccess); int warpSize = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( hipDeviceGetAttribute(&warpSize, hipDeviceAttributeWarpSize, device->get()) == hipSuccess); int maxWarps = (max_threads + warpSize - 1) / warpSize; @@ -1236,7 +1236,7 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name, // Volta provides independent thread scheduling // TODO: Revisit for previous generation GPUs int major = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, device->get()) == hipSuccess); bool ifp = (major >= 7); @@ -1244,7 +1244,7 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name, } case PI_DEVICE_INFO_SUB_GROUP_SIZES_INTEL: { int warpSize = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( hipDeviceGetAttribute(&warpSize, hipDeviceAttributeWarpSize, device->get()) == hipSuccess); size_t sizes[1] = {static_cast(warpSize)}; @@ -1253,10 +1253,10 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name, } case PI_DEVICE_INFO_MAX_CLOCK_FREQUENCY: { int clock_freq = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( hipDeviceGetAttribute(&clock_freq, hipDeviceAttributeClockRate, device->get()) == hipSuccess); - cl::sycl::detail::pi::assertion(clock_freq >= 0); + sycl::detail::pi::assertion(clock_freq >= 0); return getInfo(param_value_size, param_value, param_value_size_ret, pi_uint32(clock_freq) / 1000u); } @@ -1272,8 +1272,8 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name, // CL_DEVICE_TYPE_HIPSTOM. size_t global = 0; - cl::sycl::detail::pi::assertion(hipDeviceTotalMem(&global, device->get()) == - hipSuccess); + sycl::detail::pi::assertion(hipDeviceTotalMem(&global, device->get()) == + hipSuccess); auto quarter_global = static_cast(global / 4u); @@ -1303,16 +1303,16 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name, case PI_DEVICE_INFO_IMAGE2D_MAX_HEIGHT: { // Take the smaller of maximum surface and maximum texture height. 
int tex_height = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( hipDeviceGetAttribute(&tex_height, hipDeviceAttributeMaxTexture2DHeight, device->get()) == hipSuccess); - cl::sycl::detail::pi::assertion(tex_height >= 0); + sycl::detail::pi::assertion(tex_height >= 0); int surf_height = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( hipDeviceGetAttribute(&surf_height, hipDeviceAttributeMaxTexture2DHeight, device->get()) == hipSuccess); - cl::sycl::detail::pi::assertion(surf_height >= 0); + sycl::detail::pi::assertion(surf_height >= 0); int min = std::min(tex_height, surf_height); @@ -1321,15 +1321,15 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name, case PI_DEVICE_INFO_IMAGE2D_MAX_WIDTH: { // Take the smaller of maximum surface and maximum texture width. int tex_width = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( hipDeviceGetAttribute(&tex_width, hipDeviceAttributeMaxTexture2DWidth, device->get()) == hipSuccess); - cl::sycl::detail::pi::assertion(tex_width >= 0); + sycl::detail::pi::assertion(tex_width >= 0); int surf_width = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( hipDeviceGetAttribute(&surf_width, hipDeviceAttributeMaxTexture2DWidth, device->get()) == hipSuccess); - cl::sycl::detail::pi::assertion(surf_width >= 0); + sycl::detail::pi::assertion(surf_width >= 0); int min = std::min(tex_width, surf_width); @@ -1338,16 +1338,16 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name, case PI_DEVICE_INFO_IMAGE3D_MAX_HEIGHT: { // Take the smaller of maximum surface and maximum texture height. int tex_height = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( hipDeviceGetAttribute(&tex_height, hipDeviceAttributeMaxTexture3DHeight, device->get()) == hipSuccess); - cl::sycl::detail::pi::assertion(tex_height >= 0); + sycl::detail::pi::assertion(tex_height >= 0); int surf_height = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( hipDeviceGetAttribute(&surf_height, hipDeviceAttributeMaxTexture3DHeight, device->get()) == hipSuccess); - cl::sycl::detail::pi::assertion(surf_height >= 0); + sycl::detail::pi::assertion(surf_height >= 0); int min = std::min(tex_height, surf_height); @@ -1356,15 +1356,15 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name, case PI_DEVICE_INFO_IMAGE3D_MAX_WIDTH: { // Take the smaller of maximum surface and maximum texture width. int tex_width = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( hipDeviceGetAttribute(&tex_width, hipDeviceAttributeMaxTexture3DWidth, device->get()) == hipSuccess); - cl::sycl::detail::pi::assertion(tex_width >= 0); + sycl::detail::pi::assertion(tex_width >= 0); int surf_width = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( hipDeviceGetAttribute(&surf_width, hipDeviceAttributeMaxTexture3DWidth, device->get()) == hipSuccess); - cl::sycl::detail::pi::assertion(surf_width >= 0); + sycl::detail::pi::assertion(surf_width >= 0); int min = std::min(tex_width, surf_width); @@ -1373,15 +1373,15 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name, case PI_DEVICE_INFO_IMAGE3D_MAX_DEPTH: { // Take the smaller of maximum surface and maximum texture depth. 
int tex_depth = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( hipDeviceGetAttribute(&tex_depth, hipDeviceAttributeMaxTexture3DDepth, device->get()) == hipSuccess); - cl::sycl::detail::pi::assertion(tex_depth >= 0); + sycl::detail::pi::assertion(tex_depth >= 0); int surf_depth = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( hipDeviceGetAttribute(&surf_depth, hipDeviceAttributeMaxTexture3DDepth, device->get()) == hipSuccess); - cl::sycl::detail::pi::assertion(surf_depth >= 0); + sycl::detail::pi::assertion(surf_depth >= 0); int min = std::min(tex_depth, surf_depth); @@ -1390,15 +1390,15 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name, case PI_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE: { // Take the smaller of maximum surface and maximum texture width. int tex_width = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( hipDeviceGetAttribute(&tex_width, hipDeviceAttributeMaxTexture1DWidth, device->get()) == hipSuccess); - cl::sycl::detail::pi::assertion(tex_width >= 0); + sycl::detail::pi::assertion(tex_width >= 0); int surf_width = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( hipDeviceGetAttribute(&surf_width, hipDeviceAttributeMaxTexture1DWidth, device->get()) == hipSuccess); - cl::sycl::detail::pi::assertion(surf_width >= 0); + sycl::detail::pi::assertion(surf_width >= 0); int min = std::min(tex_width, surf_width); @@ -1421,7 +1421,7 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name, } case PI_DEVICE_INFO_MEM_BASE_ADDR_ALIGN: { int mem_base_addr_align = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( hipDeviceGetAttribute(&mem_base_addr_align, hipDeviceAttributeTextureAlignment, device->get()) == hipSuccess); @@ -1455,10 +1455,10 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name, } case PI_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE: { int cache_size = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( hipDeviceGetAttribute(&cache_size, hipDeviceAttributeL2CacheSize, device->get()) == hipSuccess); - cl::sycl::detail::pi::assertion(cache_size >= 0); + sycl::detail::pi::assertion(cache_size >= 0); // The L2 cache is global to the GPU. return getInfo(param_value_size, param_value, param_value_size_ret, pi_uint64(cache_size)); @@ -1466,8 +1466,8 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name, case PI_DEVICE_INFO_GLOBAL_MEM_SIZE: { size_t bytes = 0; // Runtime API has easy access to this value, driver API info is scarse. - cl::sycl::detail::pi::assertion(hipDeviceTotalMem(&bytes, device->get()) == - hipSuccess); + sycl::detail::pi::assertion(hipDeviceTotalMem(&bytes, device->get()) == + hipSuccess); return getInfo(param_value_size, param_value, param_value_size_ret, pi_uint64{bytes}); } @@ -1478,7 +1478,7 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name, // memory on AMD GPU may be larger than what can fit in the positive part // of a signed integer, so use an unsigned integer and cast the pointer to // int*. - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( hipDeviceGetAttribute(reinterpret_cast(&constant_memory), hipDeviceAttributeTotalConstantMemory, device->get()) == hipSuccess); @@ -1501,32 +1501,31 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name, // HIP has its own definition of "local memory", which maps to OpenCL's // "private memory". 
int local_mem_size = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( hipDeviceGetAttribute(&local_mem_size, hipDeviceAttributeMaxSharedMemoryPerBlock, device->get()) == hipSuccess); - cl::sycl::detail::pi::assertion(local_mem_size >= 0); + sycl::detail::pi::assertion(local_mem_size >= 0); return getInfo(param_value_size, param_value, param_value_size_ret, pi_uint64(local_mem_size)); } case PI_DEVICE_INFO_ERROR_CORRECTION_SUPPORT: { int ecc_enabled = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( hipDeviceGetAttribute(&ecc_enabled, hipDeviceAttributeEccEnabled, device->get()) == hipSuccess); - cl::sycl::detail::pi::assertion((ecc_enabled == 0) | (ecc_enabled == 1)); + sycl::detail::pi::assertion((ecc_enabled == 0) | (ecc_enabled == 1)); auto result = static_cast(ecc_enabled); return getInfo(param_value_size, param_value, param_value_size_ret, result); } case PI_DEVICE_INFO_HOST_UNIFIED_MEMORY: { int is_integrated = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( hipDeviceGetAttribute(&is_integrated, hipDeviceAttributeIntegrated, device->get()) == hipSuccess); - cl::sycl::detail::pi::assertion((is_integrated == 0) | - (is_integrated == 1)); + sycl::detail::pi::assertion((is_integrated == 0) | (is_integrated == 1)); auto result = static_cast(is_integrated); return getInfo(param_value_size, param_value, param_value_size_ret, result); } @@ -1586,15 +1585,14 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name, case PI_DEVICE_INFO_NAME: { static constexpr size_t MAX_DEVICE_NAME_LENGTH = 256u; char name[MAX_DEVICE_NAME_LENGTH]; - cl::sycl::detail::pi::assertion( - hipDeviceGetName(name, MAX_DEVICE_NAME_LENGTH, device->get()) == - hipSuccess); + sycl::detail::pi::assertion(hipDeviceGetName(name, MAX_DEVICE_NAME_LENGTH, + device->get()) == hipSuccess); // On AMD GPUs hipDeviceGetName returns an empty string, so return the arch // name instead, this is also what AMD OpenCL devices return. if (strlen(name) == 0) { hipDeviceProp_t props; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( hipGetDeviceProperties(&props, device->get()) == hipSuccess); return getInfoArray(strlen(props.gcnArchName) + 1, param_value_size, @@ -1790,7 +1788,7 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name, default: __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); } - cl::sycl::detail::pi::die("Device info request not implemented"); + sycl::detail::pi::die("Device info request not implemented"); return {}; } @@ -1821,7 +1819,7 @@ pi_result hip_piextDeviceCreateWithNativeHandle(pi_native_handle nativeHandle, (void)nativeHandle; (void)platform; (void)device; - cl::sycl::detail::pi::die( + sycl::detail::pi::die( "Creation of PI device from native handle not implemented"); return {}; } @@ -1878,7 +1876,7 @@ pi_result hip_piContextCreate(const pi_context_properties *properties, break; default: // Unknown property. - cl::sycl::detail::pi::die( + sycl::detail::pi::die( "Unknown piContextCreate property in property list"); return PI_ERROR_INVALID_VALUE; } @@ -2001,7 +1999,7 @@ pi_result hip_piextContextCreateWithNativeHandle(pi_native_handle nativeHandle, (void)devices; (void)ownNativeHandle; (void)context; - cl::sycl::detail::pi::die( + sycl::detail::pi::die( "Creation of PI context from native handle not implemented"); return {}; } @@ -2144,7 +2142,7 @@ pi_result hip_piMemRelease(pi_mem memObj) { // error for which it is unclear if the function that reported it succeeded // or not. 
Either way, the state of the program is compromised and likely // unrecoverable. - cl::sycl::detail::pi::die( + sycl::detail::pi::die( "Unrecoverable program state reached in hip_piMemRelease"); } @@ -2228,7 +2226,7 @@ pi_result hip_piMemGetInfo(pi_mem memObj, pi_mem_info queriedInfo, (void)queryOutput; (void)writtenQuerySize; - cl::sycl::detail::pi::die("hip_piMemGetInfo not implemented"); + sycl::detail::pi::die("hip_piMemGetInfo not implemented"); } /// Gets the native HIP handle of a PI mem object @@ -2283,7 +2281,7 @@ pi_result hip_piextMemCreateWithNativeHandle(pi_native_handle nativeHandle, (void)ownNativeHandle; (void)mem; - cl::sycl::detail::pi::die( + sycl::detail::pi::die( "Creation of PI mem from native handle not implemented"); return {}; } @@ -2352,7 +2350,7 @@ pi_result hip_piQueueGetInfo(pi_queue command_queue, pi_queue_info param_name, default: __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); } - cl::sycl::detail::pi::die("Queue info request not implemented"); + sycl::detail::pi::die("Queue info request not implemented"); return {}; } @@ -2460,7 +2458,7 @@ pi_result hip_piextQueueCreateWithNativeHandle(pi_native_handle nativeHandle, (void)device; (void)queue; (void)ownNativeHandle; - cl::sycl::detail::pi::die( + sycl::detail::pi::die( "Creation of PI queue from native handle not implemented"); return {}; } @@ -2666,7 +2664,7 @@ pi_result hip_piextKernelSetArgMemObj(pi_kernel kernel, pi_uint32 arg_index, if (Format != HIP_AD_FORMAT_UNSIGNED_INT32 && Format != HIP_AD_FORMAT_SIGNED_INT32 && Format != HIP_AD_FORMAT_HALF && Format != HIP_AD_FORMAT_FLOAT) { - cl::sycl::detail::pi::die( + sycl::detail::pi::die( "PI HIP kernels only support images with channel types int32, " "uint32, float, and half."); } @@ -2848,7 +2846,7 @@ hip_piEnqueueNativeKernel(pi_queue queue, void (*user_func)(void *), void *args, (void)event_wait_list; (void)event; - cl::sycl::detail::pi::die("Not implemented in HIP backend"); + sycl::detail::pi::die("Not implemented in HIP backend"); return {}; } @@ -2869,7 +2867,7 @@ pi_result hip_piMemImageCreate(pi_context context, pi_mem_flags flags, // TODO: check SYCL CTS and spec. 
May also have to support BGRA if (image_format->image_channel_order != pi_image_channel_order::PI_IMAGE_CHANNEL_ORDER_RGBA) { - cl::sycl::detail::pi::die( + sycl::detail::pi::die( "hip_piMemImageCreate only supports RGBA channel order"); } @@ -2930,7 +2928,7 @@ pi_result hip_piMemImageCreate(pi_context context, pi_mem_flags flags, pixel_type_size_bytes = 4; break; default: - cl::sycl::detail::pi::die( + sycl::detail::pi::die( "hip_piMemImageCreate given unsupported image_channel_data_type"); } @@ -3017,7 +3015,7 @@ pi_result hip_piMemImageGetInfo(pi_mem image, pi_image_info param_name, (void)param_value; (void)param_value_size_ret; - cl::sycl::detail::pi::die("hip_piMemImageGetInfo not implemented"); + sycl::detail::pi::die("hip_piMemImageGetInfo not implemented"); return {}; } @@ -3041,8 +3039,7 @@ pi_result hip_piclProgramCreateWithSource(pi_context context, pi_uint32 count, (void)lengths; (void)program; - cl::sycl::detail::pi::hipPrint( - "hip_piclProgramCreateWithSource not implemented"); + sycl::detail::pi::hipPrint("hip_piclProgramCreateWithSource not implemented"); return PI_ERROR_INVALID_OPERATION; } @@ -3082,7 +3079,7 @@ pi_result hip_piProgramCreate(pi_context context, const void *il, size_t length, (void)length; (void)res_program; - cl::sycl::detail::pi::die("hip_piProgramCreate not implemented"); + sycl::detail::pi::die("hip_piProgramCreate not implemented"); return {}; } @@ -3164,7 +3161,7 @@ pi_result hip_piProgramGetInfo(pi_program program, pi_program_info param_name, default: __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); } - cl::sycl::detail::pi::die("Program info request not implemented"); + sycl::detail::pi::die("Program info request not implemented"); return {}; } @@ -3184,7 +3181,7 @@ pi_result hip_piProgramLink(pi_context context, pi_uint32 num_devices, (void)pfn_notify; (void)user_data; (void)ret_program; - cl::sycl::detail::pi::die( + sycl::detail::pi::die( "hip_piProgramLink: linking not supported with hip backend"); return {}; } @@ -3242,7 +3239,7 @@ pi_result hip_piProgramGetBuildInfo(pi_program program, pi_device device, default: __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); } - cl::sycl::detail::pi::die("Program Build info request not implemented"); + sycl::detail::pi::die("Program Build info request not implemented"); return {}; } @@ -3317,7 +3314,7 @@ pi_result hip_piextProgramCreateWithNativeHandle(pi_native_handle nativeHandle, (void)ownNativeHandle; (void)program; - cl::sycl::detail::pi::die( + sycl::detail::pi::die( "Creation of PI program from native handle not implemented"); return {}; } @@ -3370,7 +3367,7 @@ pi_result hip_piKernelGetGroupInfo(pi_kernel kernel, pi_device device, switch (param_name) { case PI_KERNEL_GROUP_INFO_WORK_GROUP_SIZE: { int max_threads = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( hipFuncGetAttribute(&max_threads, HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, kernel->get()) == hipSuccess); @@ -3391,7 +3388,7 @@ pi_result hip_piKernelGetGroupInfo(pi_kernel kernel, pi_device device, case PI_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE: { // OpenCL LOCAL == HIP SHARED int bytes = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( hipFuncGetAttribute(&bytes, HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, kernel->get()) == hipSuccess); return getInfo(param_value_size, param_value, param_value_size_ret, @@ -3400,7 +3397,7 @@ pi_result hip_piKernelGetGroupInfo(pi_kernel kernel, pi_device device, case PI_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: { // Work groups should be multiples of the warp size int 
warpSize = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( hipDeviceGetAttribute(&warpSize, hipDeviceAttributeWarpSize, device->get()) == hipSuccess); return getInfo(param_value_size, param_value, param_value_size_ret, @@ -3409,15 +3406,15 @@ pi_result hip_piKernelGetGroupInfo(pi_kernel kernel, pi_device device, case PI_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE: { // OpenCL PRIVATE == HIP LOCAL int bytes = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( hipFuncGetAttribute(&bytes, HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES, kernel->get()) == hipSuccess); return getInfo(param_value_size, param_value, param_value_size_ret, pi_uint64(bytes)); } case PI_KERNEL_GROUP_INFO_NUM_REGS: { - cl::sycl::detail::pi::die("PI_KERNEL_GROUP_INFO_NUM_REGS in " - "piKernelGetGroupInfo not implemented\n"); + sycl::detail::pi::die("PI_KERNEL_GROUP_INFO_NUM_REGS in " + "piKernelGetGroupInfo not implemented\n"); return {}; } @@ -3441,7 +3438,7 @@ pi_result hip_piKernelGetSubGroupInfo( case PI_KERNEL_MAX_SUB_GROUP_SIZE: { // Sub-group size is equivalent to warp size int warpSize = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( hipDeviceGetAttribute(&warpSize, hipDeviceAttributeWarpSize, device->get()) == hipSuccess); return getInfo(param_value_size, param_value, param_value_size_ret, @@ -3450,7 +3447,7 @@ pi_result hip_piKernelGetSubGroupInfo( case PI_KERNEL_MAX_NUM_SUB_GROUPS: { // Number of sub-groups = max block size / warp size + possible remainder int max_threads = 0; - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( hipFuncGetAttribute(&max_threads, HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, kernel->get()) == hipSuccess); @@ -3524,8 +3521,7 @@ pi_result hip_piextProgramSetSpecializationConstant(pi_program, pi_uint32, size_t, const void *) { // This entry point is only used for native specialization constants (SPIR-V), // and the HIP plugin is AOT only so this entry point is not supported. 
- cl::sycl::detail::pi::die( - "Native specialization constants are not supported"); + sycl::detail::pi::die("Native specialization constants are not supported"); return {}; } @@ -3542,7 +3538,7 @@ pi_result hip_piEventCreate(pi_context context, pi_event *event) { (void)context; (void)event; - cl::sycl::detail::pi::die("PI Event Create not implemented in HIP backend"); + sycl::detail::pi::die("PI Event Create not implemented in HIP backend"); } pi_result hip_piEventGetInfo(pi_event event, pi_event_info param_name, @@ -3603,7 +3599,7 @@ pi_result hip_piEventGetProfilingInfo(pi_event event, default: __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); } - cl::sycl::detail::pi::die("Event Profiling info request not implemented"); + sycl::detail::pi::die("Event Profiling info request not implemented"); return {}; } @@ -3615,7 +3611,7 @@ pi_result hip_piEventSetCallback(pi_event event, (void)notify; (void)user_data; - cl::sycl::detail::pi::die("Event Callback not implemented in HIP backend"); + sycl::detail::pi::die("Event Callback not implemented in HIP backend"); return PI_SUCCESS; } @@ -3623,7 +3619,7 @@ pi_result hip_piEventSetStatus(pi_event event, pi_int32 execution_status) { (void)event; (void)execution_status; - cl::sycl::detail::pi::die("Event Set Status not implemented in HIP backend"); + sycl::detail::pi::die("Event Set Status not implemented in HIP backend"); return PI_ERROR_INVALID_VALUE; } @@ -3632,7 +3628,7 @@ pi_result hip_piEventRetain(pi_event event) { const auto refCount = event->increment_reference_count(); - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( refCount != 0, "Reference count overflow detected in hip_piEventRetain."); return PI_SUCCESS; @@ -3643,7 +3639,7 @@ pi_result hip_piEventRelease(pi_event event) { // double delete or someone is messing with the ref count. // either way, cannot safely proceed. - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( event->get_reference_count() != 0, "Reference count overflow detected in hip_piEventRelease."); @@ -3754,7 +3750,7 @@ pi_result hip_piextEventCreateWithNativeHandle(pi_native_handle nativeHandle, (void)ownNativeHandle; (void)event; - cl::sycl::detail::pi::die( + sycl::detail::pi::die( "Creation of PI event from native handle not implemented"); return {}; } @@ -3885,7 +3881,7 @@ pi_result hip_piSamplerRelease(pi_sampler sampler) { // double delete or someone is messing with the ref count. // either way, cannot safely proceed. - cl::sycl::detail::pi::assertion( + sycl::detail::pi::assertion( sampler->get_reference_count() != 0, "Reference count overflow detected in hip_piSamplerRelease."); @@ -4276,7 +4272,7 @@ static size_t imageElementByteSize(hipArray_Format array_format) { default: return 0; } - cl::sycl::detail::pi::die("Invalid iamge format."); + sycl::detail::pi::die("Invalid iamge format."); return 0; } @@ -4577,7 +4573,7 @@ pi_result hip_piEnqueueMemImageFill(pi_queue command_queue, pi_mem image, (void)event_wait_list; (void)event; - cl::sycl::detail::pi::die("hip_piEnqueueMemImageFill not implemented"); + sycl::detail::pi::die("hip_piEnqueueMemImageFill not implemented"); return {}; } diff --git a/sycl/source/detail/kernel_program_cache.hpp b/sycl/source/detail/kernel_program_cache.hpp index dabf01445247e..38a510f02f78f 100644 --- a/sycl/source/detail/kernel_program_cache.hpp +++ b/sycl/source/detail/kernel_program_cache.hpp @@ -30,7 +30,7 @@ namespace detail { class context_impl; class KernelProgramCache { public: - /// Denotes build error data. 
The data is filled in from cl::sycl::exception + /// Denotes build error data. The data is filled in from sycl::exception /// class instance. struct BuildError { std::string Msg; diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 98e98767cc0ca..ebb31d6cd96a8 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -901,7 +901,7 @@ ProgramManager::getDeviceImage(OSModuleHandle M, KernelSetId KSId, pi_uint32 ImgInd = 0; RTDeviceBinaryImage *Img = nullptr; - // TODO: There may be cases with cl::sycl::program class usage in source code + // TODO: There may be cases with sycl::program class usage in source code // that will result in a multi-device context. This case needs to be handled // here or at the program_impl class level diff --git a/sycl/source/detail/scheduler/graph_processor.cpp b/sycl/source/detail/scheduler/graph_processor.cpp index 2164f4e9d2900..789734b8bd896 100644 --- a/sycl/source/detail/scheduler/graph_processor.cpp +++ b/sycl/source/detail/scheduler/graph_processor.cpp @@ -26,7 +26,7 @@ void Scheduler::GraphProcessor::waitForEvent(EventImplPtr Event, std::vector &ToCleanUp, bool LockTheLock) { Command *Cmd = getCommand(Event); - // Command can be nullptr if user creates cl::sycl::event explicitly or the + // Command can be nullptr if user creates sycl::event explicitly or the // event has been waited on by another thread if (!Cmd) return; diff --git a/sycl/test/basic_tests/is_device_copyable_neg.cpp b/sycl/test/basic_tests/is_device_copyable_neg.cpp index 4791e4e6e9ffe..2b0966a8f6a9a 100644 --- a/sycl/test/basic_tests/is_device_copyable_neg.cpp +++ b/sycl/test/basic_tests/is_device_copyable_neg.cpp @@ -6,7 +6,7 @@ #include -using namespace cl::sycl; +using namespace sycl; struct A { int i; diff --git a/sycl/test/basic_tests/vectors/ctad_fail.cpp b/sycl/test/basic_tests/vectors/ctad_fail.cpp index 72cf96b42bbb9..6046d08e58698 100644 --- a/sycl/test/basic_tests/vectors/ctad_fail.cpp +++ b/sycl/test/basic_tests/vectors/ctad_fail.cpp @@ -1,8 +1,6 @@ // RUN: %clangxx %fsycl-host-only -fsyntax-only -Xclang -verify -Xclang -verify-ignore-unexpected %s #include -namespace sycl = cl::sycl; - int main() { sycl::vec v(1, .1); // expected-error {{no viable constructor or deduction guide for deduction of template arguments of 'vec'}} } diff --git a/sycl/test/regression/fsycl-host-compiler-win.cpp b/sycl/test/regression/fsycl-host-compiler-win.cpp index 65f4dd52f9a38..d149d87c10aa4 100644 --- a/sycl/test/regression/fsycl-host-compiler-win.cpp +++ b/sycl/test/regression/fsycl-host-compiler-win.cpp @@ -22,7 +22,7 @@ #error predefined macro not set #endif // DEFINE_CHECK -using namespace cl::sycl; +using namespace sycl; int main() { int data[] = {0, 0, 0}; diff --git a/sycl/test/regression/multi_targeting.cpp b/sycl/test/regression/multi_targeting.cpp index a93143413c842..81588dfe1fe46 100644 --- a/sycl/test/regression/multi_targeting.cpp +++ b/sycl/test/regression/multi_targeting.cpp @@ -10,7 +10,7 @@ #include -using namespace cl::sycl; +using namespace sycl; int main() { sycl::queue q; diff --git a/sycl/unittests/pi/TestGetPlatforms.hpp b/sycl/unittests/pi/TestGetPlatforms.hpp index 8aeb3a26ecb8c..c089bad858a42 100644 --- a/sycl/unittests/pi/TestGetPlatforms.hpp +++ b/sycl/unittests/pi/TestGetPlatforms.hpp @@ -11,16 +11,15 @@ #include namespace pi { -inline std::vector getPlatformsWithName(const char *name) { - std::vector platforms = - 
cl::sycl::platform::get_platforms(); +inline std::vector getPlatformsWithName(const char *name) { + std::vector platforms = sycl::platform::get_platforms(); // Remove platforms that have no devices or doesn't contain the name auto end = std::remove_if(platforms.begin(), platforms.end(), - [=](const cl::sycl::platform &platform) -> bool { + [=](const sycl::platform &platform) -> bool { const std::string platformName = - platform.get_info(); + platform.get_info(); return platformName.find(name) == std::string::npos || platform.get_devices().size() == 0; }); diff --git a/sycl/unittests/pi/cuda/test_base_objects.cpp b/sycl/unittests/pi/cuda/test_base_objects.cpp index a8a695c11335c..15f7f7d2651b1 100644 --- a/sycl/unittests/pi/cuda/test_base_objects.cpp +++ b/sycl/unittests/pi/cuda/test_base_objects.cpp @@ -21,7 +21,7 @@ const unsigned int LATEST_KNOWN_CUDA_DRIVER_API_VERSION = 3020u; -using namespace cl::sycl; +using namespace sycl; class CudaBaseObjectsTest : public ::testing::Test { protected: diff --git a/sycl/unittests/pi/cuda/test_commands.cpp b/sycl/unittests/pi/cuda/test_commands.cpp index 453ca8c5e7705..5a57aa8471f13 100644 --- a/sycl/unittests/pi/cuda/test_commands.cpp +++ b/sycl/unittests/pi/cuda/test_commands.cpp @@ -17,7 +17,7 @@ #include #include -using namespace cl::sycl; +using namespace sycl; struct CudaCommandsTest : public ::testing::Test { diff --git a/sycl/unittests/pi/cuda/test_contexts.cpp b/sycl/unittests/pi/cuda/test_contexts.cpp index 30ec8eb7699a3..d021081716b9a 100644 --- a/sycl/unittests/pi/cuda/test_contexts.cpp +++ b/sycl/unittests/pi/cuda/test_contexts.cpp @@ -21,7 +21,7 @@ #include #include -using namespace cl::sycl; +using namespace sycl; struct CudaContextsTest : public ::testing::Test { diff --git a/sycl/unittests/pi/cuda/test_device.cpp b/sycl/unittests/pi/cuda/test_device.cpp index 25bbd2fd80d91..e426cb170673c 100644 --- a/sycl/unittests/pi/cuda/test_device.cpp +++ b/sycl/unittests/pi/cuda/test_device.cpp @@ -16,7 +16,7 @@ #include #include -using namespace cl::sycl; +using namespace sycl; struct CudaDeviceTests : public ::testing::Test { diff --git a/sycl/unittests/pi/cuda/test_interop_get_native.cpp b/sycl/unittests/pi/cuda/test_interop_get_native.cpp index d6dd70ae27513..903d44043cda6 100644 --- a/sycl/unittests/pi/cuda/test_interop_get_native.cpp +++ b/sycl/unittests/pi/cuda/test_interop_get_native.cpp @@ -15,7 +15,7 @@ #include -using namespace cl::sycl; +using namespace sycl; struct CudaInteropGetNativeTests : public ::testing::TestWithParam { diff --git a/sycl/unittests/pi/cuda/test_kernels.cpp b/sycl/unittests/pi/cuda/test_kernels.cpp index c6273b35e6bb7..d485bb218a1dc 100644 --- a/sycl/unittests/pi/cuda/test_kernels.cpp +++ b/sycl/unittests/pi/cuda/test_kernels.cpp @@ -19,7 +19,7 @@ // PI CUDA kernels carry an additional argument for the implicit global offset. 
#define NUM_IMPLICIT_ARGS 1 -using namespace cl::sycl; +using namespace sycl; struct CudaKernelsTest : public ::testing::Test { diff --git a/sycl/unittests/pi/cuda/test_mem_obj.cpp b/sycl/unittests/pi/cuda/test_mem_obj.cpp index deb0b6347a268..46fc4a007526d 100644 --- a/sycl/unittests/pi/cuda/test_mem_obj.cpp +++ b/sycl/unittests/pi/cuda/test_mem_obj.cpp @@ -18,7 +18,7 @@ #include #include -using namespace cl::sycl; +using namespace sycl; struct CudaTestMemObj : public ::testing::Test { diff --git a/sycl/unittests/pi/cuda/test_primary_context.cpp b/sycl/unittests/pi/cuda/test_primary_context.cpp index feffa0833a632..f9ce627d126ad 100644 --- a/sycl/unittests/pi/cuda/test_primary_context.cpp +++ b/sycl/unittests/pi/cuda/test_primary_context.cpp @@ -16,7 +16,7 @@ #include -using namespace cl::sycl; +using namespace sycl; struct CudaPrimaryContextTests : public ::testing::TestWithParam { diff --git a/sycl/unittests/pi/cuda/test_sampler_properties.cpp b/sycl/unittests/pi/cuda/test_sampler_properties.cpp index 3814e74a4d9b3..2c324dccf7520 100644 --- a/sycl/unittests/pi/cuda/test_sampler_properties.cpp +++ b/sycl/unittests/pi/cuda/test_sampler_properties.cpp @@ -17,7 +17,7 @@ namespace { -using namespace cl::sycl; +using namespace sycl; class SamplerPropertiesTest : public ::testing::TestWithParam