intel · bader · Dec 23, 2021 · Dec 15, 2021 · Dec 15, 2021 · Dec 15, 2021
@@ -11,10 +11,14 @@ using namespace sycl;
 
 // Floating-point types do not support pre- or post-increment
 template <> void add_test<float>(queue q, size_t N) {
-  add_fetch_test<::sycl::ext::oneapi::atomic_ref, float>(q, N);
-  add_fetch_test<::sycl::atomic_ref, float>(q, N);
-  add_plus_equal_test<::sycl::ext::oneapi::atomic_ref, float>(q, N);
-  add_plus_equal_test<::sycl::atomic_ref, float>(q, N);
+  add_fetch_test<::sycl::ext::oneapi::atomic_ref,
+                 access::address_space::global_space, float>(q, N);
+  add_fetch_test<::sycl::atomic_ref, access::address_space::global_space,
+                 float>(q, N);
+  add_plus_equal_test<::sycl::ext::oneapi::atomic_ref,
+                      access::address_space::global_space, float>(q, N);
+  add_plus_equal_test<::sycl::atomic_ref, access::address_space::global_space,
+                      float>(q, N);
 }
 
 int main() {

@@ -11,7 +11,8 @@ using namespace sycl::ext::oneapi;
 
 template <template <typename, memory_order, memory_scope, access::address_space>
           class AtomicRef,
-          typename T, typename Difference = T>
+          access::address_space address_space, typename T,
+          typename Difference = T>
 void add_fetch_test(queue q, size_t N) {
   T sum = 0;
   std::vector<T> output(N);
@@ -27,7 +28,7 @@ void add_fetch_test(queue q, size_t N) {
       cgh.parallel_for(range<1>(N), [=](item<1> it) {
         int gid = it.get_id(0);
         auto atm = AtomicRef<T, memory_order::relaxed, memory_scope::device,
-                             access::address_space::global_space>(sum[0]);
+                             address_space>(sum[0]);
         out[gid] = atm.fetch_add(Difference(1));
       });
     });
@@ -48,7 +49,8 @@ void add_fetch_test(queue q, size_t N) {
 
 template <template <typename, memory_order, memory_scope, access::address_space>
           class AtomicRef,
-          typename T, typename Difference = T>
+          access::address_space address_space, typename T,
+          typename Difference = T>
 void add_plus_equal_test(queue q, size_t N) {
   T sum = 0;
   std::vector<T> output(N);
@@ -64,7 +66,7 @@ void add_plus_equal_test(queue q, size_t N) {
       cgh.parallel_for(range<1>(N), [=](item<1> it) {
         int gid = it.get_id(0);
         auto atm = AtomicRef<T, memory_order::relaxed, memory_scope::device,
-                             access::address_space::global_space>(sum[0]);
+                             address_space>(sum[0]);
         out[gid] = atm += Difference(1);
       });
     });
@@ -85,7 +87,8 @@ void add_plus_equal_test(queue q, size_t N) {
 
 template <template <typename, memory_order, memory_scope, access::address_space>
           class AtomicRef,
-          typename T, typename Difference = T>
+          access::address_space address_space, typename T,
+          typename Difference = T>
 void add_pre_inc_test(queue q, size_t N) {
   T sum = 0;
   std::vector<T> output(N);
@@ -101,7 +104,7 @@ void add_pre_inc_test(queue q, size_t N) {
       cgh.parallel_for(range<1>(N), [=](item<1> it) {
         int gid = it.get_id(0);
         auto atm = AtomicRef<T, memory_order::relaxed, memory_scope::device,
-                             access::address_space::global_space>(sum[0]);
+                             address_space>(sum[0]);
         out[gid] = ++atm;
       });
     });
@@ -122,7 +125,8 @@ void add_pre_inc_test(queue q, size_t N) {
 
 template <template <typename, memory_order, memory_scope, access::address_space>
           class AtomicRef,
-          typename T, typename Difference = T>
+          access::address_space address_space, typename T,
+          typename Difference = T>
 void add_post_inc_test(queue q, size_t N) {
   T sum = 0;
   std::vector<T> output(N);
@@ -138,7 +142,7 @@ void add_post_inc_test(queue q, size_t N) {
       cgh.parallel_for(range<1>(N), [=](item<1> it) {
         int gid = it.get_id(0);
         auto atm = AtomicRef<T, memory_order::relaxed, memory_scope::device,
-                             access::address_space::global_space>(sum[0]);
+                             address_space>(sum[0]);
         out[gid] = atm++;
       });
     });
@@ -159,12 +163,34 @@ void add_post_inc_test(queue q, size_t N) {
 
 template <typename T, typename Difference = T>
 void add_test(queue q, size_t N) {
-  add_fetch_test<::sycl::ext::oneapi::atomic_ref, T, Difference>(q, N);
-  add_fetch_test<::sycl::atomic_ref, T, Difference>(q, N);
-  add_plus_equal_test<::sycl::ext::oneapi::atomic_ref, T, Difference>(q, N);
-  add_plus_equal_test<::sycl::atomic_ref, T, Difference>(q, N);
-  add_pre_inc_test<::sycl::ext::oneapi::atomic_ref, T, Difference>(q, N);
-  add_pre_inc_test<::sycl::atomic_ref, T, Difference>(q, N);
-  add_post_inc_test<::sycl::ext::oneapi::atomic_ref, T, Difference>(q, N);
-  add_post_inc_test<::sycl::atomic_ref, T, Difference>(q, N);
+  add_fetch_test<::sycl::ext::oneapi::atomic_ref,
+                 access::address_space::global_space, T, Difference>(q, N);
+  add_fetch_test<::sycl::atomic_ref, access::address_space::global_space, T,
+                 Difference>(q, N);
+  add_plus_equal_test<::sycl::ext::oneapi::atomic_ref,
+                      access::address_space::global_space, T, Difference>(q, N);
+  add_plus_equal_test<::sycl::atomic_ref, access::address_space::global_space,
+                      T, Difference>(q, N);
+  add_pre_inc_test<::sycl::ext::oneapi::atomic_ref,
+                   access::address_space::global_space, T, Difference>(q, N);
+  add_pre_inc_test<::sycl::atomic_ref, access::address_space::global_space, T,
+                   Difference>(q, N);
+  add_post_inc_test<::sycl::ext::oneapi::atomic_ref,
+                    access::address_space::global_space, T, Difference>(q, N);
+  add_post_inc_test<::sycl::atomic_ref, access::address_space::global_space, T,
+                    Difference>(q, N);
+}
+
+template <typename T, typename Difference = T>
+void add_generic_test(queue q, size_t N) {
+  add_fetch_test<::sycl::atomic_ref, access::address_space::generic_space, T,
+                 Difference>(q, N);
+  add_plus_equal_test<::sycl::atomic_ref, access::address_space::generic_space,
+                      T, Difference>(q, N);
+  add_pre_inc_test<::sycl::atomic_ref, access::address_space::generic_space, T,
+                   Difference>(q, N);
+  add_post_inc_test<::sycl::atomic_ref, access::address_space::generic_space, T,
+                    Difference>(q, N);
+  add_post_inc_test<::sycl::atomic_ref, access::address_space::global_space, T,
+                    Difference>(q, N);
 }
@@ -11,10 +11,14 @@ using namespace sycl;
 
 // Floating-point types do not support pre- or post-increment
 template <> void add_test<double>(queue q, size_t N) {
-  add_fetch_test<::sycl::ext::oneapi::atomic_ref, double>(q, N);
-  add_fetch_test<::sycl::atomic_ref, double>(q, N);
-  add_plus_equal_test<::sycl::ext::oneapi::atomic_ref, double>(q, N);
-  add_plus_equal_test<::sycl::atomic_ref, double>(q, N);
+  add_fetch_test<::sycl::ext::oneapi::atomic_ref,
+                 access::address_space::global_space, double>(q, N);
+  add_fetch_test<::sycl::atomic_ref, access::address_space::global_space,
+                 double>(q, N);
+  add_plus_equal_test<::sycl::ext::oneapi::atomic_ref,
+                      access::address_space::global_space, double>(q, N);
+  add_plus_equal_test<::sycl::atomic_ref, access::address_space::global_space,
+                      double>(q, N);
 }
 
 int main() {

@@ -0,0 +1,52 @@
+// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out \
+// RUN: -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60
+// RUN: %HOST_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %ACC_RUN_PLACEHOLDER %t.out
+
+// CUDA backend has had no support for the generic address space yet
+// XFAIL: cuda
+
+#include "add.h"
+#include <iostream>
+using namespace sycl;
+
+// Floating-point types do not support pre- or post-increment
+template <> void add_generic_test<double>(queue q, size_t N) {
+  add_fetch_test<::sycl::atomic_ref, access::address_space::generic_space,
+                 double>(q, N);
+  add_plus_equal_test<::sycl::atomic_ref, access::address_space::generic_space,
+                      double>(q, N);
+}
+
+int main() {
+  queue q;
+
+  if (!q.get_device().has(aspect::atomic64)) {
+    std::cout << "Skipping test\n";
+    return 0;
+  }
+
+  constexpr int N = 32;
+  add_generic_test<double>(q, N);
+
+  // Include long tests if they are 64 bits wide
+  if constexpr (sizeof(long) == 8) {
+    add_generic_test<long>(q, N);
+    add_generic_test<unsigned long>(q, N);
+  }
+
+  // Include long long tests if they are 64 bits wide
+  if constexpr (sizeof(long long) == 8) {
+    add_generic_test<long long>(q, N);
+    add_generic_test<unsigned long long>(q, N);
+  }
+
+  // Include pointer tests if they are 64 bits wide
+  if constexpr (sizeof(char *) == 8) {
+    add_generic_test<char *, ptrdiff_t>(q, N);
+  }
+
+  std::cout << "Test passed." << std::endl;
+}
@@ -0,0 +1,43 @@
+// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out \
+// RUN: -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60
+// RUN: %HOST_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %ACC_RUN_PLACEHOLDER %t.out
+
+// CUDA backend has had no support for the generic address space yet
+// XFAIL: cuda
+
+#include "add.h"
+#include <iostream>
+using namespace sycl;
+
+// Floating-point types do not support pre- or post-increment
+template <> void add_generic_test<float>(queue q, size_t N) {
+  add_fetch_test<::sycl::atomic_ref, access::address_space::generic_space,
+                 float>(q, N);
+  add_plus_equal_test<::sycl::atomic_ref, access::address_space::generic_space,
+                      float>(q, N);
+}
+
+int main() {
+  queue q;
+
+  constexpr int N = 32;
+  add_generic_test<int>(q, N);
+  add_generic_test<unsigned int>(q, N);
+  add_generic_test<float>(q, N);
+
+  // Include long tests if they are 32 bits wide
+  if constexpr (sizeof(long) == 4) {
+    add_generic_test<long>(q, N);
+    add_generic_test<unsigned long>(q, N);
+  }
+
+  // Include pointer tests if they are 32 bits wide
+  if constexpr (sizeof(char *) == 4) {
+    add_generic_test<char *, ptrdiff_t>(q, N);
+  }
+
+  std::cout << "Test passed." << std::endl;
+}
@@ -11,12 +11,12 @@ using namespace sycl::ext::oneapi;
 
 template <template <typename, memory_order, memory_scope, access::address_space>
           class AtomicRef,
-          typename T>
+          access::address_space address_space, typename T>
 class assignment_kernel;
 
 template <template <typename, memory_order, memory_scope, access::address_space>
           class AtomicRef,
-          typename T>
+          access::address_space address_space, typename T>
 void assignment_test(queue q, size_t N) {
   T initial = T(N);
   T assignment = initial;
@@ -25,11 +25,11 @@ void assignment_test(queue q, size_t N) {
     q.submit([&](handler &cgh) {
       auto st =
           assignment_buf.template get_access<access::mode::read_write>(cgh);
-      cgh.parallel_for<assignment_kernel<AtomicRef, T>>(
+      cgh.parallel_for<assignment_kernel<AtomicRef, address_space, T>>(
           range<1>(N), [=](item<1> it) {
             size_t gid = it.get_id(0);
             auto atm = AtomicRef<T, memory_order::relaxed, memory_scope::device,
-                                 access::address_space::global_space>(st[0]);
+                                 address_space>(st[0]);
             atm = T(gid);
           });
     });
@@ -42,6 +42,13 @@ void assignment_test(queue q, size_t N) {
 }
 
 template <typename T> void assignment_test(queue q, size_t N) {
-  assignment_test<::sycl::ext::oneapi::atomic_ref, T>(q, N);
-  assignment_test<::sycl::atomic_ref, T>(q, N);
+  assignment_test<::sycl::ext::oneapi::atomic_ref,
+                  access::address_space::global_space, T>(q, N);
+  assignment_test<::sycl::atomic_ref, access::address_space::global_space, T>(
+      q, N);
+}
+
+template <typename T> void assignment_generic_test(queue q, size_t N) {
+  assignment_test<::sycl::atomic_ref, access::address_space::generic_space, T>(
+      q, N);
 }
@@ -0,0 +1,43 @@
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
+// RUN: %HOST_RUN_PLACEHOLDER %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+// RUN: %ACC_RUN_PLACEHOLDER %t.out
+
+// CUDA backend has had no support for the generic address space yet
+// XFAIL: cuda
+
+#include "assignment.h"
+#include <iostream>
+using namespace sycl;
+
+int main() {
+  queue q;
+
+  if (!q.get_device().has(aspect::atomic64)) {
+    std::cout << "Skipping test\n";
+    return 0;
+  }
+
+  constexpr int N = 32;
+  assignment_generic_test<double>(q, N);
+
+  // Include long tests if they are 64 bits wide
+  if constexpr (sizeof(long) == 8) {
+    assignment_generic_test<long>(q, N);
+    assignment_generic_test<unsigned long>(q, N);
+  }
+
+  // Include long long tests if they are 64 bits wide
+  if constexpr (sizeof(long long) == 8) {
+    assignment_generic_test<long long>(q, N);
+    assignment_generic_test<unsigned long long>(q, N);
+  }
+
+  // Include pointer tests if they are 64 bits wide
+  if constexpr (sizeof(char *) == 8) {
+    assignment_generic_test<char *>(q, N);
+  }
+
+  std::cout << "Test passed." << std::endl;
+}
@@ -0,0 +1,34 @@
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
+// RUN: %HOST_RUN_PLACEHOLDER %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+// RUN: %ACC_RUN_PLACEHOLDER %t.out
+
+// CUDA backend has had no support for the generic address space yet
+// XFAIL: cuda
+
+#include "assignment.h"
+#include <iostream>
+using namespace sycl;
+
+int main() {
+  queue q;
+
+  constexpr int N = 32;
+  assignment_generic_test<int>(q, N);
+  assignment_generic_test<unsigned int>(q, N);
+  assignment_generic_test<float>(q, N);
+
+  // Include long tests if they are 32 bits wide
+  if constexpr (sizeof(long) == 4) {
+    assignment_generic_test<long>(q, N);
+    assignment_generic_test<unsigned long>(q, N);
+  }
+
+  // Include pointer tests if they are 32 bits wide
+  if constexpr (sizeof(char *) == 4) {
+    assignment_generic_test<char *>(q, N);
+  }
+
+  std::cout << "Test passed." << std::endl;
+}