[SYCL] Implement braced-init-list or a number as range for queue::parallel_for

rarutyun · rarutyun · commit d59bfddca8c9 · 2020-06-19T16:12:13.000+03:00
Modification:
    Make three different overloads of queue::parallel_for to support
implicit conversion to range from a number or a braced-init-list
    Add tests for queue::parallel_for calls with a generic lambda

Signed-off-by: Ruslan Arutyunyan <ruslan.arutyunyan@intel.com>
diff --git a/sycl/include/CL/sycl/queue.hpp b/sycl/include/CL/sycl/queue.hpp
@@ -432,10 +432,9 @@ class __SYCL_EXPORT queue {
   /// \param NumWorkItems is a range that specifies the work space of the kernel
   /// \param KernelFunc is the Kernel functor or lambda
   /// \param CodeLoc contains the code location of user code
-  template <typename KernelName = detail::auto_name, typename KernelType,
-            int Dims>
+  template <typename KernelName = detail::auto_name, typename KernelType>
   event parallel_for(
-      range<Dims> NumWorkItems, KernelType KernelFunc
+      range<1> NumWorkItems, KernelType KernelFunc
 #ifndef DISABLE_SYCL_INSTRUMENTATION_METADATA
       ,
       const detail::code_location &CodeLoc = detail::code_location::current()
@@ -444,12 +443,47 @@ class __SYCL_EXPORT queue {
 #ifdef DISABLE_SYCL_INSTRUMENTATION_METADATA
     const detail::code_location &CodeLoc = {};
 #endif
-    return submit(
-        [&](handler &CGH) {
-          CGH.template parallel_for<KernelName, KernelType>(NumWorkItems,
-                                                            KernelFunc);
-        },
-        CodeLoc);
+    return parallel_for_impl<KernelName>(NumWorkItems, KernelFunc, CodeLoc);
+  }
+
+  /// 2-D overload of parallel_for (lambda kernel + global range only); taking
+  /// range<2> by value lets a braced-init-list such as {x, y} convert to it.
+  /// KernelName is forwarded explicitly because it cannot be deduced.
+  /// \param NumWorkItems is a range that specifies the work space of the kernel
+  /// \param KernelFunc is the Kernel functor or lambda
+  /// \param CodeLoc contains the code location of user code
+  template <typename KernelName = detail::auto_name, typename KernelType>
+  event parallel_for(
+      range<2> NumWorkItems, KernelType KernelFunc
+#ifndef DISABLE_SYCL_INSTRUMENTATION_METADATA
+      ,
+      const detail::code_location &CodeLoc = detail::code_location::current()
+#endif
+  ) {
+#ifdef DISABLE_SYCL_INSTRUMENTATION_METADATA
+    const detail::code_location &CodeLoc = {};
+#endif
+    return parallel_for_impl<KernelName>(NumWorkItems, KernelFunc, CodeLoc);
+  }
+
+  /// 3-D overload of parallel_for (lambda kernel + global range only); taking
+  /// range<3> by value lets a braced-init-list such as {x, y, z} convert to it.
+  /// KernelName is forwarded explicitly because it cannot be deduced.
+  /// \param NumWorkItems is a range that specifies the work space of the kernel
+  /// \param KernelFunc is the Kernel functor or lambda
+  /// \param CodeLoc contains the code location of user code
+  template <typename KernelName = detail::auto_name, typename KernelType>
+  event parallel_for(
+      range<3> NumWorkItems, KernelType KernelFunc
+#ifndef DISABLE_SYCL_INSTRUMENTATION_METADATA
+      ,
+      const detail::code_location &CodeLoc = detail::code_location::current()
+#endif
+  ) {
+#ifdef DISABLE_SYCL_INSTRUMENTATION_METADATA
+    const detail::code_location &CodeLoc = {};
+#endif
+    return parallel_for_impl<KernelName>(NumWorkItems, KernelFunc, CodeLoc);
   }
 
   /// parallel_for version with a kernel represented as a lambda + range that
@@ -716,6 +750,25 @@ class __SYCL_EXPORT queue {
   /// A template-free version of submit.
   event submit_impl(function_class<void(handler &)> CGH, queue secondQueue,
                     const detail::code_location &CodeLoc);
+
+  /// Shared implementation of the range<1>/range<2>/range<3> parallel_for
+  /// overloads: submits a command group that calls handler::parallel_for.
+  /// KernelName cannot be deduced: pass it explicitly or auto_name is used.
+  /// \param NumWorkItems is a range that specifies the work space of the kernel
+  /// \param KernelFunc is the Kernel functor or lambda
+  /// \param CodeLoc contains the code location of user code
+  template <typename KernelName = detail::auto_name, typename KernelType,
+            int Dims>
+  event parallel_for_impl(
+      range<Dims> NumWorkItems, KernelType KernelFunc,
+      const detail::code_location &CodeLoc = detail::code_location::current()) {
+    return submit(
+        [&](handler &CGH) {
+          CGH.template parallel_for<KernelName, KernelType>(NumWorkItems,
+                                                            KernelFunc);
+        },
+        CodeLoc);
+  }
 };
 
 } // namespace sycl
diff --git a/sycl/test/basic_tests/queue/queue.cpp b/sycl/test/basic_tests/queue/queue.cpp
diff --git a/sycl/test/basic_tests/queue/queue_parallel_for_generic.cpp b/sycl/test/basic_tests/queue/queue_parallel_for_generic.cpp
@@ -0,0 +1,72 @@
+// UNSUPPORTED: cuda
+// CUDA does not support unnamed lambdas.
+//
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-unnamed-lambda %s -o %t.out
+// RUN: env SYCL_DEVICE_TYPE=HOST %t.out
+// RUN: %ACC_RUN_PLACEHOLDER %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+
+//==- queue_parallel_for_generic.cpp - SYCL queue parallel_for generic lambda -=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===------------------------------------------------------------------------===//
+
+#include <CL/sycl.hpp>
+#include <iostream>
+#include <type_traits>
+
+int main() {
+  sycl::queue q{};
+  auto dev = q.get_device();
+  auto ctx = q.get_context();
+  constexpr int N = 8;
+
+  if (dev.get_info<sycl::info::device::usm_shared_allocations>()) {
+    auto A = static_cast<int *>(sycl::malloc_shared(N * sizeof(int), dev, ctx));
+    for (int i = 0; i < N; i++)
+      A[i] = 1;
+
+    // USM allocations carry no implicit dependencies and the default queue is
+    // out-of-order, so wait on each kernel before the next one touches A.
+    q.parallel_for(N, [=](auto i) {
+      static_assert(std::is_same<decltype(i), sycl::item<1>>::value,
+                    "lambda arg type is unexpected");
+      A[i]++;
+    }).wait();
+
+    q.parallel_for<class Foo>({N}, [=](auto i) {
+      static_assert(std::is_same<decltype(i), sycl::item<1>>::value,
+                    "lambda arg type is unexpected");
+      A[i]++;
+    }).wait();
+
+    sycl::id<1> offset(0);
+    q.parallel_for<class Baz>(sycl::range<1>{N}, offset, [=](auto i) {
+      static_assert(std::is_same<decltype(i), sycl::item<1>>::value,
+                    "lambda arg type is unexpected");
+      A[i]++;
+    }).wait();
+
+    sycl::nd_range<1> NDR(sycl::range<1>{N}, sycl::range<1>{2});
+    q.parallel_for<class NDFoo>(NDR, [=](auto nd_i) {
+      static_assert(std::is_same<decltype(nd_i), sycl::nd_item<1>>::value,
+                    "lambda arg type is unexpected");
+      auto i = nd_i.get_global_id(0);
+      A[i]++;
+    }).wait();
+
+    // Every element started at 1 and was incremented once by each of 4 kernels.
+    bool AllFive = true;
+    for (int i = 0; i < N; i++)
+      AllFive = AllFive && A[i] == 5;
+    sycl::free(A, ctx); // free before returning so a failure does not leak
+    if (!AllFive)
+      return 1;
+  }
+
+  return 0;
+}
diff --git a/sycl/test/basic_tests/queue/queue_parallel_for_interface.cpp b/sycl/test/basic_tests/queue/queue_parallel_for_interface.cpp
@@ -0,0 +1,81 @@
+// UNSUPPORTED: cuda
+// CUDA does not support unnamed lambdas.
+//
+// RUN: %clangxx -fsycl -fsyntax-only -fsycl-unnamed-lambda %s -o %t.out
+
+//==- queue_parallel_for_interface.cpp - SYCL queue::parallel_for interface -=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===------------------------------------------------------------------------===//
+
+#include <CL/sycl.hpp>
+#include <iostream>
+#include <type_traits>
+
+template <std::size_t... Is>
+void test_range_impl(sycl::queue q, std::index_sequence<Is...>,
+                     sycl::range<sizeof...(Is)> *) {
+  constexpr auto dims = sizeof...(Is); // one index per dimension
+  // Passing an explicit range<dims> must give the generic lambda an item<dims>.
+  q.parallel_for(sycl::range<dims>{Is...}, [=](auto i) {
+    static_assert(std::is_same<decltype(i), sycl::item<dims>>::value,
+                  "lambda arg type is unexpected");
+  });
+}
+
+template <std::size_t... Is>
+void test_range_impl(sycl::queue q, std::index_sequence<Is...>,
+                     sycl::nd_range<sizeof...(Is)> *) {
+  constexpr auto dims = sizeof...(Is); // one index per dimension
+  // Passing an nd_range<dims> must give the generic lambda an nd_item<dims>.
+  sycl::nd_range<dims> ndr{sycl::range<dims>{Is...}, sycl::range<dims>{Is...}};
+  q.parallel_for(ndr, [=](auto i) {
+    static_assert(std::is_same<decltype(i), sycl::nd_item<dims>>::value,
+                  "lambda arg type is unexpected");
+  });
+}
+
+template <template <int> class Range, std::size_t Dims>
+void test_range(sycl::queue q) {
+  test_range_impl(q, std::make_index_sequence<Dims>{},
+                  static_cast<Range<Dims> *>(nullptr)); // null tag picks overload
+}
+
+void test_number_braced_init_list(sycl::queue q) {
+  constexpr auto n = 1;
+  q.parallel_for(n, [=](auto i) { // bare number -> item<1>
+    static_assert(std::is_same<decltype(i), sycl::item<1>>::value,
+                  "lambda arg type is unexpected");
+  });
+
+  q.parallel_for({n}, [=](auto i) { // one-element braced list -> item<1>
+    static_assert(std::is_same<decltype(i), sycl::item<1>>::value,
+                  "lambda arg type is unexpected");
+  });
+
+  q.parallel_for({n, n}, [=](auto i) { // two elements -> item<2>
+    static_assert(std::is_same<decltype(i), sycl::item<2>>::value,
+                  "lambda arg type is unexpected");
+  });
+
+  q.parallel_for({n, n, n}, [=](auto i) { // three elements -> item<3>
+    static_assert(std::is_same<decltype(i), sycl::item<3>>::value,
+                  "lambda arg type is unexpected");
+  });
+}
+
+int main() {
+  sycl::queue q{};
+  // Implicit conversions first: a bare number and braced-init-lists.
+  test_number_braced_init_list(q);
+  // Then explicit range and nd_range arguments in 1, 2 and 3 dimensions.
+  test_range<sycl::range, 1>(q);
+  test_range<sycl::range, 2>(q);
+  test_range<sycl::range, 3>(q);
+  test_range<sycl::nd_range, 1>(q);
+  test_range<sycl::nd_range, 2>(q);
+  test_range<sycl::nd_range, 3>(q);
+}