[SYCL][CUDA] Added tests for inorder queues that use USM. (intel#267)

JackAKirk · web-flow · commit 9c22cffe65aa · 2021-05-14T20:35:55.000+03:00
Signed-off-by: JackAKirk <jack.kirk@codeplay.com>
diff --git a/SYCL/InorderQueue/in_order_usm_explicit.cpp b/SYCL/InorderQueue/in_order_usm_explicit.cpp
@@ -0,0 +1,66 @@
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
+// RUN: %ACC_RUN_PLACEHOLDER %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+// SYCL in-order queue explicit USM test.
+// Simple test checking explicit USM functionality using a Queue with the
+// in_order property.
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#include <CL/sycl.hpp>
+#include <iostream>
+
+using namespace cl::sycl;
+
+// Checks explicit USM (device allocations + memcpy) on an in_order queue.
+// Returns 0 if every element round-trips correctly, 1 otherwise.
+int main() {
+  const int dataSize = 32;
+  const size_t numBytes = static_cast<size_t>(dataSize) * sizeof(int);
+
+  int dataA[dataSize] = {0};
+  int dataB[dataSize] = {0};
+
+  queue Queue{property::queue::in_order()};
+
+  // malloc_device<T> takes an element count, not a byte count.
+  auto devicePtrA = malloc_device<int>(dataSize, Queue);
+  Queue.memcpy(devicePtrA, &dataA, numBytes);
+
+  // No event dependencies are passed anywhere; ordering relies on in_order.
+  Queue.submit([&](handler &cgh) {
+    auto myRange = range<1>(dataSize);
+    auto myKernel = ([=](id<1> idx) { devicePtrA[idx] = idx[0]; });
+    cgh.parallel_for<class ordered_writer>(myRange, myKernel);
+  });
+
+  auto devicePtrB = malloc_device<int>(dataSize, Queue);
+  Queue.memcpy(devicePtrB, &dataB, numBytes);
+
+  Queue.submit([&](handler &cgh) {
+    auto myRange = range<1>(dataSize);
+    auto myKernel = ([=](id<1> idx) { devicePtrB[idx] = devicePtrA[idx]; });
+    cgh.parallel_for<class ordered_reader>(myRange, myKernel);
+  });
+
+  Queue.memcpy(&dataB, devicePtrB, numBytes);
+  Queue.wait();
+
+  auto ctxt = Queue.get_context();
+  free(devicePtrA, ctxt);
+  free(devicePtrB, ctxt);
+
+  int status = 0;
+  for (int i = 0; i != dataSize; ++i) {
+    if (dataB[i] != i) {
+      std::cout << "Result mismatches " << dataB[i] << " vs expected " << i
+                << " for index " << i << std::endl;
+      status = 1; // fail the test instead of only logging
+    }
+  }
+  return status;
+}
diff --git a/SYCL/InorderQueue/in_order_usm_implicit.cpp b/SYCL/InorderQueue/in_order_usm_implicit.cpp
@@ -0,0 +1,66 @@
+// REQUIRES: cuda
+//
+// Currently only CUDA is supported: it would be necessary to generalize
+// mem_advice for other devices before adding support.
+//
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
+// RUN: %ACC_RUN_PLACEHOLDER %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+// SYCL in-order queue implicit USM test.
+// Simple test checking implicit USM functionality using a Queue with the
+// in_order property.
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#include <CL/sycl.hpp>
+#include <iostream>
+
+using namespace cl::sycl;
+
+// Checks implicit (shared) USM with mem_advise on an in_order queue.
+// Returns 0 if the kernel copied every element correctly, 1 otherwise.
+int main() {
+  queue Queue{property::queue::in_order()};
+
+  // optimize for read only
+  const int mem_advice = 1;
+
+  const int dataSize = 32;
+  const size_t numBytes = static_cast<size_t>(dataSize) * sizeof(int);
+
+  // malloc_shared<T> takes an element count; only mem_advise takes bytes.
+  auto dataA = malloc_shared<int>(dataSize, Queue);
+  auto dataB = malloc_shared<int>(dataSize, Queue);
+
+  for (int i = 0; i < dataSize; i++) {
+    dataA[i] = i;
+    dataB[i] = 0;
+  }
+
+  Queue.mem_advise(dataA, numBytes, static_cast<pi_mem_advice>(mem_advice));
+
+  Queue.submit([&](handler &cgh) {
+    auto myRange = range<1>(dataSize);
+    auto myKernel = ([=](id<1> idx) { dataB[idx] = dataA[idx]; });
+    cgh.parallel_for<class ordered_reader>(myRange, myKernel);
+  });
+  Queue.wait();
+
+  int status = 0;
+  for (int i = 0; i != dataSize; ++i) {
+    if (dataB[i] != i) {
+      std::cout << "Result mismatches " << dataB[i] << " vs expected " << i
+                << " for index " << i << std::endl;
+      status = 1; // fail the test instead of only logging
+    }
+  }
+
+  auto ctxt = Queue.get_context();
+  free(dataA, ctxt);
+  free(dataB, ctxt);
+  return status;
+}