Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

[SYCL] Add tests for discard_events feature #599

Merged
merged 6 commits into from
Dec 19, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/CODEOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ SYCL/AtomicRef @AGindinson
SYCL/Assert @intel/llvm-reviewers-runtime
SYCL/Basic @intel/llvm-reviewers-runtime
SYCL/Config @intel/llvm-reviewers-runtime
SYCL/DiscardEvents @intel/llvm-reviewers-runtime
SYCL/FilterSelector @intel/llvm-reviewers-runtime
SYCL/HostInteropTask @intel/llvm-reviewers-runtime
SYCL/InorderQueue @intel/llvm-reviewers-runtime
Expand Down
106 changes: 106 additions & 0 deletions SYCL/DiscardEvents/discard_events_accessors.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
// FIXME unsupported on level_zero until L0 Plugin support becomes available for
// discard_queue_events
// UNSUPPORTED: level_zero
//
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
//
// RUN: env SYCL_PI_TRACE=2 %CPU_RUN_PLACEHOLDER %t.out &> %t.txt || true
// RUN: %CPU_RUN_PLACEHOLDER FileCheck %s --input-file %t.txt
// RUN: env SYCL_PI_TRACE=2 %GPU_RUN_PLACEHOLDER %t.out &> %t.txt || true
// RUN: %GPU_RUN_PLACEHOLDER FileCheck %s --input-file %t.txt
// RUN: env SYCL_PI_TRACE=2 %ACC_RUN_PLACEHOLDER %t.out &> %t.txt || true
// RUN: %ACC_RUN_PLACEHOLDER FileCheck %s --input-file %t.txt
//
// The test checks that the last parameter is `nullptr` for
// piEnqueueKernelLaunch for USM kernel using local accessor, but
// is not `nullptr` for kernel using buffer accessor.
// {{0|0000000000000000}} is required for various output on Linux and Windows.
//
// CHECK: ---> piEnqueueKernelLaunch(
// CHECK: pi_event * :
// CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ]
//
// CHECK: ---> piEnqueueKernelLaunch(
// CHECK: pi_event * :
// CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ]
// CHECK: ---> pi_result : PI_SUCCESS
//
// CHECK: The test passed.

#include <CL/sycl.hpp>
#include <cassert>
#include <iostream>

using namespace cl::sycl;
static constexpr int MAGIC_NUM = -1;
static constexpr size_t BUFFER_SIZE = 16;

void RunKernelHelper(sycl::queue Q,
const std::function<void(int *Harray)> &TestFunction) {
int *Harray = sycl::malloc_host<int>(BUFFER_SIZE, Q);
assert(Harray != nullptr);
for (size_t i = 0; i < BUFFER_SIZE; ++i) {
Harray[i] = MAGIC_NUM;
}

TestFunction(Harray);

// Checks result
for (size_t i = 0; i < BUFFER_SIZE; ++i) {
size_t expected = i + 10;
assert(Harray[i] == expected);
}
free(Harray, Q);
}

int main(int Argc, const char *Argv[]) {

sycl::property_list props{
sycl::property::queue::in_order{},
sycl::ext::oneapi::property::queue::discard_events{}};
sycl::queue Q(props);
sycl::range<1> Range(BUFFER_SIZE);

RunKernelHelper(Q, [&](int *Harray) {
Q.submit([&](sycl::handler &CGH) {
const size_t LocalMemSize = BUFFER_SIZE;
using LocalAccessor =
sycl::accessor<int, 1, sycl::access::mode::read_write,
sycl::access::target::local>;
LocalAccessor LocalAcc(LocalMemSize, CGH);

CGH.parallel_for<class kernel_using_local_memory>(
Range, [=](sycl::item<1> itemID) {
size_t i = itemID.get_id(0);
int *Ptr = LocalAcc.get_pointer();
Ptr[i] = i + 5;
Harray[i] = Ptr[i] + 5;
});
});
Q.wait();
});

RunKernelHelper(Q, [&](int *Harray) {
sycl::buffer<int, 1> Buf(Range);
Q.submit([&](sycl::handler &CGH) {
auto Acc = Buf.get_access<sycl::access::mode::read_write>(CGH);
CGH.parallel_for<class kernel_using_buffer_accessor>(
Range, [=](sycl::item<1> itemID) {
size_t i = itemID.get_id(0);
Harray[i] = i + 10;
Acc[i] = i + 20;
});
});
Q.wait();

// Checks result
auto HostAcc = Buf.get_access<sycl::access::mode::read>();
for (size_t i = 0; i < BUFFER_SIZE; ++i) {
size_t expected = i + 20;
assert(HostAcc[i] == expected);
}
});

std::cout << "The test passed." << std::endl;
return 0;
}
57 changes: 57 additions & 0 deletions SYCL/DiscardEvents/discard_events_host_task.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// If necessary, the test can be removed as run_on_host_intel() is deprecated
// and host_task() which should be used instead does not use the PI call
// piEnqueueNativeKernel
//
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
//
// RUN: env SYCL_PI_TRACE=2 %CPU_RUN_PLACEHOLDER %t.out &> %t.txt
// RUN: %CPU_RUN_PLACEHOLDER FileCheck %s --input-file %t.txt
//
// The test checks that the last parameter is `nullptr` for
// piEnqueueNativeKernel.
// {{0|0000000000000000}} is required for various output on Linux and Windows.
//
// CHECK: ---> piEnqueueNativeKernel(
// CHECK: pi_event * :
// CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ]
//
// CHECK: The test passed.

#include <CL/sycl.hpp>
#include <cassert>
#include <iostream>

using namespace cl::sycl;

void CheckArray(sycl::queue Q, int *x, size_t buffer_size, int expected) {
Q.wait();
for (size_t i = 0; i < buffer_size; ++i)
assert(x[i] == expected);
}

static constexpr size_t BUFFER_SIZE = 16;

int main(int Argc, const char *Argv[]) {

sycl::property_list Props{
sycl::property::queue::in_order{},
sycl::ext::oneapi::property::queue::discard_events{}};
sycl::queue Q(Props);

int *x = sycl::malloc_shared<int>(BUFFER_SIZE, Q);
assert(x != nullptr);

Q.submit([&](sycl::handler &CGH) {
CGH.run_on_host_intel([=]() {
for (size_t i = 0; i < BUFFER_SIZE; ++i)
x[i] = 8;
});
});
CheckArray(Q, x, BUFFER_SIZE, 8);

Q.wait();
free(x, Q);

std::cout << "The test passed." << std::endl;
return 0;
}
45 changes: 45 additions & 0 deletions SYCL/DiscardEvents/discard_events_kernel_using_assert.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#include <CL/sycl.hpp>
#include <cassert>
#include <iostream>

using namespace cl::sycl;
static constexpr int MAGIC_NUM = -1;
static constexpr size_t BUFFER_SIZE = 16;

int main(int Argc, const char *Argv[]) {

sycl::property_list Props{
sycl::property::queue::in_order{},
sycl::ext::oneapi::property::queue::discard_events{}};
sycl::queue Q(Props);

sycl::range<1> Range(BUFFER_SIZE);
int *Harray = sycl::malloc_host<int>(BUFFER_SIZE, Q);
if (Harray == nullptr) {
return -1;
}
for (size_t i = 0; i < BUFFER_SIZE; ++i) {
Harray[i] = MAGIC_NUM;
}

Q.submit([&](sycl::handler &CGH) {
CGH.parallel_for<class kernel_using_assert>(
Range, [=](sycl::item<1> itemID) {
size_t i = itemID.get_id(0);
Harray[i] = i + 10;
assert(Harray[i] == i + 10 && "assert message");
});
});
Q.wait();

// Checks result
for (size_t i = 0; i < BUFFER_SIZE; ++i) {
size_t expected = i + 10;
if (Harray[i] != expected)
return -1;
}
free(Harray, Q);

std::cout << "The test passed." << std::endl;
return 0;
}
132 changes: 132 additions & 0 deletions SYCL/DiscardEvents/discard_events_test_queue_ops.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@

#include <CL/sycl.hpp>
#include <cassert>
#include <iostream>

using namespace cl::sycl;

void CheckArray(sycl::queue Q, int *x, size_t buffer_size, int expected) {
Q.wait();
for (size_t i = 0; i < buffer_size; ++i)
assert(x[i] == expected);
}

static constexpr size_t BUFFER_SIZE = 16;

void TestQueueOperations(sycl::queue Q) {
sycl::range<1> Range(BUFFER_SIZE);
auto Dev = Q.get_device();
auto Ctx = Q.get_context();
const int MemAdvice =
((Dev.get_backend() == sycl::backend::ext_oneapi_cuda) ? 1 : 0);
int *x = sycl::malloc_shared<int>(BUFFER_SIZE, Q);
assert(x != nullptr);
int *y = sycl::malloc_shared<int>(BUFFER_SIZE, Q);
assert(y != nullptr);

Q.memset(x, 0, BUFFER_SIZE * sizeof(int));
CheckArray(Q, x, BUFFER_SIZE, 0);

Q.memcpy(y, x, BUFFER_SIZE * sizeof(int));
CheckArray(Q, y, BUFFER_SIZE, 0);

Q.fill(y, 1, BUFFER_SIZE);
CheckArray(Q, y, BUFFER_SIZE, 1);

Q.copy(y, x, BUFFER_SIZE);
CheckArray(Q, x, BUFFER_SIZE, 1);

Q.prefetch(y, BUFFER_SIZE * sizeof(int));
Q.mem_advise(y, BUFFER_SIZE * sizeof(int), MemAdvice);
Q.ext_oneapi_submit_barrier();

Q.single_task([=] {
for (auto i = 0u; i < BUFFER_SIZE; ++i)
y[i] *= 2;
});
CheckArray(Q, y, BUFFER_SIZE, 2);

Q.parallel_for(Range,
[=](sycl::item<1> itemID) { y[itemID.get_id(0)] *= 3; });
CheckArray(Q, y, BUFFER_SIZE, 6);

// Creates new queue with the same context/device, but without discard_events
// property. This queue returns a normal event, not a discarded one.
sycl::queue RegularQ(Ctx, Dev, sycl::property::queue::in_order{});
int *x1 = sycl::malloc_shared<int>(BUFFER_SIZE, RegularQ);
assert(x1 != nullptr);
auto event = RegularQ.memset(x1, 0, BUFFER_SIZE * sizeof(int));

Q.memcpy(y, x, 0, event);
CheckArray(Q, y, BUFFER_SIZE, 6);

Q.wait();
free(x, Q);
free(y, Q);
free(x1, RegularQ);
}

void TestQueueOperationsViaSubmit(sycl::queue Q) {
sycl::range<1> Range(BUFFER_SIZE);
auto Dev = Q.get_device();
auto Ctx = Q.get_context();
const int MemAdvice =
((Dev.get_backend() == sycl::backend::ext_oneapi_cuda) ? 1 : 0);
int *x = sycl::malloc_shared<int>(BUFFER_SIZE, Q);
assert(x != nullptr);
int *y = sycl::malloc_shared<int>(BUFFER_SIZE, Q);
assert(y != nullptr);

Q.submit(
[&](sycl::handler &CGH) { CGH.memset(x, 0, BUFFER_SIZE * sizeof(int)); });
CheckArray(Q, x, BUFFER_SIZE, 0);

Q.submit(
[&](sycl::handler &CGH) { CGH.memcpy(y, x, BUFFER_SIZE * sizeof(int)); });
CheckArray(Q, y, BUFFER_SIZE, 0);

Q.submit([&](sycl::handler &CGH) { CGH.fill(y, 1, BUFFER_SIZE); });
CheckArray(Q, y, BUFFER_SIZE, 1);

Q.submit([&](sycl::handler &CGH) { CGH.copy(y, x, BUFFER_SIZE); });
CheckArray(Q, x, BUFFER_SIZE, 1);

Q.submit(
[&](sycl::handler &CGH) { CGH.prefetch(y, BUFFER_SIZE * sizeof(int)); });
Q.submit([&](sycl::handler &CGH) {
CGH.mem_advise(y, BUFFER_SIZE * sizeof(int), MemAdvice);
});
Q.submit([&](sycl::handler &CGH) { CGH.ext_oneapi_barrier(); });

Q.submit([&](sycl::handler &CGH) {
CGH.single_task([=] {
for (auto i = 0u; i < BUFFER_SIZE; ++i)
y[i] *= 2;
});
});
CheckArray(Q, y, BUFFER_SIZE, 2);

Q.submit([&](sycl::handler &CGH) {
CGH.parallel_for(Range,
[=](sycl::item<1> itemID) { y[itemID.get_id(0)] *= 3; });
});
CheckArray(Q, y, BUFFER_SIZE, 6);

// Creates new queue with the same context/device, but without discard_events
// property. This queue returns a normal event, not a discarded one.
sycl::queue RegularQ(Ctx, Dev, sycl::property::queue::in_order{});
int *x1 = sycl::malloc_shared<int>(BUFFER_SIZE, RegularQ);
assert(x1 != nullptr);
auto event = RegularQ.memset(x1, 0, BUFFER_SIZE * sizeof(int));

Q.submit([&](sycl::handler &CGH) {
CGH.depends_on(event);
CGH.memcpy(y, x, 0);
});
CheckArray(Q, y, BUFFER_SIZE, 6);

Q.wait();
free(x, Q);
free(y, Q);
free(x1, RegularQ);
}
24 changes: 24 additions & 0 deletions SYCL/DiscardEvents/discard_events_using_assert.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// FIXME unsupported on CUDA and HIP until fallback libdevice becomes available
// UNSUPPORTED: cuda || hip
//
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
//
// RUN: env SYCL_PI_TRACE=2 %CPU_RUN_PLACEHOLDER %t.out &> %t.txt
// RUN: %CPU_RUN_PLACEHOLDER FileCheck %s --input-file %t.txt
// RUN: env SYCL_PI_TRACE=2 %GPU_RUN_PLACEHOLDER %t.out &> %t.txt
// RUN: %GPU_RUN_PLACEHOLDER FileCheck %s --input-file %t.txt
// RUN: env SYCL_PI_TRACE=2 %ACC_RUN_PLACEHOLDER %t.out &> %t.txt
// RUN: %ACC_RUN_PLACEHOLDER FileCheck %s --input-file %t.txt
//
// The test checks that the last parameter is not `nullptr` for
// piEnqueueKernelLaunch.
// {{0|0000000000000000}} is required for various output on Linux and Windows.
//
// CHECK: ---> piEnqueueKernelLaunch(
// CHECK: pi_event * :
// CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ]
// CHECK: ---> pi_result : PI_SUCCESS
//
// CHECK: The test passed.

#include "discard_events_kernel_using_assert.hpp"
Loading