Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

[SYCL] Add test for AllocaCommand cleanup #466

Closed
wants to merge 1 commit into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 97 additions & 0 deletions SYCL/Scheduler/AllocaCleanup.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
// RUN: %clangxx -fsycl %s -o %t.out
// RUN: env SYCL_PI_TRACE=2 %GPU_RUN_PLACEHOLDER %t.out 2>&1 %GPU_CHECK_PLACEHOLDER

#include <sys/resource.h>
#include <oneapi/dpl/execution>
#include <oneapi/dpl/numeric>
#include <iostream>
#include <vector>
#include "CL/sycl.hpp"

// Computes weighted products of offset-corrected slices of data_buf and stores
// them in cross_product_buf, which is indexed as a flat p x p matrix.
//
// Steps performed by this function:
//   1. A device kernel writes helper[k] = data[k] - mean[k / n] for every
//      k in [0, n * p).
//   2. helper_buf is split into p sub-buffers of n elements each.
//   3. For every pair (i, j) with j >= i, a oneDPL transform_reduce sums
//      w[k] * helper_i[k] * helper_j[k] over k in [0, n); the result is stored
//      through a host accessor at [i*p+j] and, for j > i, also at [i+j*p].
//
// Template parameter:
//   RealType          - element type of all buffers and of the reduction result.
// Parameters:
//   q                 - queue used for the kernel and for the oneDPL policy.
//   n                 - number of elements per slice.
//   p                 - number of slices.
//   data_buf          - input values; read as n * p elements.
//   w_buf             - weights; the reductions read n elements from it.
//   mean_buf          - per-slice offset subtracted from data_buf; indexed by
//                       k / n.
//   cross_product_buf - output; written at indices up to p * p - 1.
//
// NOTE(review): the buffers are taken by value; the writes to
// cross_product_buf are only visible to the caller if copies of a sycl::buffer
// share the same underlying storage - confirm against the SYCL specification.
template<typename RealType>
void cross_product (sycl::queue& q,
int n, int p,
sycl::buffer<RealType, 1> data_buf,
sycl::buffer<RealType, 1> w_buf,
sycl::buffer<RealType, 1> mean_buf,
sycl::buffer<RealType, 1> cross_product_buf)
{
// Progress marker: one dot is printed per call.
std::cout << '.';
// Scratch buffer of n * p elements, created anew on every call.
// NOTE(review): sycl::no_init is passed as the second constructor argument
// here; presumably it is accepted as a buffer property - confirm it has the
// intended effect on a buffer (as opposed to an accessor).
sycl::buffer<RealType, 1> helper_buf(n * p, sycl::no_init);

// Kernel: subtract the per-slice offset from every data element.
q.submit([&] (auto &h) {
sycl::accessor data_acc(data_buf, h, sycl::read_only);
sycl::accessor helper_acc(helper_buf, h, sycl::write_only);
sycl::accessor mean_acc(mean_buf, h, sycl::read_only);
h.parallel_for(sycl::range<1>(n * p), [=] (sycl::item<1> item) {
int ij = item.get_id();
// ij / n selects the slice, so each run of n consecutive elements uses the
// same mean_buf entry.
helper_acc[ij] = data_acc[ij] - mean_acc[ij / n];
});
});
// Block until the kernel above has finished before creating the sub-buffers.
q.wait();

// One buffer per slice, each constructed from helper_buf with offset i*n and
// size n.
std::vector<sycl::buffer<RealType, 1>> sub_helper;
for (int i = 0; i < p; i++) {
sub_helper.push_back(sycl::buffer<RealType, 1>(helper_buf, i*n, n));
}

// oneDPL execution policy bound to q, and an iterator over w_buf; both are
// reused by every reduction below.
auto policy = oneapi::dpl::execution::make_device_policy(q);
auto w_begin = oneapi::dpl::begin(w_buf);

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe it's better to avoid dependencies on non-SYCL APIs in these tests.
Tagging @vladimirlaz

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is better to avoid extra dependencies in these tests. Is it possible to remove the use of DPL?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The added test is the reproducer for the sporadic segfaults fixed in intel/llvm#4573. I agree that having dependencies on other moving components in SYCL tests is not a good idea; however, this was the only reproducer I had at hand. I'll check whether the issue still reproduces after removing the DPL dependencies.

// Fill the p x p result: for each slice i, first the entries pairing it with
// every later slice j > i, then the entry pairing it with itself.
for (int i = 0; i < p; i++) {
auto helper_i = oneapi::dpl::begin(sub_helper[i]);

for (int j = i + 1; j < p; j++) {
auto helper_j = oneapi::dpl::begin(sub_helper[j]);
// Zip (w, helper_i, helper_j) so that one reduction can read all three
// sequences element by element.
auto zip_begin = oneapi::dpl::make_zip_iterator(w_begin, helper_i, helper_j);
// sum = sum over k in [0, n) of w[k] * helper_i[k] * helper_j[k]
RealType sum = std::transform_reduce(policy, zip_begin, zip_begin + n, RealType{0}, std::plus<>(),
[](auto x) { return std::get<0>(x) * std::get<1>(x) * std::get<2>(x); });
// The host accessor lives only inside this scope.
// NOTE(review): presumably this is so that host access is released before
// the next device reduction is submitted - confirm.
{
sycl::host_accessor h_cross_product(cross_product_buf);
// Store the value at (i, j) and at the mirrored position (j, i).
h_cross_product[i*p+j] = sum;
h_cross_product[i+j*p] = sum;
}
}
// Entry (i, i): only (w, helper_i) are zipped, and element 1 (helper_i) is
// used twice in the product, i.e. w[k] * helper_i[k] * helper_i[k].
auto zip_begin = oneapi::dpl::make_zip_iterator(w_begin, helper_i);
RealType sum = std::transform_reduce(policy, zip_begin, zip_begin + n, RealType{0}, std::plus<>(),
[](auto x) { return std::get<0>(x) * std::get<1>(x) * std::get<1>(x); });
{
sycl::host_accessor h_cross_product(cross_product_buf);
h_cross_product[i*p+i] = sum;
}
}
// Wait for any work still outstanding on q before returning.
q.wait();
}

int main() {
const int p = 30; // dimensions
const int n = 100; // observations
const int n_runs = 20;
std::vector<float> data(n * p), w(n, 1), not_mean(p, 1);
// data[i, j] = data[i * n + j]
for (auto& x : data) x = float(std::rand()) / float(RAND_MAX);

sycl::buffer<float, 1> data_buf(data.data(), data.size());
sycl::buffer<float, 1> w_buf(w.data(), w.size());
sycl::buffer<float, 1> cross_product_buf(p * p);
sycl::buffer<float, 1> not_mean_buf(not_mean.data(), not_mean.size());

sycl::queue q(sycl::gpu_selector{});

std::cout << "Running on: " << q.get_device().get_info<sycl::info::device::name>() << std::endl;

std::cout << "Start of parallel computing ";

for (int i = 0; i < n_runs; i++) {
cross_product(q, n, p, data_buf, w_buf, not_mean_buf, cross_product_buf);
q.wait();
}

std::cout << "SUCCESS" << std::endl;

return 0;
}

// CHECK:---> piQueueRelease
// CHECK:---> piContextRelease
// CHECK:---> piKernelRelease
// CHECK:---> piProgramRelease