Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

[SYCL] Moved tests introduced in intel/llvm#3255 #181

Merged
merged 2 commits into from
Mar 15, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 69 additions & 0 deletions SYCL/SubGroup/sub_group_as.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
// Sub-groups are not suported on Host
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

#include <CL/sycl.hpp>
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstdlib>

int main(int argc, char *argv[]) {
cl::sycl::queue queue;
printf("Device Name = %s\n",
queue.get_device().get_info<cl::sycl::info::device::name>().c_str());

// Initialize some host memory
constexpr int N = 64;
int host_mem[N];
for (int i = 0; i < N; ++i) {
host_mem[i] = i * 100;
}

// Use the device to transform each value
{
cl::sycl::buffer<int, 1> buf(host_mem, N);
queue.submit([&](cl::sycl::handler &cgh) {
auto global =
buf.get_access<cl::sycl::access::mode::read_write,
cl::sycl::access::target::global_buffer>(cgh);
sycl::accessor<int, 1, sycl::access::mode::read_write,
sycl::access::target::local>
local(N, cgh);

cgh.parallel_for<class test>(
cl::sycl::nd_range<1>(N, 32), [=](cl::sycl::nd_item<1> it) {
cl::sycl::ONEAPI::sub_group sg = it.get_sub_group();
if (!it.get_local_id(0)) {
int end = it.get_global_id(0) + it.get_local_range()[0];
for (int i = it.get_global_id(0); i < end; i++) {
local[i] = i;
}
}
it.barrier();

int i = (it.get_global_id(0) / sg.get_max_local_range()[0]) *
sg.get_max_local_range()[0];
// Global address space
auto x = sg.load(&global[i]);

// Local address space
auto y = sg.load(&local[i]);

sg.store(&global[i], x + y);
});
});
}

// Print results and tidy up
for (int i = 0; i < N; ++i) {
if (i * 101 != host_mem[i]) {
printf("Unexpected result %04d vs %04d\n", i * 101, host_mem[i]);
return 1;
}
}
printf("Success!\n");
return 0;
}
71 changes: 71 additions & 0 deletions SYCL/SubGroup/sub_group_as_vec.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
// Sub-groups are not suported on Host
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

#include <CL/sycl.hpp>
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstdlib>

int main(int argc, char *argv[]) {
cl::sycl::queue queue;
printf("Device Name = %s\n",
queue.get_device().get_info<cl::sycl::info::device::name>().c_str());

// Initialize some host memory
constexpr int N = 64;
sycl::vec<int, 2> host_mem[N];
for (int i = 0; i < N; ++i) {
host_mem[i].s0() = i;
host_mem[i].s1() = 0;
}

// Use the device to transform each value
{
cl::sycl::buffer<sycl::vec<int, 2>, 1> buf(host_mem, N);
queue.submit([&](cl::sycl::handler &cgh) {
auto global =
buf.get_access<cl::sycl::access::mode::read_write,
cl::sycl::access::target::global_buffer>(cgh);
sycl::accessor<sycl::vec<int, 2>, 1, sycl::access::mode::read_write,
sycl::access::target::local>
local(N, cgh);
cgh.parallel_for<class test>(
cl::sycl::nd_range<1>(N, 32), [=](cl::sycl::nd_item<1> it) {
cl::sycl::ONEAPI::sub_group sg = it.get_sub_group();
if (!it.get_local_id(0)) {
int end = it.get_global_id(0) + it.get_local_range()[0];
for (int i = it.get_global_id(0); i < end; i++) {
local[i].s0() = 0;
local[i].s1() = i;
}
}
it.barrier();

int i = (it.get_global_id(0) / sg.get_max_local_range()[0]) *
sg.get_max_local_range()[0];
// Global address space
auto x = sg.load(&global[i]);

// Local address space
auto y = sg.load(&local[i]);

sg.store(&global[i], x + y);
});
});
}

// Print results and tidy up
for (int i = 0; i < N; ++i) {
if (i != host_mem[i].s0() || i != host_mem[i].s1()) {
printf("Unexpected result [%02d,%02d] vs [%02d,%02d]\n", i, i,
host_mem[i].s0(), host_mem[i].s1());
return 1;
}
}
printf("Success!\n");
return 0;
}
2 changes: 1 addition & 1 deletion SYCL/lit.cfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@
if config.sycl_be == 'cuda':
config.substitutions.append( ('%sycl_triple', "nvptx64-nvidia-cuda-sycldevice" ) )
else:
config.substitutions.append( ('%sycl_triple', "spir64-unknown-linux-sycldevice" ) )
config.substitutions.append( ('%sycl_triple', "spir64-unknown-unknown-sycldevice" ) )

if find_executable('sycl-ls'):
config.available_features.add('sycl-ls')
Expand Down
2 changes: 0 additions & 2 deletions SYCL/lit.site.cfg.py.in
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@ config.sycl_tools_dir = config.llvm_tools_dir
config.sycl_include = os.path.join(config.dpcpp_root_dir, 'include', 'sycl')
config.sycl_obj_root = "@CMAKE_CURRENT_BINARY_DIR@"
config.sycl_libs_dir = os.path.join(config.dpcpp_root_dir, ('bin' if platform.system() == "Windows" else 'lib'))
config.target_triple = "x86_64-unknown-unknown-gnu"
config.host_triple = "x86_64-unknown-unknown-gnu"

config.opencl_libs_dir = (os.path.dirname("@OpenCL_LIBRARY@") if "@OpenCL_LIBRARY@" else "")
config.level_zero_libs_dir = "@LEVEL_ZERO_LIBS_DIR@"
Expand Down