[SYCL] Added tests for atomics with various memory orders and scopes #534
Changes from all commits
@@ -1,44 +1,152 @@
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out \
// RUN: -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60
// Each combination of 64/32 bit atomic, relaxed/acquire/release/acq_rel
// semantic order and sub_group/work_group/device/system scope is tested
// separately. This is controlled by macros, defined by RUN commands. Defaults
// (no macro for a group) are: 32 bit, relaxed and device.

// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out
Comment on lines +7 to +10

Reviewer: Is this compiled for a single target - the NVIDIA GPU? If so, I suggest running on the GPU device only. 700 of 757 tests finish in less than 10 seconds on an AMD GPU:
[ 8.0s,10.0s) :: [* ] :: [ 25/757]

Author: Atomic tests used to have multiple files for each operation. With the changes in this PR I would have to add many more if I did not refactor them - so we have the equivalent of what would be 24 files in one here, but it still requires separate RUN commands. See the conversation here: #534 (comment)
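
For readers unfamiliar with the macro scheme described in the header comment, the sketch below shows one plausible way RUN-line macros like these can be mapped to a SYCL memory order and scope. The actual selection logic lives in add.h, which is not part of this diff, so the names below are assumptions; only the defaults (32 bit, relaxed, device) are taken from the comment above.

// A minimal sketch, assuming the macro-to-enum mapping happens in add.h;
// identifiers here are illustrative, not the actual test-suite code.
#include <sycl/sycl.hpp>

#if defined(ACQ_REL)
constexpr sycl::memory_order test_order = sycl::memory_order::acq_rel;
#elif defined(ACQUIRE)
constexpr sycl::memory_order test_order = sycl::memory_order::acquire;
#elif defined(RELEASE)
constexpr sycl::memory_order test_order = sycl::memory_order::release;
#else
constexpr sycl::memory_order test_order = sycl::memory_order::relaxed; // default
#endif

#if defined(SYSTEM)
constexpr sycl::memory_scope test_scope = sycl::memory_scope::system;
#elif defined(WORK_GROUP)
constexpr sycl::memory_scope test_scope = sycl::memory_scope::work_group;
#else
constexpr sycl::memory_scope test_scope = sycl::memory_scope::device; // default
#endif

// ATOMIC64 would analogously switch the tested value types from 32-bit
// (e.g. int, float) to 64-bit (e.g. long long, double) variants.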

// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 -DATOMIC64
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 -DWORK_GROUP
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 -DWORK_GROUP -DATOMIC64
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 -DSYSTEM
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 -DSYSTEM -DATOMIC64
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQUIRE
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQUIRE -DATOMIC64
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQUIRE -DWORK_GROUP
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQUIRE -DWORK_GROUP -DATOMIC64
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQUIRE -DSYSTEM
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQUIRE -DSYSTEM -DATOMIC64
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DRELEASE
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DRELEASE -DATOMIC64
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DRELEASE -DWORK_GROUP
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DRELEASE -DWORK_GROUP -DATOMIC64
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DRELEASE -DSYSTEM
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DRELEASE -DSYSTEM -DATOMIC64
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQ_REL
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQ_REL -DATOMIC64
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQ_REL -DWORK_GROUP
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQ_REL -DWORK_GROUP -DATOMIC64
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQ_REL -DSYSTEM
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQ_REL -DSYSTEM -DATOMIC64
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

#include "add.h" | ||
#include <iostream> | ||
using namespace sycl; | ||
|
||
// Floating-point types do not support pre- or post-increment | ||
template <> void add_test<float>(queue q, size_t N) { | ||
add_fetch_test<::sycl::ext::oneapi::atomic_ref, | ||
access::address_space::global_space, float>(q, N); | ||
add_fetch_test<::sycl::atomic_ref, access::address_space::global_space, | ||
float>(q, N); | ||
add_plus_equal_test<::sycl::ext::oneapi::atomic_ref, | ||
access::address_space::global_space, float>(q, N); | ||
add_plus_equal_test<::sycl::atomic_ref, access::address_space::global_space, | ||
float>(q, N); | ||
} | ||
|
||
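As a point of reference for the specialization above, here is a self-contained sketch (not part of the PR; it assumes a USM-capable default device) of the fetch_add / operator+= pattern that add_fetch_test and add_plus_equal_test exercise for float:

#include <sycl/sycl.hpp>
#include <cassert>

int main() {
  sycl::queue q;
  float *sum = sycl::malloc_shared<float>(1, q);
  *sum = 0.0f;
  q.parallel_for(sycl::range<1>(32), [=](sycl::id<1>) {
     // relaxed order and device scope mirror the test defaults
     sycl::atomic_ref<float, sycl::memory_order::relaxed,
                      sycl::memory_scope::device,
                      sycl::access::address_space::global_space>
         ref(*sum);
     ref.fetch_add(1.0f); // the add_fetch_test form
     ref += 1.0f;         // the add_plus_equal_test form
   }).wait();
  assert(*sum == 64.0f); // 32 work-items, two atomic adds of 1.0f each
  sycl::free(sum, q);
}
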
int main() {
  queue q;

  constexpr int N = 32;
  add_test<int>(q, N);
  add_test<unsigned int>(q, N);
  add_test<float>(q, N);

  // Include long tests if they are 32 bits wide
  if constexpr (sizeof(long) == 4) {
    add_test<long>(q, N);
    add_test<unsigned long>(q, N);
  }

  // Include pointer tests if they are 32 bits wide
  if constexpr (sizeof(char *) == 4) {
    add_test<char *, ptrdiff_t>(q, N);
  }

  std::cout << "Test passed." << std::endl;
}

int main() { add_test_all<access::address_space::global_space>(); }
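
The refactored main delegates everything to add_test_all, which is defined in add.h and therefore outside this diff. A rough sketch of what such a dispatcher could look like - an assumption, not the actual header - follows:

#include <sycl/sycl.hpp>
#include <iostream>

// Declared in add.h; a declaration is shown so the sketch is self-contained.
template <typename T, typename Difference = T>
void add_test(sycl::queue q, size_t N);

template <sycl::access::address_space space> void add_test_all() {
  // In the real header, `space` would be forwarded to the individual tests.
  sycl::queue q;
  constexpr size_t N = 32;
#ifdef ATOMIC64
  // 64-bit variants, guarded because not every device supports them
  if (q.get_device().has(sycl::aspect::atomic64)) {
    add_test<long long>(q, N);
    add_test<unsigned long long>(q, N);
    add_test<double>(q, N);
  }
#else
  add_test<int>(q, N);
  add_test<unsigned int>(q, N);
  add_test<float>(q, N);
#endif
  std::cout << "Test passed." << std::endl;
}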