-
Notifications
You must be signed in to change notification settings - Fork 769
[ESIMD] Implement gather(acc) accepting compile-time properties #12414
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
04edb74
01857b8
3288870
ca0650a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -207,7 +207,188 @@ bool testUSM(queue Q, uint32_t MaskStride, PropertiesT) { | |
} | ||
} // end if (VS == 1) | ||
Vals.copy_to(Out + GlobalID * N); | ||
// scatter(Out, ByteOffsets.template select<NOffsets, 1>(), Vals); | ||
}).wait(); | ||
} catch (sycl::exception const &e) { | ||
std::cout << "SYCL exception caught: " << e.what() << '\n'; | ||
sycl::free(In, Q); | ||
sycl::free(Out, Q); | ||
return false; | ||
} | ||
|
||
bool Passed = verify(In, Out, N, Size, VS, MaskStride, UseMask, UsePassThru); | ||
if (!Passed) | ||
std::cout << "Case FAILED" << std::endl; | ||
|
||
sycl::free(In, Q); | ||
sycl::free(Out, Q); | ||
return Passed; | ||
} | ||
|
||
template <typename T, uint16_t N, uint16_t VS, bool UseMask, bool UsePassThru, | ||
bool UseProperties, typename PropertiesT> | ||
bool testACC(queue Q, uint32_t MaskStride, PropertiesT) { | ||
|
||
static_assert(VS > 0 && N % VS == 0, | ||
"Incorrect VS parameter. N must be divisible by VS."); | ||
constexpr int NOffsets = N / VS; | ||
static_assert(!UsePassThru || UseMask, | ||
"PassThru cannot be used without using mask"); | ||
|
||
uint32_t Groups = 8; | ||
uint32_t Threads = 16; | ||
|
||
std::cout << "Running case: T=" << esimd_test::type_name<T>() << ", N=" << N | ||
<< ", VS=" << VS << ", MaskStride=" << MaskStride | ||
<< ", Groups=" << Groups << ", Threads=" << Threads | ||
<< ", use_mask=" << UseMask << ", use_pass_thru=" << UsePassThru | ||
<< ", use_properties=" << UseProperties << std::endl; | ||
|
||
uint16_t Size = Groups * Threads * N; | ||
using Tuint = esimd_test::uint_type_t<sizeof(T)>; | ||
|
||
sycl::range<1> GlobalRange{Groups}; | ||
sycl::range<1> LocalRange{Threads}; | ||
sycl::nd_range<1> Range{GlobalRange * LocalRange, LocalRange}; | ||
|
||
T *Out = sycl::malloc_shared<T>(Size, Q); | ||
std::memset(Out, 0, Size * sizeof(T)); | ||
|
||
T *In = sycl::malloc_shared<T>(Size * 2, Q); | ||
for (int I = 0; I < Size; I++) | ||
In[I] = esimd_test::getRandomValue<T>(); | ||
|
||
try { | ||
buffer<T, 1> InBuf(In, Size * 2); | ||
Q.submit([&](handler &CGH) { | ||
accessor InAcc{InBuf, CGH}; | ||
CGH.parallel_for(Range, [=](sycl::nd_item<1> NDI) SYCL_ESIMD_KERNEL { | ||
int GlobalID = NDI.get_global_id(0); | ||
PropertiesT Props{}; | ||
|
||
simd<OffsetT, NOffsets> ByteOffsets(GlobalID * N * sizeof(T), | ||
VS * sizeof(T)); | ||
simd_view ByteOffsetsView = ByteOffsets.template select<NOffsets, 1>(); | ||
|
||
simd_mask<NOffsets> Pred; | ||
for (int I = 0; I < NOffsets; I++) | ||
Pred[I] = (I % MaskStride == 0) ? 1 : 0; | ||
|
||
using Tuint = esimd_test::uint_type_t<sizeof(T)>; | ||
simd<Tuint, N> PassThruInt(GlobalID * N, 1); | ||
simd<T, N> PassThru = PassThruInt.template bit_cast_view<T>(); | ||
auto PassThruView = PassThru.template select<N, 1>(0); | ||
|
||
simd<T, N> Vals; | ||
if constexpr (VS > 1) { // VS > 1 requires specifying <T, N, VS> | ||
if constexpr (UsePassThru) { | ||
if constexpr (UseProperties) { | ||
if (GlobalID % 4 == 0) // ByteOffset - simd, PassThru - simd | ||
Vals = gather<T, N, VS>(InAcc, ByteOffsets, Pred, PassThru, | ||
Props); | ||
else if (GlobalID % 4 == 1) // ByteOffset - simd, PassThru - view | ||
Vals = gather<T, N, VS>(InAcc, ByteOffsets, Pred, PassThruView, | ||
Props); | ||
else if (GlobalID % 4 == 2) // ByteOffset - view, PassThru - simd | ||
Vals = gather<T, N, VS>(InAcc, ByteOffsetsView, Pred, PassThru, | ||
Props); | ||
else // ByteOffset - view, PassThru - view | ||
Vals = gather<T, N, VS>(InAcc, ByteOffsetsView, Pred, | ||
PassThruView, Props); | ||
} else { // UseProperties is false | ||
if (GlobalID % 4 == 0) // ByteOffset - simd, PassThru - simd | ||
Vals = gather<T, N, VS>(InAcc, ByteOffsets, Pred, PassThru); | ||
else if (GlobalID % 4 == 1) // ByteOffset - simd, PassThru - view | ||
Vals = | ||
gather<T, N, VS>(InAcc, ByteOffsets, Pred, PassThruView); | ||
else if (GlobalID % 4 == 2) // ByteOffset - view, PassThru - simd | ||
Vals = | ||
gather<T, N, VS>(InAcc, ByteOffsetsView, Pred, PassThru); | ||
else // ByteOffset - view, PassThru - view | ||
Vals = gather<T, N, VS>(InAcc, ByteOffsetsView, Pred, | ||
PassThruView); | ||
} | ||
} else if constexpr (UseMask) { // UsePassThru is false | ||
if constexpr (UseProperties) { | ||
if (GlobalID % 2 == 0) // ByteOffset - simd | ||
Vals = gather<T, N, VS>(InAcc, ByteOffsets, Pred, Props); | ||
else // ByteOffset - simd_view | ||
Vals = gather<T, N, VS>(InAcc, ByteOffsetsView, Pred, Props); | ||
} else { // UseProperties is false | ||
if (GlobalID % 2 == 0) // ByteOffset - simd | ||
Vals = gather<T, N, VS>(InAcc, ByteOffsets, Pred); | ||
else // ByteOffset - simd_view | ||
Vals = gather<T, N, VS>(InAcc, ByteOffsetsView, Pred); | ||
} | ||
} else { // UseMask is false, UsePassThru is false | ||
if constexpr (UseProperties) { | ||
if (GlobalID % 2 == 0) // ByteOffset - simd | ||
Vals = gather<T, N, VS>(InAcc, ByteOffsets, Props); | ||
else // ByteOffset - simd_view | ||
Vals = gather<T, N, VS>(InAcc, ByteOffsetsView, Props); | ||
} else { // UseProperties is false | ||
if (GlobalID % 2 == 0) // ByteOffset - simd | ||
Vals = gather<T, N, VS>(InAcc, ByteOffsets); | ||
else // ByteOffset - simd_view | ||
Vals = gather<T, N, VS>(InAcc, ByteOffsetsView); | ||
} | ||
} | ||
} else { | ||
// if (VS == 1) then <T, N, VS> can often be omitted - test it here. | ||
// The variants accepting simd_view for 'PassThru' operand though | ||
// still require <T, N> to be specified explicitly to help | ||
// C++ FE do simd to simd_view matching. | ||
if constexpr (UsePassThru) { | ||
if constexpr (UseProperties) { | ||
if (GlobalID % 4 == 0) // ByteOffset - simd, PassThru - simd | ||
Vals = gather<T>(InAcc, ByteOffsets, Pred, PassThru, Props); | ||
else if (GlobalID % 4 == 1) // ByteOffset - simd, PassThru - view | ||
Vals = gather<T, N>(InAcc, ByteOffsets, Pred, PassThruView, | ||
Props); | ||
else if (GlobalID % 4 == 2) // ByteOffset - view, PassThru - simd | ||
Vals = gather(InAcc, ByteOffsetsView, Pred, PassThru, Props); | ||
else // ByteOffset - view, PassThru - view | ||
Vals = gather<T, N>(InAcc, ByteOffsetsView, Pred, PassThruView, | ||
Props); | ||
} else { // UseProperties is false | ||
if (GlobalID % 4 == 0) // ByteOffset - simd, PassThru - simd | ||
Vals = gather(InAcc, ByteOffsets, Pred, PassThru); | ||
else if (GlobalID % 4 == 1) // ByteOffset - simd, PassThru - view | ||
Vals = gather<T, N>(InAcc, ByteOffsets, Pred, PassThruView); | ||
else if (GlobalID % 4 == 2) // ByteOffset - view, PassThru - simd | ||
Vals = gather<T, N>(InAcc, ByteOffsetsView, Pred, PassThru); | ||
else // ByteOffset - view, PassThru - view | ||
Vals = | ||
gather<T, N>(InAcc, ByteOffsetsView, Pred, PassThruView); | ||
} | ||
} else if constexpr (UseMask) { // UsePassThru is false | ||
if constexpr (UseProperties) { | ||
if (GlobalID % 2 == 0) // ByteOffset - simd | ||
Vals = gather<T>(InAcc, ByteOffsets, Pred, Props); | ||
else // ByteOffset - simd_view | ||
Vals = gather<T, N>(InAcc, ByteOffsetsView, Pred, Props); | ||
} else { // UseProperties is false | ||
if (GlobalID % 2 == 0) // ByteOffset - simd | ||
Vals = gather<T>(InAcc, ByteOffsets, Pred); | ||
else // ByteOffset - simd_view | ||
Vals = gather<T, N>(InAcc, ByteOffsetsView, Pred); | ||
} | ||
} else { // UsePassThru is false, UseMask is false | ||
if constexpr (UseProperties) { | ||
if (GlobalID % 2 == 0) // ByteOffset - simd | ||
Vals = gather<T>(InAcc, ByteOffsets, Props); | ||
else // ByteOffset - simd_view | ||
Vals = gather<T, N>(InAcc, ByteOffsetsView, Props); | ||
} else { | ||
if (GlobalID % 2 == 0) // ByteOffset - simd | ||
Vals = gather<T>(InAcc, ByteOffsets); | ||
else // ByteOffset - simd_view | ||
Vals = gather<T, N>(InAcc, ByteOffsetsView); | ||
} | ||
} | ||
} // end if (VS == 1) | ||
Vals.copy_to(Out + GlobalID * N); | ||
// scatter(Out, ByteOffsets.template select<NOffsets, 1>(), Vals); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: remove commented code There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ok, removed. |
||
}); | ||
}).wait(); | ||
} catch (sycl::exception const &e) { | ||
std::cout << "SYCL exception caught: " << e.what() << '\n'; | ||
|
@@ -286,3 +467,61 @@ template <typename T, TestFeatures Features> bool testUSM(queue Q) { | |
} | ||
return Passed; | ||
} | ||
|
||
template <typename T, TestFeatures Features> bool testACC(queue Q) { | ||
constexpr bool UseMask = true; | ||
constexpr bool UsePassThru = true; | ||
constexpr bool UseProperties = true; | ||
|
||
properties AlignElemProps{alignment<sizeof(T)>}; | ||
|
||
bool Passed = true; | ||
Passed &= testACC<T, 1, 1, !UseMask, !UsePassThru, !UseProperties>( | ||
Q, 2, AlignElemProps); | ||
#ifdef __ESIMD_FORCE_STATELESS_MEM | ||
Passed &= testACC<T, 2, 1, UseMask, !UsePassThru, !UseProperties>( | ||
Q, 2, AlignElemProps); | ||
Passed &= testACC<T, 4, 1, UseMask, !UsePassThru, !UseProperties>( | ||
Q, 2, AlignElemProps); | ||
#endif // __ESIMD_FORCE_STATELESS_MEM | ||
Passed &= testACC<T, 8, 1, UseMask, !UsePassThru, !UseProperties>( | ||
Q, 3, AlignElemProps); | ||
Passed &= testACC<T, 16, 1, UseMask, !UsePassThru, UseProperties>( | ||
Q, 2, AlignElemProps); | ||
Passed &= testACC<T, 32, 1, UseMask, !UsePassThru, !UseProperties>( | ||
Q, 3, AlignElemProps); | ||
|
||
if constexpr (Features == TestFeatures::PVC || | ||
Features == TestFeatures::DG2) { | ||
properties LSCProps{cache_hint_L1<cache_hint::streaming>, | ||
cache_hint_L2<cache_hint::cached>, | ||
alignment<sizeof(T)>}; | ||
Passed &= | ||
testACC<T, 1, 1, !UseMask, !UsePassThru, UseProperties>(Q, 2, LSCProps); | ||
Passed &= | ||
testACC<T, 2, 1, UseMask, !UsePassThru, UseProperties>(Q, 2, LSCProps); | ||
Passed &= | ||
testACC<T, 4, 1, UseMask, UsePassThru, UseProperties>(Q, 2, LSCProps); | ||
Passed &= | ||
testACC<T, 8, 1, UseMask, UsePassThru, UseProperties>(Q, 3, LSCProps); | ||
|
||
Passed &= | ||
testACC<T, 32, 1, UseMask, UsePassThru, UseProperties>(Q, 2, LSCProps); | ||
|
||
// Check VS > 1. GPU supports only dwords and qwords in this mode. | ||
if constexpr (sizeof(T) >= 4) { | ||
// TODO: This test case causes flaky fail. Enable it after the issue | ||
// in GPU driver is fixed. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we have an internal tracker for this or a version we know works? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, I created a tracker before uploading this PR. |
||
// Passed &= testACC<T, 16, 2, UseMask, !UsePassThru, UseProperties>( | ||
// Q, 3, AlignElemProps); | ||
|
||
Passed &= testACC<T, 32, 2, !UseMask, !UsePassThru, UseProperties>( | ||
Q, 3, AlignElemProps); | ||
Passed &= testACC<T, 32, 2, UseMask, !UsePassThru, UseProperties>( | ||
Q, 3, AlignElemProps); | ||
Passed &= testACC<T, 32, 2, UseMask, UsePassThru, UseProperties>( | ||
Q, 3, AlignElemProps); | ||
} | ||
} | ||
return Passed; | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
//==------- gather_acc.cpp - DPC++ ESIMD on-device test --------------------==// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
// Use per-kernel compilation to have more information about failing cases. | ||
// RUN: %{build} -fsycl-device-code-split=per_kernel -o %t.out | ||
// RUN: %{run} %t.out | ||
|
||
// The test verifies esimd::gather() functions accepting ACCESSOR | ||
// and optional compile-time esimd::properties. | ||
// The gather() calls in this test do not use cache-hint properties | ||
// or VS > 1 (number of loads per offset) to not impose using DG2/PVC features. | ||
|
||
#include "Inputs/gather.hpp" | ||
|
||
int main() { | ||
auto Q = queue{gpu_selector_v}; | ||
esimd_test::printTestLabel(Q); | ||
|
||
constexpr auto TestFeatures = TestFeatures::Generic; | ||
bool Passed = true; | ||
|
||
Passed &= testACC<int8_t, TestFeatures>(Q); | ||
Passed &= testACC<int16_t, TestFeatures>(Q); | ||
if (Q.get_device().has(sycl::aspect::fp16)) | ||
Passed &= testACC<sycl::half, TestFeatures>(Q); | ||
Passed &= testACC<uint32_t, TestFeatures>(Q); | ||
Passed &= testACC<float, TestFeatures>(Q); | ||
Passed &= testACC<ext::intel::experimental::esimd::tfloat32, TestFeatures>(Q); | ||
#ifdef __ESIMD_FORCE_STATELESS_MEM | ||
Passed &= testACC<int64_t, TestFeatures>(Q); | ||
if (Q.get_device().has(sycl::aspect::fp64)) | ||
Passed &= testACC<double, TestFeatures>(Q); | ||
#endif // __ESIMD_FORCE_STATELESS_MEM | ||
std::cout << (Passed ? "Passed\n" : "FAILED\n"); | ||
return Passed ? 0 : 1; | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
//==------- gather_acc_dg2_pvc.cpp - DPC++ ESIMD on-device test ------------==// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
// REQUIRES: gpu-intel-dg2 || gpu-intel-pvc | ||
|
||
// Use per-kernel compilation to have more information about failing cases. | ||
// RUN: %{build} -fsycl-device-code-split=per_kernel -o %t.out | ||
// RUN: %{run} %t.out | ||
|
||
// The test verifies esimd::gather() functions accepting ACCESSOR | ||
// and optional compile-time esimd::properties. | ||
// The gather() calls in this test can use cache-hint properties | ||
// or VS > 1 (number of loads per offset). | ||
|
||
#include "Inputs/gather.hpp" | ||
|
||
int main() { | ||
auto Q = queue{gpu_selector_v}; | ||
esimd_test::printTestLabel(Q); | ||
|
||
constexpr auto TestFeatures = TestFeatures::DG2; | ||
bool Passed = true; | ||
|
||
Passed &= testACC<int8_t, TestFeatures>(Q); | ||
Passed &= testACC<int16_t, TestFeatures>(Q); | ||
if (Q.get_device().has(sycl::aspect::fp16)) | ||
Passed &= testACC<sycl::half, TestFeatures>(Q); | ||
Passed &= testACC<uint32_t, TestFeatures>(Q); | ||
Passed &= testACC<float, TestFeatures>(Q); | ||
Passed &= testACC<ext::intel::experimental::esimd::tfloat32, TestFeatures>(Q); | ||
#ifdef __ESIMD_FORCE_STATELESS_MEM | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we be running this test and the previous in stateless mode as well? It seems we are not right now. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, good catch. I'll add the tests here soon. |
||
Passed &= testACC<int64_t, TestFeatures>(Q); | ||
if (Q.get_device().has(sycl::aspect::fp64)) | ||
Passed &= testACC<double, TestFeatures>(Q); | ||
#endif // __ESIMD_FORCE_STATELESS_MEM | ||
std::cout << (Passed ? "Passed\n" : "FAILED\n"); | ||
return Passed ? 0 : 1; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This must have been very painful to write