Skip to content

Commit ec979f5

Browse files
authored
[SYCL][ESIMD] Use passthrough for mask_expand_load (#15664)
The unread (masked-off) elements had undefined values, causing sporadic failures. We can't use the pass_thru argument of gather because that breaks Gen12, which was working before.
Closes: #15257
Closes: #15653
---------
Signed-off-by: Sarnie, Nick <nick.sarnie@intel.com>
1 parent ec57ad7 commit ec979f5

File tree

2 files changed

+9
-5
lines changed

2 files changed

+9
-5
lines changed

sycl/include/sycl/ext/intel/esimd/memory.hpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14257,7 +14257,10 @@ mask_expand_load(const T *p, simd_mask<N> mask, PropertyListT props = {}) {
1425714257
// becomes an index for compressed store/expanded load operation.
1425814258
simd<uint32_t, N> offset =
1425914259
cbit(simd<uint32_t, N>(offsets::value) & pack_mask(mask));
14260-
return gather(p, offset * sizeof(T), mask, props);
14260+
simd<T, N> pass_thru = 0;
14261+
simd<T, N> res = gather(p, offset * sizeof(T), mask, props);
14262+
res.merge(pass_thru, !mask);
14263+
return res;
1426114264
}
1426214265

1426314266
/// template <typename T, int N, typename AccessorTy,
@@ -14305,7 +14308,11 @@ mask_expand_load(AccessorTy acc, uint32_t global_offset, simd_mask<N> mask,
1430514308
// becomes an index for compressed store/expanded load operation.
1430614309
simd<uint32_t, N> offset =
1430714310
cbit(simd<uint32_t, N>(offsets::value) & pack_mask(mask));
14308-
return gather<T>(acc, offset * sizeof(T) + global_offset, mask, props);
14311+
simd<T, N> pass_thru = 0;
14312+
simd<T, N> res =
14313+
gather<T>(acc, offset * sizeof(T) + global_offset, mask, props);
14314+
res.merge(pass_thru, !mask);
14315+
return res;
1430914316
}
1431014317

1431114318
/// template <typename T, int N,

sycl/test-e2e/ESIMD/mask_expand_load.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,6 @@
99
// RUN: %{build} -fsycl-device-code-split=per_kernel -o %t.out
1010
// RUN: %{run} %t.out
1111

12-
// https://github.com/intel/llvm/issues/14826
13-
// XFAIL: arch-intel_gpu_pvc
14-
1512
// This is a basic test to validate the expanded load API.
1613

1714
#include "esimd_test_utils.hpp"

0 commit comments

Comments
 (0)