Skip to content

[SYCL] Switch to use plain array in sycl::vec in more cases #17656

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 0 additions & 8 deletions sycl/include/sycl/detail/type_traits/vec_marray_traits.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,6 @@

#pragma once

#ifndef __SYCL_USE_NEW_VEC_IMPL
#if defined(__INTEL_PREVIEW_BREAKING_CHANGES)
#define __SYCL_USE_NEW_VEC_IMPL 1
#else
#define __SYCL_USE_NEW_VEC_IMPL 0
#endif
#endif

#include <cstddef>
#include <type_traits>

Expand Down
2 changes: 0 additions & 2 deletions sycl/include/sycl/detail/vector_convert.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,7 @@
#include <sycl/detail/generic_type_traits.hpp> // for is_sigeninteger, is_s...
#include <sycl/exception.hpp> // for errc

#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
#include <sycl/detail/memcpy.hpp>
#endif
#include <sycl/ext/oneapi/bfloat16.hpp> // bfloat16
#include <sycl/vector.hpp>

Expand Down
46 changes: 38 additions & 8 deletions sycl/include/sycl/vector.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,15 @@
#endif
#endif // __clang__

// See vec::DataType definitions for more details
#ifndef __SYCL_USE_PLAIN_ARRAY_AS_VEC_STORAGE
#if defined(__INTEL_PREVIEW_BREAKING_CHANGES)
#define __SYCL_USE_PLAIN_ARRAY_AS_VEC_STORAGE 1
#else
#define __SYCL_USE_PLAIN_ARRAY_AS_VEC_STORAGE 0
#endif
#endif

#if !defined(__HAS_EXT_VECTOR_TYPE__) && defined(__SYCL_DEVICE_ONLY__)
#error "SYCL device compiler is built without ext_vector_type support"
#endif
Expand Down Expand Up @@ -86,6 +95,9 @@ struct elem {
};

namespace detail {
// To be defined in tests, trick to access vec's private methods
template <typename T1, int T2> class vec_base_test;

template <typename VecT, typename OperationLeftT, typename OperationRightT,
template <typename> class OperationCurrentT, int... Indexes>
class SwizzleOp;
Expand Down Expand Up @@ -144,16 +156,34 @@ template <typename DataT, int NumElements> class vec_base {
static constexpr size_t AdjustedNum = (NumElements == 3) ? 4 : NumElements;
// This represent type of underlying value. There should be only one field
// in the class, so vec<float, 16> should be equal to float16 in memory.
#if defined(__INTEL_PREVIEW_BREAKING_CHANGES) && \
defined(__SYCL_USE_NEW_VEC_IMPL)
using DataType = DataT[AdjustedNum];
//
// In intel/llvm#14130 we incorrectly used std::array as an underlying storage
// for vec data. The problem with std::array is that it comes from the C++
// STL headers which we do not control and they may use something that is
// illegal in SYCL device code. One of specific examples is use of debug
// assertions in MSVC's STL implementation.
//
// The better approach is to use plain C++ array, but the problem here is that
// C++ specification does not provide any guarantees about the memory layout
// of std::array and therefore directly switching to it would technically be
// an ABI-break, even though the practical chances of encountering the issue
// are low.
//
// To play it safe, we only switch to use plain array if both its size and
// alignment match those of std::array, or unless the new behavior is forced
// via __SYCL_USE_PLAIN_ARRAY_AS_VEC_STORAGE or preview breaking changes mode.
using DataType = std::conditional_t<
#if __SYCL_USE_PLAIN_ARRAY_AS_VEC_STORAGE
true,
#else
using DataType = std::array<DataT, AdjustedNum>;
// Assuming that std::array has the same size as the underlying array.
// C++ standard does not guarantee that, but it is true for most popular
// implementations.
static_assert(sizeof(DataType) == sizeof(DataT[AdjustedNum]));
sizeof(std::array<DataT, AdjustedNum>) == sizeof(DataT[AdjustedNum]) &&
alignof(std::array<DataT, AdjustedNum>) ==
alignof(DataT[AdjustedNum]),
#endif
DataT[AdjustedNum], std::array<DataT, AdjustedNum>>;

// To allow testing of private methods
template <typename T1, int T2> friend class detail::vec_base_test;

protected:
// fields
Expand Down
6 changes: 2 additions & 4 deletions sycl/test/abi/layout_vec.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@ SYCL_EXTERNAL void foo(sycl::vec<int, 4>) {}

// CHECK: 0 | class sycl::vec<int, 4>
// ignore empty base classes
// CHECK: 0 | struct std::array<int, 4> m_Data
// CHECK-NEXT: 0 | typename {{.+}}::_Type _M_elems
// CHECK: 0 | DataType m_Data
// CHECK-NEXT: | [sizeof=16, dsize=16, align=16,
// CHECK-NEXT: | nvsize=16, nvalign=16]

Expand All @@ -23,7 +22,6 @@ SYCL_EXTERNAL void foo(sycl::vec<bool, 16>) {}

// CHECK: 0 | class sycl::vec<_Bool, 16>
// ignore empty base classes
// CHECK: 0 | struct std::array<_Bool, 16> m_Data
// CHECK-NEXT: 0 | typename {{.+}}::_Type _M_elems
// CHECK: 0 | DataType m_Data
// CHECK-NEXT: | [sizeof=16, dsize=16, align=16,
// CHECK-NEXT: | nvsize=16, nvalign=16]
43 changes: 43 additions & 0 deletions sycl/test/basic_tests/vectors/storage.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// RUN: %clangxx -fsycl -Xclang -verify %s -fsyntax-only
// RUN: %clangxx -fsycl -Xclang -verify %s -fsyntax-only -fpreview-breaking-changes
// RUN: %clangxx -fsycl -Xclang -verify %s -fsyntax-only -D__SYCL_USE_PLAIN_ARRAY_AS_VEC_STORAGE=1
// expected-no-diagnostics

#include <sycl/vector.hpp>

#include <type_traits>

namespace sycl {
namespace detail {
template <typename T, int N> class vec_base_test {
public:
static void do_check() {
constexpr bool uses_std_array =
std::is_same_v<typename sycl::vec<T, N>::DataType, std::array<T, N>>;
constexpr bool uses_plain_array =
std::is_same_v<typename sycl::vec<T, N>::DataType, T[N]>;

constexpr bool std_array_and_plain_array_have_the_same_layout =
sizeof(std::array<T, N>) == sizeof(T[N]) &&
alignof(std::array<T, N>) == alignof(T[N]);

#if defined(__INTEL_PREVIEW_BREAKING_CHANGES) || \
__SYCL_USE_PLAIN_ARRAY_AS_VEC_STORAGE
static_assert(uses_plain_array,
"We must use plain array regardless of "
"layout, because user is opted-in for a potential ABI-break");
#else
static_assert(std_array_and_plain_array_have_the_same_layout ==
uses_plain_array,
"If layouts are the same, we must use safer plain array "
"instead of std::array, or vice versa");
static_assert(
!std_array_and_plain_array_have_the_same_layout == uses_std_array,
"If layouts are not the same, we must use std::array to preserve ABI");
#endif
}
};
} // namespace detail
} // namespace sycl

int main() { sycl::detail::vec_base_test<int, 4>::do_check(); }
10 changes: 5 additions & 5 deletions sycl/test/check_device_code/vector/bf16_builtins.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ SYCL_EXTERNAL auto TestFMin(vec<bfloat16, 2> a, vec<bfloat16, 2> b) {
}

// CHECK-LABEL: define dso_local spir_func void @_Z8TestFMaxN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEES5_(
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.71") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.71") align 8 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.71") align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.70") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.70") align 8 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.70") align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VEC_ADDR_I_I_I_I12_I:%.*]] = alloca <3 x float>, align 16
// CHECK-NEXT: [[DST_I_I_I_I13_I:%.*]] = alloca [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2
Expand Down Expand Up @@ -123,7 +123,7 @@ SYCL_EXTERNAL auto TestFMax(vec<bfloat16, 3> a, vec<bfloat16, 3> b) {
}

// CHECK-LABEL: define dso_local spir_func void @_Z9TestIsNanN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi4EEE(
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.149") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.188") align 8 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META38:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.146") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.184") align 8 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META38:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VEC_ADDR_I_I_I_I_I:%.*]] = alloca <4 x i16>, align 8
// CHECK-NEXT: [[DST_I_I_I_I_I:%.*]] = alloca [4 x float], align 4
Expand All @@ -149,7 +149,7 @@ SYCL_EXTERNAL auto TestIsNan(vec<bfloat16, 4> a) {
}

// CHECK-LABEL: define dso_local spir_func void @_Z8TestFabsN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi8EEE(
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.342") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.342") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.336") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.336") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VEC_ADDR_I_I_I_I2_I:%.*]] = alloca <8 x float>, align 32
// CHECK-NEXT: [[DST_I_I_I_I3_I:%.*]] = alloca [8 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2
Expand Down Expand Up @@ -185,7 +185,7 @@ SYCL_EXTERNAL auto TestFabs(vec<bfloat16, 8> a) {
}

// CHECK-LABEL: define dso_local spir_func void @_Z8TestCeilN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi8EEE(
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.342") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.342") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META59:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.336") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.336") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META59:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VEC_ADDR_I_I_I_I2_I:%.*]] = alloca <8 x float>, align 32
// CHECK-NEXT: [[DST_I_I_I_I3_I:%.*]] = alloca [8 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2
Expand Down Expand Up @@ -221,7 +221,7 @@ SYCL_EXTERNAL auto TestCeil(vec<bfloat16, 8> a) {
}

// CHECK-LABEL: define dso_local spir_func void @_Z7TestFMAN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi16EEES5_S5_(
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.420") align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.420") align 32 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.420") align 32 captures(none) [[B:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.420") align 32 captures(none) [[C:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META70:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.412") align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.412") align 32 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.412") align 32 captures(none) [[B:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.412") align 32 captures(none) [[C:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META70:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VEC_ADDR_I_I_I_I14_I:%.*]] = alloca <16 x float>, align 64
// CHECK-NEXT: [[DST_I_I_I_I15_I:%.*]] = alloca [16 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2
Expand Down
8 changes: 4 additions & 4 deletions sycl/test/check_device_code/vector/convert_bfloat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ SYCL_EXTERNAL auto TestBFtoFDeviceRZ(vec<bfloat16, 3> &inp) {
}

// CHECK-LABEL: define dso_local spir_func void @_Z19TestBFtointDeviceRZRN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEE(
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.71") align 16 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 8 captures(none) dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META18:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.70") align 16 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 8 captures(none) dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META18:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]])
// CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x i16>, ptr addrspace(4) [[INP]], align 8, !noalias [[META19]]
Expand All @@ -90,7 +90,7 @@ SYCL_EXTERNAL auto TestBFtointDeviceRZ(vec<bfloat16, 3> &inp) {
}

// CHECK-LABEL: define dso_local spir_func void @_Z20TestBFtointDeviceRNERN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi1EEE(
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.110") align 4 captures(none) initializes((0, 4)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 2 captures(none) dereferenceable(2) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.108") align 4 captures(none) initializes((0, 4)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 2 captures(none) dereferenceable(2) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]])
// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr addrspace(4) [[INP]], align 2, !tbaa [[TBAA11]], !noalias [[META25]]
Expand Down Expand Up @@ -182,7 +182,7 @@ SYCL_EXTERNAL auto TestInttoBFDeviceRZ(vec<int, 3> &inp) {
}

// CHECK-LABEL: define dso_local spir_func void @_Z19TestLLtoBFDeviceRTPRN4sycl3_V13vecIxLi1EEE(
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.149") align 2 captures(none) initializes((0, 2)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 8 captures(none) dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META42:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.146") align 2 captures(none) initializes((0, 2)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 8 captures(none) dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META42:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META43:![0-9]+]])
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) [[INP]], align 8, !tbaa [[TBAA46:![0-9]+]], !noalias [[META43]]
Expand All @@ -195,7 +195,7 @@ SYCL_EXTERNAL auto TestLLtoBFDeviceRTP(vec<long long, 1> &inp) {
}

// CHECK-LABEL: define dso_local spir_func void @_Z22TestShorttoBFDeviceRTNRN4sycl3_V13vecIsLi2EEE(
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.229") align 4 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 4 captures(none) dereferenceable(4) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.224") align 4 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 4 captures(none) dereferenceable(4) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META49:![0-9]+]])
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr addrspace(4) [[INP]], align 4, !tbaa [[TBAA11]], !noalias [[META49]]
Expand Down
Loading
Loading