@@ -649,8 +649,8 @@ inline ESIMD_NODEBUG void esimd_sbarrier(split_barrier_action flag) {
649
649
/// Declare per-work-group SLM size.
650
650
SYCL_EXTERNAL SYCL_ESIMD_FUNCTION void slm_init (uint32_t size);
651
651
652
- // / SLM gather (version for 4-byte block size) .
653
- // / \tparam T element type of the input vector.
652
+ /// SLM gather.
653
+ /// \tparam T element type of the input vector, must be a 4-byte type.
654
654
/// \tparam N size of the \p offsets, \p pred and returned vectors. Must be 16
655
655
// / or 32.
656
656
// / @param offsets byte-offsets within the SLM.
@@ -665,26 +665,8 @@ ESIMD_INLINE ESIMD_NODEBUG
665
665
return __esimd_slm_read<T, N>(offsets.data (), pred.data ());
666
666
}
667
667
668
- /// SLM gather (version for 1- and 2-byte block size).
669
- /// \tparam T element type of the input vector.
670
- /// \tparam N size of the \p offsets, \p pred and returned vectors. Must be 16
671
- /// or 32.
672
- /// @param offsets byte-offsets within the SLM.
673
- /// @param pred predication control used for masking lanes.
674
- /// @return vector of read values of type \p T.
675
- /// \ingroup sycl_esimd
676
- template <typename T, int N>
677
- ESIMD_INLINE ESIMD_NODEBUG typename sycl::detail::enable_if_t <
678
- (N == 16 || N == 32 ) && (sizeof (T) == 1 || sizeof (T) == 2 ), simd<T, N>>
679
- slm_load (simd<uint32_t , N> offsets, simd<uint16_t , N> pred = 1 ) {
680
- typedef typename detail::dword_type<T>::type T1; // NOTE(review): presumably the 4-byte counterpart of T - confirm against detail::dword_type
681
- simd<T1, N> temp = __esimd_slm_read<T1, N>(offsets.data (), pred.data ()); // the read is issued with element type T1, not T
682
- simd<T, N> res = temp; // convert the T1 vector to the requested element type T
683
- return res;
684
- }
685
-
686
- // / SLM scatter (version for 4-byte block size).
687
- // / \tparam T element type of the input vector.
668
+ /// SLM scatter.
669
+ /// \tparam T element type of the input vector, must be a 4-byte type.
688
670
/// \tparam N size of the \p offsets, \p pred and \p vals vectors. Must be 16
689
671
// / or 32.
690
672
// / @param vals values to be written.
@@ -700,24 +682,6 @@ ESIMD_INLINE ESIMD_NODEBUG
700
682
__esimd_slm_write<T, N>(offsets.data (), vals.data (), pred.data ());
701
683
}
702
684
703
- /// SLM scatter (version for 1- and 2-byte block size).
704
- /// \tparam T element type of the input vector.
705
- /// \tparam N size of the \p offsets, \p pred and \p vals vectors. Must be 16
706
- /// or 32.
707
- /// @param vals values to be written.
708
- /// @param offsets byte-offsets within the SLM.
709
- /// @param pred predication control used for masking lanes.
710
- /// \ingroup sycl_esimd
711
- template <typename T, int N>
712
- ESIMD_INLINE ESIMD_NODEBUG typename sycl::detail::enable_if_t <
713
- (N == 16 || N == 32 ) && (sizeof (T) == 1 || sizeof (T) == 2 ), void >
714
- slm_store (simd<T, N> vals, simd<uint32_t , N> offsets,
715
- simd<uint16_t , N> pred = 1 ) {
716
- typedef typename detail::dword_type<T>::type T1; // NOTE(review): presumably the 4-byte counterpart of T - confirm against detail::dword_type
717
- simd<T1, N> temp = vals; // convert the values from element type T to T1 before writing
718
- __esimd_slm_write<T1, N>(offsets.data (), temp.data (), pred.data ()); // the write is issued with element type T1, not T
719
- }
720
-
721
685
// / SLM gather4.
722
686
// /
723
687
// / Only allow simd-8, simd-16 and simd-32.
0 commit comments