@@ -655,39 +655,92 @@ SYCL_EXTERNAL SYCL_ESIMD_FUNCTION void slm_init(uint32_t size);
655
655
// /
656
656
// / Only allow simd-16 and simd-32.
657
657
template <typename T, int n>
658
- ESIMD_INLINE ESIMD_NODEBUG
659
- typename sycl::detail::enable_if_t <(n == 16 || n == 32 ), simd<T, n>>
660
- slm_load (simd<uint32_t , n> offsets, simd_mask<n> Pred = 1 ) {
661
- return __esimd_slm_read<T, n>(offsets.data (), Pred.data ());
658
+ ESIMD_INLINE ESIMD_NODEBUG std::enable_if_t <(n == 16 || n == 32 ), simd<T, n>>
659
+ slm_gather (simd<uint32_t , n> offsets, simd_mask<n> pred = 1 ) {
660
+ return __esimd_slm_read<T, n>(offsets.data (), pred.data ());
661
+ }
662
+
663
+ // / SLM gather (deprecated version).
664
+ template <typename T, int n>
665
+ __SYCL_DEPRECATED (" use slm_gather." )
666
+ ESIMD_INLINE
667
+ ESIMD_NODEBUG std::enable_if_t <(n == 16 || n == 32 ), simd<T, n>> slm_load (
668
+ simd<uint32_t , n> offsets, simd<uint16_t , n> pred = 1 ) {
669
+ return slm_gather<T, n>(offsets, pred);
662
670
}
663
671
664
672
// / SLM scatter.
665
673
template <typename T, int n>
666
- ESIMD_INLINE ESIMD_NODEBUG
667
- typename sycl::detail::enable_if_t <(n == 16 || n == 32 ), void >
668
- slm_store (simd<T, n> vals, simd<uint32_t , n> offsets,
669
- simd_mask<n> pred = 1 ) {
674
+ ESIMD_INLINE ESIMD_NODEBUG std::enable_if_t <(n == 16 || n == 32 )>
675
+ slm_scatter (simd<T, n> vals, simd<uint32_t , n> offsets, simd_mask<n> pred = 1 ) {
670
676
__esimd_slm_write<T, n>(offsets.data (), vals.data (), pred.data ());
671
677
}
672
678
679
+ // / SLM scatter (deprecated version).
680
+ template <typename T, int n>
681
+ __SYCL_DEPRECATED (" use slm_scatter." )
682
+ ESIMD_INLINE ESIMD_NODEBUG std::enable_if_t <(n == 16 || n == 32 )> slm_store (
683
+ simd<T, n> vals, simd<uint32_t , n> offsets, simd<uint16_t , n> pred = 1 ) {
684
+ slm_scatter<T, n>(vals, offsets, pred);
685
+ }
686
+
687
+ // / Gathering read from the SLM given specified \p offsets.
688
+ // / Up to 4 data elements may be accessed at each address depending on the
689
+ // / enabled channel \p Mask.
690
+ // / \tparam T element type of the returned vector. Must be 4-byte.
691
+ // / \tparam N size of the \p offsets vector. Must be 8, 16 or 32.
692
+ // / \tparam Mask represents a pixel's channel mask.
693
+ // / @param offsets byte-offsets within the SLM.
694
+ // / @param pred predication control used for masking lanes.
695
+ // / \ingroup sycl_esimd
696
+ template <typename T, int N, rgba_channel_mask Mask>
697
+ ESIMD_INLINE ESIMD_NODEBUG
698
+ std::enable_if_t <(N == 8 || N == 16 || N == 32 ) && (sizeof (T) == 4 ),
699
+ simd<T, N * get_num_channels_enabled (Mask)>>
700
+ slm_gather_rgba(simd<uint32_t , N> offsets, simd<uint16_t , N> pred = 1 ) {
701
+ return __esimd_slm_read4<T, N, Mask>(offsets.data (), pred.data ());
702
+ }
703
+
673
704
// / SLM gather4.
674
705
// /
675
706
// / Only allow simd-8, simd-16 and simd-32.
676
707
template <typename T, int n, rgba_channel_mask Mask>
677
- ESIMD_INLINE ESIMD_NODEBUG typename sycl::detail::enable_if_t <
708
+ __SYCL_DEPRECATED (" use slm_gather_rgba." )
709
+ ESIMD_INLINE ESIMD_NODEBUG std::enable_if_t <
678
710
(n == 8 || n == 16 || n == 32 ) && (sizeof (T) == 4 ),
679
- simd<T, n * get_num_channels_enabled (Mask)>>
680
- slm_load4(simd<uint32_t , n> offsets, simd_mask<n> pred = 1 ) {
681
- return __esimd_slm_read4<T, n, Mask>(offsets.data (), pred.data ());
711
+ simd<T, n * get_num_channels_enabled (Mask)>> slm_load4(simd<uint32_t , n>
712
+ offsets,
713
+ simd_mask<n> pred =
714
+ 1 ) {
715
+ return slm_gather_rgba<T, n, Mask>(offsets, pred);
716
+ }
717
+
718
+ // / Scatter write to the SLM given specified \p offsets.
719
+ // / Up to 4 data elements may be written at each address depending on the
720
+ // / enabled channel \p Mask.
721
+ // / \tparam T element type of the input vector. Must be 4-byte.
722
+ // / \tparam N size of the \p offsets vector. Must be 8, 16 or 32.
723
+ // / \tparam Mask represents a pixel's channel mask.
724
+ // / @param vals values to be written.
725
+ // / @param offsets byte-offsets within the SLM.
726
+ // / @param pred predication control used for masking lanes.
727
+ // / \ingroup sycl_esimd
728
+ template <typename T, int N, rgba_channel_mask Mask>
729
+ ESIMD_INLINE ESIMD_NODEBUG
730
+ std::enable_if_t <(N == 8 || N == 16 || N == 32 ) && (sizeof (T) == 4 )>
731
+ slm_scatter_rgba (simd<T, N * get_num_channels_enabled (Mask)> vals,
732
+ simd<uint32_t, N> offsets, simd_mask<N> pred = 1) {
733
+ __esimd_slm_write4<T, N, Mask>(offsets.data (), vals.data (), pred.data ());
682
734
}
683
735
684
736
// / SLM scatter4.
685
737
template <typename T, int n, rgba_channel_mask Mask>
686
- ESIMD_INLINE ESIMD_NODEBUG typename sycl::detail::enable_if_t <
687
- (n == 8 || n == 16 || n == 32 ) && (sizeof (T) == 4 ), void >
688
- slm_store4 (simd<T, n * get_num_channels_enabled (Mask)> vals,
689
- simd<uint32_t, n> offsets, simd_mask<n> pred = 1) {
690
- __esimd_slm_write4<T, n, Mask>(offsets.data (), vals.data (), pred.data ());
738
+ __SYCL_DEPRECATED (" use slm_scatter_rgba." )
739
+ ESIMD_INLINE ESIMD_NODEBUG std::
740
+ enable_if_t<(n == 8 || n == 16 || n == 32 ) && (sizeof (T) == 4)> slm_store4(
741
+ simd<T, n * get_num_channels_enabled (Mask)> vals,
742
+ simd<uint32_t, n> offsets, simd_mask<n> pred = 1) {
743
+ slm_scatter_rgba<T, n, Mask>(vals, offsets, pred);
691
744
}
692
745
693
746
// / SLM block-load.
0 commit comments