@@ -1775,19 +1775,21 @@ template <bfn_t FuncControl, typename T, int N>
1775
1775
__ESIMD_API std::enable_if_t <std::is_integral_v<T>, __ESIMD_NS::simd<T, N>>
1776
1776
bfn (__ESIMD_NS::simd<T, N> src0, __ESIMD_NS::simd<T, N> src1,
1777
1777
__ESIMD_NS::simd<T, N> src2) {
1778
- if constexpr (sizeof (T) == 2 || sizeof (T) == 4 ) {
1779
- constexpr uint8_t FC = static_cast <uint8_t >(FuncControl);
1780
- return __esimd_bfn<FC, T, N>(src0.data (), src1.data (), src2.data ());
1781
- } else if constexpr ((sizeof (T) == 8 ) || ((sizeof (T) == 1 ) && (N % 4 == 0 ))) {
1782
- // Bitcast 8-byte vector to 2xN vectors of 4-byte integer.
1783
- // Optimize 1-byte vectors via bitcasting to vector of 4-byte integers.
1778
+ if constexpr ((sizeof (T) == 8 ) || ((sizeof (T) == 1 ) && (N % 4 == 0 )) ||
1779
+ ((sizeof (T) == 2 ) && (N % 2 == 0 ))) {
1780
+ // Bitcast Nx8-byte vectors to 2xN vectors of 4-byte integers.
1781
+ // Bitcast Nx1-byte vectors to N/4 vectors of 4-byte integers.
1782
+ // Bitcast Nx2-byte vectors to N/2 vectors of 4-byte integers.
1784
1783
auto Result = __ESIMD_ENS::bfn<FuncControl>(
1785
1784
src0.template bit_cast_view <int32_t >().read (),
1786
1785
src1.template bit_cast_view <int32_t >().read (),
1787
1786
src2.template bit_cast_view <int32_t >().read ());
1788
1787
return Result.template bit_cast_view <T>();
1788
+ } else if constexpr (sizeof (T) == 2 || sizeof (T) == 4 ) {
1789
+ constexpr uint8_t FC = static_cast <uint8_t >(FuncControl);
1790
+ return __esimd_bfn<FC, T, N>(src0.data (), src1.data (), src2.data ());
1789
1791
} else if constexpr (N % 2 == 0 ) {
1790
- // Even number of 1 -byte elements .
1792
+ // Bitcast Nx1-byte vectors (N is even) to N/2 vectors of 2-byte integers.
1791
1793
auto Result = __ESIMD_ENS::bfn<FuncControl>(
1792
1794
src0.template bit_cast_view <int16_t >().read (),
1793
1795
src1.template bit_cast_view <int16_t >().read (),
0 commit comments