Skip to content

Commit e04cd93

Browse files
authored
Merge pull request xtensor-stack#454 from serge-sans-paille/ref_314
Ref 314
2 parents 6fbf81d + 0eb5b76 commit e04cd93

39 files changed

+852
-29
lines changed

docs/source/api/basic_functions.rst

+8
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,14 @@ Basic functions
5959
.. doxygenfunction:: fdim(const batch<T, N>&, const batch<T, N>&)
6060
:project: xsimd
6161

62+
.. _sadd-function-reference:
63+
.. doxygenfunction:: sadd(const simd_base<B>&, const simd_base<B>&)
64+
:project: xsimd
65+
66+
.. _ssub-function-reference:
67+
.. doxygenfunction:: ssub(const simd_base<B>&, const simd_base<B>&)
68+
:project: xsimd
69+
6270
.. _clip-function-reference:
6371
.. doxygenfunction:: clip(const simd_base<B>&, const simd_base<B>&, const simd_base<B>&)
6472
:project: xsimd

docs/source/api/math_index.rst

+4
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,10 @@ Mathematical functions
6060
+---------------------------------------+----------------------------------------------------+
6161
| :ref:`fdim <fdim-function-reference>` | positive difference |
6262
+---------------------------------------+----------------------------------------------------+
63+
| :ref:`sadd <sadd-function-reference>` | saturated addition |
64+
+---------------------------------------+----------------------------------------------------+
65+
| :ref:`ssub <ssub-function-reference>` | saturated subtraction |
66+
+---------------------------------------+----------------------------------------------------+
6367
| :ref:`clip <clip-function-reference>` | clipping operation |
6468
+---------------------------------------+----------------------------------------------------+
6569

include/xsimd/math/xsimd_math.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
#include "xsimd_logarithm.hpp"
2121
#include "xsimd_power.hpp"
2222
#include "xsimd_rounding.hpp"
23-
#include "xsimd_scalar.hpp"
2423
#include "xsimd_trigonometric.hpp"
24+
#include "xsimd/types/xsimd_scalar.hpp"
2525

2626
#endif

include/xsimd/math/xsimd_power.hpp

+1-24
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "xsimd_horner.hpp"
1919
#include "xsimd_logarithm.hpp"
2020
#include "xsimd_numerical_constant.hpp"
21+
#include "xsimd/types/xsimd_common_math.hpp"
2122

2223
namespace xsimd
2324
{
@@ -85,30 +86,6 @@ namespace xsimd
8586
}
8687
};
8788

88-
// Raises t0 to the integral power t1 using repeated squaring.
// The exponent type must be integral (enforced at compile time). When the
// exponent is negative, the reciprocal of the accumulated product is returned.
template <class T0, class T1>
inline T0
ipow(const T0& t0, const T1& t1)
{
    static_assert(std::is_integral<T1>::value, "second argument must be an integer");
    T0 base = t0;
    T1 exponent = t1;
    const bool negative_exponent = exponent < 0;
    T0 result{static_cast<T0>(1)};
    for (;;)
    {
        // Fold the current power of the base into the result when the low
        // bit of the remaining exponent is set.
        if (exponent & 1)
        {
            result *= base;
        }
        exponent /= 2;
        if (exponent == 0)
        {
            break;
        }
        // Square only when another iteration follows.
        base *= base;
    }
    return negative_exponent ? 1 / result : result;
}
11289
}
11390

11491
template <class B>

include/xsimd/types/xsimd_avx512_double.hpp

+10
Original file line numberDiff line numberDiff line change
@@ -372,6 +372,16 @@ namespace xsimd
372372
return _mm512_sub_pd(lhs, rhs);
373373
}
374374

375+
// Saturated-addition entry point for this kernel. No clamping is applied:
// the operands are forwarded unchanged to add().
// NOTE(review): the original author left open whether infinities need
// special handling here.
static batch_type sadd(const batch_type& lhs, const batch_type& rhs)
{
    const batch_type sum = add(lhs, rhs);
    return sum;
}
379+
380+
// Saturated-subtraction entry point for this kernel. No clamping is applied:
// the operands are forwarded unchanged to sub().
// NOTE(review): the original author left open whether infinities need
// special handling here.
static batch_type ssub(const batch_type& lhs, const batch_type& rhs)
{
    const batch_type difference = sub(lhs, rhs);
    return difference;
}
384+
375385
static batch_type mul(const batch_type& lhs, const batch_type& rhs)
376386
{
377387
return _mm512_mul_pd(lhs, rhs);

include/xsimd/types/xsimd_avx512_float.hpp

+10
Original file line numberDiff line numberDiff line change
@@ -395,6 +395,16 @@ namespace xsimd
395395
return _mm512_sub_ps(lhs, rhs);
396396
}
397397

398+
// Saturated-addition entry point for this kernel. No clamping is applied:
// the operands are forwarded unchanged to add().
// NOTE(review): the original author left open whether infinities need
// special handling here.
static batch_type sadd(const batch_type& lhs, const batch_type& rhs)
{
    const batch_type sum = add(lhs, rhs);
    return sum;
}
402+
403+
// Saturated-subtraction entry point for this kernel. No clamping is applied:
// the operands are forwarded unchanged to sub().
// NOTE(review): the original author left open whether infinities need
// special handling here.
static batch_type ssub(const batch_type& lhs, const batch_type& rhs)
{
    const batch_type difference = sub(lhs, rhs);
    return difference;
}
407+
398408
static batch_type mul(const batch_type& lhs, const batch_type& rhs)
399409
{
400410
return _mm512_mul_ps(lhs, rhs);

include/xsimd/types/xsimd_avx512_int16.hpp

+36
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,24 @@ namespace xsimd
243243
#endif
244244
}
245245

246+
// Saturated addition of the two batches.
// With AVX512BW available this is a single _mm512_adds_epi16 (signed 16-bit
// add with saturation). Otherwise the work is handed to the
// XSIMD_APPLY_AVX2_FUNCTION_INT16 macro, which is defined outside this view;
// presumably it applies the AVX2 kernel's sadd and returns the result -
// TODO confirm against the macro definition.
static batch_type sadd(const batch_type &lhs, const batch_type &rhs)
247+
{
248+
#if defined(XSIMD_AVX512BW_AVAILABLE)
249+
return _mm512_adds_epi16(lhs, rhs);
250+
#else
251+
XSIMD_APPLY_AVX2_FUNCTION_INT16(sadd, lhs, rhs);
252+
#endif
253+
}
254+
255+
// Saturated subtraction of the two batches (lhs - rhs).
// With AVX512BW available this is a single _mm512_subs_epi16 (signed 16-bit
// subtract with saturation). Otherwise the work is handed to the
// XSIMD_APPLY_AVX2_FUNCTION_INT16 macro, which is defined outside this view;
// presumably it applies the AVX2 kernel's ssub and returns the result -
// TODO confirm against the macro definition.
static batch_type ssub(const batch_type &lhs, const batch_type &rhs)
256+
{
257+
#if defined(XSIMD_AVX512BW_AVAILABLE)
258+
return _mm512_subs_epi16(lhs, rhs);
259+
#else
260+
XSIMD_APPLY_AVX2_FUNCTION_INT16(ssub, lhs, rhs);
261+
#endif
262+
}
263+
246264
static batch_type mul(const batch_type& lhs, const batch_type& rhs)
247265
{
248266
#if defined(XSIMD_AVX512BW_AVAILABLE)
@@ -477,6 +495,24 @@ namespace xsimd
477495
XSIMD_APPLY_AVX2_FUNCTION_INT16(lte, lhs, rhs);
478496
#endif
479497
}
498+
499+
// Saturated addition of the two batches.
// With AVX512BW available this is a single _mm512_adds_epu16 (unsigned 16-bit
// add with saturation). Otherwise the work is handed to the
// XSIMD_APPLY_AVX2_FUNCTION_UINT16 macro, which is defined outside this view;
// presumably it applies the AVX2 kernel's sadd and returns the result -
// TODO confirm against the macro definition.
static batch_type sadd(const batch_type &lhs, const batch_type &rhs)
500+
{
501+
#if defined(XSIMD_AVX512BW_AVAILABLE)
502+
return _mm512_adds_epu16(lhs, rhs);
503+
#else
504+
XSIMD_APPLY_AVX2_FUNCTION_UINT16(sadd, lhs, rhs);
505+
#endif
506+
}
507+
508+
// Saturated subtraction of the two batches (lhs - rhs).
// With AVX512BW available this is a single _mm512_subs_epu16 (unsigned 16-bit
// subtract with saturation). Otherwise the work is handed to the
// XSIMD_APPLY_AVX2_FUNCTION_UINT16 macro, which is defined outside this view;
// presumably it applies the AVX2 kernel's ssub and returns the result -
// TODO confirm against the macro definition.
static batch_type ssub(const batch_type &lhs, const batch_type &rhs)
509+
{
510+
#if defined(XSIMD_AVX512BW_AVAILABLE)
511+
return _mm512_subs_epu16(lhs, rhs);
512+
#else
513+
XSIMD_APPLY_AVX2_FUNCTION_UINT16(ssub, lhs, rhs);
514+
#endif
515+
}
480516
};
481517
}
482518

include/xsimd/types/xsimd_avx512_int32.hpp

+26
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,19 @@ namespace xsimd
176176
return _mm512_sub_epi32(lhs, rhs);
177177
}
178178

179+
// Saturated addition built from min/max/select instead of a dedicated
// instruction. lhs is first limited so that adding rhs cannot leave the
// range of value_type, then rhs is added.
static batch_type sadd(const batch_type& lhs, const batch_type& rhs)
{
    // Candidate for lanes with a negative rhs: keep lhs at or above min - rhs.
    const batch_type floored_lhs = max(std::numeric_limits<value_type>::min() - rhs, lhs);
    // Candidate for the remaining lanes: keep lhs at or below max - rhs.
    const batch_type capped_lhs = min(std::numeric_limits<value_type>::max() - rhs, lhs);
    // One mask bit per lane, taken from the lane's sign bit.
    const batch_bool_type rhs_is_negative = _mm512_movepi32_mask(rhs);
    return rhs + select(rhs_is_negative, floored_lhs, capped_lhs);
}
186+
187+
// Saturated subtraction: computes lhs - rhs, clamping the result to the
// range of value_type instead of letting it wrap.
//
// Expressing this as sadd(lhs, neg(rhs)) is incorrect for lanes where rhs is
// the minimum value: negating it wraps back to the minimum, so such a lane
// ends up saturating towards the minimum instead of the maximum. Clamping lhs
// first and subtracting rhs directly avoids the negation altogether:
//   rhs <  0 : lhs is capped at max + rhs, so lhs - rhs cannot exceed max;
//   rhs >= 0 : lhs is floored at min + rhs, so lhs - rhs cannot drop below min.
// Neither bound overflows in the lanes for which it is selected.
static batch_type ssub(const batch_type& lhs, const batch_type& rhs)
{
    // One mask bit per lane, taken from the lane's sign bit (set => rhs < 0).
    const batch_bool_type rhs_is_negative = _mm512_movepi32_mask(rhs);
    const batch_type capped_lhs = min(batch_type(std::numeric_limits<value_type>::max()) + rhs, lhs);
    const batch_type floored_lhs = max(batch_type(std::numeric_limits<value_type>::min()) + rhs, lhs);
    return select(rhs_is_negative, capped_lhs, floored_lhs) - rhs;
}
191+
179192
static batch_type mul(const batch_type& lhs, const batch_type& rhs)
180193
{
181194
return _mm512_mullo_epi32(lhs, rhs);
@@ -361,6 +374,19 @@ namespace xsimd
361374
{
362375
return rhs;
363376
}
377+
378+
// Saturated addition: the amount added to lhs is limited to the distance
// between lhs and the largest value of value_type, so the sum cannot wrap.
static batch_type sadd(const batch_type& lhs, const batch_type& rhs)
{
    // Distance from lhs up to the maximum representable value.
    const auto headroom = batch_type(std::numeric_limits<value_type>::max()) - lhs;
    return lhs + min(headroom, rhs);
}
384+
385+
// Saturated subtraction: never removes more than lhs itself, so the result
// bottoms out at zero instead of wrapping.
static batch_type ssub(const batch_type& lhs, const batch_type& rhs)
{
    return lhs - min(lhs, rhs);
}
364390
};
365391
}
366392

include/xsimd/types/xsimd_avx512_int64.hpp

+26
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,19 @@ namespace xsimd
234234
return _mm512_sub_epi64(lhs, rhs);
235235
}
236236

237+
// Saturated addition built from min/max/select instead of a dedicated
// instruction. lhs is first limited so that adding rhs cannot leave the
// range of value_type, then rhs is added.
static batch_type sadd(const batch_type& lhs, const batch_type& rhs)
{
    // Candidate for lanes with a negative rhs: keep lhs at or above min - rhs.
    const batch_type floored_lhs = max(std::numeric_limits<value_type>::min() - rhs, lhs);
    // Candidate for the remaining lanes: keep lhs at or below max - rhs.
    const batch_type capped_lhs = min(std::numeric_limits<value_type>::max() - rhs, lhs);
    // One mask bit per lane, taken from the lane's sign bit.
    const batch_bool_type rhs_is_negative = _mm512_movepi64_mask(rhs);
    return rhs + select(rhs_is_negative, floored_lhs, capped_lhs);
}
244+
245+
// Saturated subtraction: computes lhs - rhs, clamping the result to the
// range of value_type instead of letting it wrap.
//
// Expressing this as sadd(lhs, neg(rhs)) is incorrect for lanes where rhs is
// the minimum value: negating it wraps back to the minimum, so such a lane
// ends up saturating towards the minimum instead of the maximum. Clamping lhs
// first and subtracting rhs directly avoids the negation altogether:
//   rhs <  0 : lhs is capped at max + rhs, so lhs - rhs cannot exceed max;
//   rhs >= 0 : lhs is floored at min + rhs, so lhs - rhs cannot drop below min.
// Neither bound overflows in the lanes for which it is selected.
static batch_type ssub(const batch_type& lhs, const batch_type& rhs)
{
    // One mask bit per lane, taken from the lane's sign bit (set => rhs < 0).
    const batch_bool_type rhs_is_negative = _mm512_movepi64_mask(rhs);
    const batch_type capped_lhs = min(batch_type(std::numeric_limits<value_type>::max()) + rhs, lhs);
    const batch_type floored_lhs = max(batch_type(std::numeric_limits<value_type>::min()) + rhs, lhs);
    return select(rhs_is_negative, capped_lhs, floored_lhs) - rhs;
}
249+
237250
static batch_type mul(const batch_type& lhs, const batch_type& rhs)
238251
{
239252
return _mm512_mullo_epi64(lhs, rhs);
@@ -430,6 +443,19 @@ namespace xsimd
430443
{
431444
return rhs;
432445
}
446+
447+
// Saturated addition: the amount added to lhs is limited to the distance
// between lhs and the largest value of value_type, so the sum cannot wrap.
static batch_type sadd(const batch_type& lhs, const batch_type& rhs)
{
    // Distance from lhs up to the maximum representable value.
    const auto headroom = batch_type(std::numeric_limits<value_type>::max()) - lhs;
    return lhs + min(headroom, rhs);
}
453+
454+
// Saturated subtraction: never removes more than lhs itself, so the result
// bottoms out at zero instead of wrapping.
static batch_type ssub(const batch_type& lhs, const batch_type& rhs)
{
    return lhs - min(lhs, rhs);
}
433459
};
434460
}
435461

include/xsimd/types/xsimd_avx512_int8.hpp

+36
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,24 @@ namespace xsimd
243243
#endif
244244
}
245245

246+
// Saturated addition of the two batches.
// With AVX512BW available this is a single _mm512_adds_epi8 (signed 8-bit
// add with saturation). Otherwise the work is handed to the
// XSIMD_APPLY_AVX2_FUNCTION_INT8 macro, which is defined outside this view;
// presumably it applies the AVX2 kernel's sadd and returns the result -
// TODO confirm against the macro definition.
static batch_type sadd(const batch_type &lhs, const batch_type &rhs)
247+
{
248+
#if defined(XSIMD_AVX512BW_AVAILABLE)
249+
return _mm512_adds_epi8(lhs, rhs);
250+
#else
251+
XSIMD_APPLY_AVX2_FUNCTION_INT8(sadd, lhs, rhs);
252+
#endif
253+
}
254+
255+
// Saturated subtraction of the two batches (lhs - rhs).
// With AVX512BW available this is a single _mm512_subs_epi8 (signed 8-bit
// subtract with saturation). Otherwise the work is handed to the
// XSIMD_APPLY_AVX2_FUNCTION_INT8 macro, which is defined outside this view;
// presumably it applies the AVX2 kernel's ssub and returns the result -
// TODO confirm against the macro definition.
static batch_type ssub(const batch_type &lhs, const batch_type &rhs)
256+
{
257+
#if defined(XSIMD_AVX512BW_AVAILABLE)
258+
return _mm512_subs_epi8(lhs, rhs);
259+
#else
260+
XSIMD_APPLY_AVX2_FUNCTION_INT8(ssub, lhs, rhs);
261+
#endif
262+
}
263+
246264
static batch_type mul(const batch_type& lhs, const batch_type& rhs)
247265
{
248266
#if defined(XSIMD_AVX512BW_AVAILABLE)
@@ -481,6 +499,24 @@ namespace xsimd
481499
XSIMD_APPLY_AVX2_FUNCTION_INT8(lte, lhs, rhs);
482500
#endif
483501
}
502+
503+
// Saturated addition of the two batches.
// With AVX512BW available this is a single _mm512_adds_epu8 (unsigned 8-bit
// add with saturation). Otherwise the work is handed to the
// XSIMD_APPLY_AVX2_FUNCTION_UINT8 macro, which is defined outside this view;
// presumably it applies the AVX2 kernel's sadd and returns the result -
// TODO confirm against the macro definition.
static batch_type sadd(const batch_type &lhs, const batch_type &rhs)
504+
{
505+
#if defined(XSIMD_AVX512BW_AVAILABLE)
506+
return _mm512_adds_epu8(lhs, rhs);
507+
#else
508+
XSIMD_APPLY_AVX2_FUNCTION_UINT8(sadd, lhs, rhs);
509+
#endif
510+
}
511+
512+
// Saturated subtraction of the two batches (lhs - rhs).
// With AVX512BW available this is a single _mm512_subs_epu8 (unsigned 8-bit
// subtract with saturation). Otherwise the work is handed to the
// XSIMD_APPLY_AVX2_FUNCTION_UINT8 macro, which is defined outside this view;
// presumably it applies the AVX2 kernel's ssub and returns the result -
// TODO confirm against the macro definition.
static batch_type ssub(const batch_type &lhs, const batch_type &rhs)
513+
{
514+
#if defined(XSIMD_AVX512BW_AVAILABLE)
515+
return _mm512_subs_epu8(lhs, rhs);
516+
#else
517+
XSIMD_APPLY_AVX2_FUNCTION_UINT8(ssub, lhs, rhs);
518+
#endif
519+
}
484520
};
485521
}
486522

include/xsimd/types/xsimd_avx_double.hpp

+10
Original file line numberDiff line numberDiff line change
@@ -492,6 +492,16 @@ namespace xsimd
492492
return _mm256_sub_pd(lhs, rhs);
493493
}
494494

495+
// Saturated-addition entry point for this kernel. No clamping is applied:
// the operands are forwarded unchanged to add().
// FIXME (from the original author): decide whether infinities need special
// handling here.
static batch_type sadd(const batch_type& lhs, const batch_type& rhs)
{
    const batch_type sum = add(lhs, rhs);
    return sum;
}
499+
500+
// Saturated-subtraction entry point for this kernel. No clamping is applied:
// the operands are forwarded unchanged to sub().
// FIXME (from the original author): decide whether infinities need special
// handling here.
static batch_type ssub(const batch_type& lhs, const batch_type& rhs)
{
    const batch_type difference = sub(lhs, rhs);
    return difference;
}
504+
495505
static batch_type mul(const batch_type& lhs, const batch_type& rhs)
496506
{
497507
return _mm256_mul_pd(lhs, rhs);

include/xsimd/types/xsimd_avx_float.hpp

+10
Original file line numberDiff line numberDiff line change
@@ -522,6 +522,16 @@ namespace xsimd
522522
return _mm256_sub_ps(lhs, rhs);
523523
}
524524

525+
// Saturated-addition entry point for this kernel. No clamping is applied:
// the operands are forwarded unchanged to add().
// FIXME (from the original author): decide whether infinities need special
// handling here.
static batch_type sadd(const batch_type& lhs, const batch_type& rhs)
{
    const batch_type sum = add(lhs, rhs);
    return sum;
}
529+
530+
// Saturated-subtraction entry point for this kernel. No clamping is applied:
// the operands are forwarded unchanged to sub().
// FIXME (from the original author): decide whether infinities need special
// handling here.
static batch_type ssub(const batch_type& lhs, const batch_type& rhs)
{
    const batch_type difference = sub(lhs, rhs);
    return difference;
}
534+
525535
static batch_type mul(const batch_type& lhs, const batch_type& rhs)
526536
{
527537
return _mm256_mul_ps(lhs, rhs);

include/xsimd/types/xsimd_avx_int16.hpp

+36
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,24 @@ namespace xsimd
178178
XSIMD_APPLY_SSE_FUNCTION(_mm_sub_epi16, lhs, rhs);
179179
#endif
180180
}
181+
182+
// Saturated addition of the two batches.
// When the targeted instruction set is at least AVX2 this is a single
// _mm256_adds_epi16 (signed 16-bit add with saturation). Otherwise
// _mm_adds_epi16 is passed to the XSIMD_APPLY_SSE_FUNCTION macro, which is
// defined outside this view; presumably it applies the 128-bit intrinsic to
// each half of the operands and returns the result - TODO confirm.
static batch_type sadd(const batch_type& lhs, const batch_type& rhs)
183+
{
184+
#if XSIMD_X86_INSTR_SET >= XSIMD_X86_AVX2_VERSION
185+
return _mm256_adds_epi16(lhs, rhs);
186+
#else
187+
XSIMD_APPLY_SSE_FUNCTION(_mm_adds_epi16, lhs, rhs);
188+
#endif
189+
}
190+
191+
// Saturated subtraction of the two batches (lhs - rhs).
// When the targeted instruction set is at least AVX2 this is a single
// _mm256_subs_epi16 (signed 16-bit subtract with saturation). Otherwise
// _mm_subs_epi16 is passed to the XSIMD_APPLY_SSE_FUNCTION macro, which is
// defined outside this view; presumably it applies the 128-bit intrinsic to
// each half of the operands and returns the result - TODO confirm.
static batch_type ssub(const batch_type& lhs, const batch_type& rhs)
192+
{
193+
#if XSIMD_X86_INSTR_SET >= XSIMD_X86_AVX2_VERSION
194+
return _mm256_subs_epi16(lhs, rhs);
195+
#else
196+
XSIMD_APPLY_SSE_FUNCTION(_mm_subs_epi16, lhs, rhs);
197+
#endif
198+
}
181199

182200
static batch_type mul(const batch_type& lhs, const batch_type& rhs)
183201
{
@@ -342,6 +360,24 @@ namespace xsimd
342360
{
343361
return rhs;
344362
}
363+
364+
// Saturated addition of the two batches.
// When the targeted instruction set is at least AVX2 this is a single
// _mm256_adds_epu16 (unsigned 16-bit add with saturation). Otherwise
// _mm_adds_epu16 is passed to the XSIMD_APPLY_SSE_FUNCTION macro, which is
// defined outside this view; presumably it applies the 128-bit intrinsic to
// each half of the operands and returns the result - TODO confirm.
static batch_type sadd(const batch_type& lhs, const batch_type& rhs)
365+
{
366+
#if XSIMD_X86_INSTR_SET >= XSIMD_X86_AVX2_VERSION
367+
return _mm256_adds_epu16(lhs, rhs);
368+
#else
369+
XSIMD_APPLY_SSE_FUNCTION(_mm_adds_epu16, lhs, rhs);
370+
#endif
371+
}
372+
373+
// Saturated subtraction of the two batches (lhs - rhs).
// When the targeted instruction set is at least AVX2 this is a single
// _mm256_subs_epu16 (unsigned 16-bit subtract with saturation). Otherwise
// _mm_subs_epu16 is passed to the XSIMD_APPLY_SSE_FUNCTION macro, which is
// defined outside this view; presumably it applies the 128-bit intrinsic to
// each half of the operands and returns the result - TODO confirm.
static batch_type ssub(const batch_type& lhs, const batch_type& rhs)
374+
{
375+
#if XSIMD_X86_INSTR_SET >= XSIMD_X86_AVX2_VERSION
376+
return _mm256_subs_epu16(lhs, rhs);
377+
#else
378+
XSIMD_APPLY_SSE_FUNCTION(_mm_subs_epu16, lhs, rhs);
379+
#endif
380+
}
345381
};
346382
}
347383

0 commit comments

Comments
 (0)