@@ -2178,6 +2178,244 @@ template <typename RetT, typename AT, typename BT>
2178
2178
inline constexpr RetT extend_vavrg2_sat(AT a, BT b, RetT c);
2179
2179
```
2180
2180
2181
+ Similarly, a set of vectorized extend 32-bit operations is provided in the math
2182
+ header treating each of the 32-bit operands as a 4-element vector (8 bits each)
2183
+ while handling sign extension to 9 bits internally. There is support for `add`,
2184
+ `sub`, `absdiff`, `min`, `max` and `avg` binary operations.
2185
+ Each operation has a `_sat` variant which determines if the returning
2186
+ value is saturated or not, and a `_add` variant that computes the binary sum
2187
+ of the initial operation outputs and a third operand.
2188
+
2189
+ ```cpp
2190
+ /// Compute vectorized addition of \p a and \p b, with each value treated as a
2191
+ /// 4 elements vector type and extend each element to 9 bit.
2192
+ /// \tparam [in] RetT The type of the return value, can only be 32 bit integer
2193
+ /// \tparam [in] AT The type of the first value, can only be 32 bit integer
2194
+ /// \tparam [in] BT The type of the second value, can only be 32 bit integer
2195
+ /// \param [in] a The first value
2196
+ /// \param [in] b The second value
2197
+ /// \param [in] c The third value
2198
+ /// \returns The extend vectorized addition of the two values
2199
+ template <typename RetT, typename AT, typename BT>
2200
+ inline constexpr RetT extend_vadd4(AT a, BT b, RetT c);
2201
+
2202
+ /// Compute vectorized addition of \p a and \p b, with each value treated as a 4
2203
+ /// elements vector type and extend each element to 9 bit. Then add each half
2204
+ /// of the result and add with \p c.
2205
+ /// \tparam [in] RetT The type of the return value, can only be 32 bit integer
2206
+ /// \tparam [in] AT The type of the first value, can only be 32 bit integer
2207
+ /// \tparam [in] BT The type of the second value, can only be 32 bit integer
2208
+ /// \param [in] a The first value
2209
+ /// \param [in] b The second value
2210
+ /// \param [in] c The third value
2211
+ /// \returns The addition of each half of extend vectorized addition of the two
2212
+ /// values and the third value
2213
+ template <typename RetT, typename AT, typename BT>
2214
+ inline constexpr RetT extend_vadd4_add(AT a, BT b, RetT c);
2215
+
2216
+ /// Compute vectorized addition of \p a and \p b with saturation, with each
2217
+ /// value treated as a 4 elements vector type and extend each element to 9 bit.
2218
+ /// \tparam [in] RetT The type of the return value, can only be 32 bit integer
2219
+ /// \tparam [in] AT The type of the first value, can only be 32 bit integer
2220
+ /// \tparam [in] BT The type of the second value, can only be 32 bit integer
2221
+ /// \param [in] a The first value
2222
+ /// \param [in] b The second value
2223
+ /// \param [in] c The third value
2224
+ /// \returns The extend vectorized addition of the two values with saturation
2225
+ template <typename RetT, typename AT, typename BT>
2226
+ inline constexpr RetT extend_vadd4_sat(AT a, BT b, RetT c);
2227
+
2228
+ /// Compute vectorized subtraction of \p a and \p b, with each value treated as
2229
+ /// a 4 elements vector type and extend each element to 9 bit.
2230
+ /// \tparam [in] RetT The type of the return value, can only be 32 bit integer
2231
+ /// \tparam [in] AT The type of the first value, can only be 32 bit integer
2232
+ /// \tparam [in] BT The type of the second value, can only be 32 bit integer
2233
+ /// \param [in] a The first value
2234
+ /// \param [in] b The second value
2235
+ /// \param [in] c The third value
2236
+ /// \returns The extend vectorized subtraction of the two values
2237
+ template <typename RetT, typename AT, typename BT>
2238
+ inline constexpr RetT extend_vsub4(AT a, BT b, RetT c);
2239
+
2240
+ /// Compute vectorized subtraction of \p a and \p b, with each value treated as
2241
+ /// a 4 elements vector type and extend each element to 9 bit. Then add each
2242
+ /// half of the result and add with \p c.
2243
+ /// \tparam [in] RetT The type of the return value, can only be 32 bit integer
2244
+ /// \tparam [in] AT The type of the first value, can only be 32 bit integer
2245
+ /// \tparam [in] BT The type of the second value, can only be 32 bit integer
2246
+ /// \param [in] a The first value
2247
+ /// \param [in] b The second value
2248
+ /// \param [in] c The third value
2249
+ /// \returns The addition of each half of extend vectorized subtraction of the
2250
+ /// two values and the third value
2251
+ template <typename RetT, typename AT, typename BT>
2252
+ inline constexpr RetT extend_vsub4_add(AT a, BT b, RetT c);
2253
+
2254
+ /// Compute vectorized subtraction of \p a and \p b with saturation, with each
2255
+ /// value treated as a 4 elements vector type and extend each element to 9 bit.
2256
+ /// \tparam [in] RetT The type of the return value, can only be 32 bit integer
2257
+ /// \tparam [in] AT The type of the first value, can only be 32 bit integer
2258
+ /// \tparam [in] BT The type of the second value, can only be 32 bit integer
2259
+ /// \param [in] a The first value
2260
+ /// \param [in] b The second value
2261
+ /// \param [in] c The third value
2262
+ /// \returns The extend vectorized subtraction of the two values with saturation
2263
+ template <typename RetT, typename AT, typename BT>
2264
+ inline constexpr RetT extend_vsub4_sat(AT a, BT b, RetT c);
2265
+
2266
+ /// Compute vectorized abs_diff of \p a and \p b, with each value treated as a 4
2267
+ /// elements vector type and extend each element to 9 bit.
2268
+ /// \tparam [in] RetT The type of the return value, can only be 32 bit integer
2269
+ /// \tparam [in] AT The type of the first value, can only be 32 bit integer
2270
+ /// \tparam [in] BT The type of the second value, can only be 32 bit integer
2271
+ /// \param [in] a The first value
2272
+ /// \param [in] b The second value
2273
+ /// \param [in] c The third value
2274
+ /// \returns The extend vectorized abs_diff of the two values
2275
+ template <typename RetT, typename AT, typename BT>
2276
+ inline constexpr RetT extend_vabsdiff4(AT a, BT b, RetT c);
2277
+
2278
+ /// Compute vectorized abs_diff of \p a and \p b, with each value treated as a 4
2279
+ /// elements vector type and extend each element to 9 bit. Then add each half
2280
+ /// of the result and add with \p c.
2281
+ /// \tparam [in] RetT The type of the return value, can only be 32 bit integer
2282
+ /// \tparam [in] AT The type of the first value, can only be 32 bit integer
2283
+ /// \tparam [in] BT The type of the second value, can only be 32 bit integer
2284
+ /// \param [in] a The first value
2285
+ /// \param [in] b The second value
2286
+ /// \param [in] c The third value
2287
+ /// \returns The addition of each half of extend vectorized abs_diff of the
2288
+ /// two values and the third value
2289
+ template <typename RetT, typename AT, typename BT>
2290
+ inline constexpr RetT extend_vabsdiff4_add(AT a, BT b, RetT c);
2291
+
2292
+ /// Compute vectorized abs_diff of \p a and \p b with saturation, with each
2293
+ /// value treated as a 4 elements vector type and extend each element to 9 bit.
2294
+ /// \tparam [in] RetT The type of the return value, can only be 32 bit integer
2295
+ /// \tparam [in] AT The type of the first value, can only be 32 bit integer
2296
+ /// \tparam [in] BT The type of the second value, can only be 32 bit integer
2297
+ /// \param [in] a The first value
2298
+ /// \param [in] b The second value
2299
+ /// \param [in] c The third value
2300
+ /// \returns The extend vectorized abs_diff of the two values with saturation
2301
+ template <typename RetT, typename AT, typename BT>
2302
+ inline constexpr RetT extend_vabsdiff4_sat(AT a, BT b, RetT c);
2303
+
2304
+ /// Compute vectorized minimum of \p a and \p b, with each value treated as a 4
2305
+ /// elements vector type and extend each element to 9 bit.
2306
+ /// \tparam [in] RetT The type of the return value, can only be 32 bit integer
2307
+ /// \tparam [in] AT The type of the first value, can only be 32 bit integer
2308
+ /// \tparam [in] BT The type of the second value, can only be 32 bit integer
2309
+ /// \param [in] a The first value
2310
+ /// \param [in] b The second value
2311
+ /// \param [in] c The third value
2312
+ /// \returns The extend vectorized minimum of the two values
2313
+ template <typename RetT, typename AT, typename BT>
2314
+ inline constexpr RetT extend_vmin4(AT a, BT b, RetT c);
2315
+
2316
+ /// Compute vectorized minimum of \p a and \p b, with each value treated as a 4
2317
+ /// elements vector type and extend each element to 9 bit. Then add each half
2318
+ /// of the result and add with \p c.
2319
+ /// \tparam [in] RetT The type of the return value, can only be 32 bit integer
2320
+ /// \tparam [in] AT The type of the first value, can only be 32 bit integer
2321
+ /// \tparam [in] BT The type of the second value, can only be 32 bit integer
2322
+ /// \param [in] a The first value
2323
+ /// \param [in] b The second value
2324
+ /// \param [in] c The third value
2325
+ /// \returns The addition of each half of extend vectorized minimum of the
2326
+ /// two values and the third value
2327
+ template <typename RetT, typename AT, typename BT>
2328
+ inline constexpr RetT extend_vmin4_add(AT a, BT b, RetT c);
2329
+
2330
+ /// Compute vectorized minimum of \p a and \p b with saturation, with each value
2331
+ /// treated as a 4 elements vector type and extend each element to 9 bit.
2332
+ /// \tparam [in] RetT The type of the return value, can only be 32 bit integer
2333
+ /// \tparam [in] AT The type of the first value, can only be 32 bit integer
2334
+ /// \tparam [in] BT The type of the second value, can only be 32 bit integer
2335
+ /// \param [in] a The first value
2336
+ /// \param [in] b The second value
2337
+ /// \param [in] c The third value
2338
+ /// \returns The extend vectorized minimum of the two values with saturation
2339
+ template <typename RetT, typename AT, typename BT>
2340
+ inline constexpr RetT extend_vmin4_sat(AT a, BT b, RetT c);
2341
+
2342
+ /// Compute vectorized maximum of \p a and \p b, with each value treated as a 4
2343
+ /// elements vector type and extend each element to 9 bit.
2344
+ /// \tparam [in] RetT The type of the return value, can only be 32 bit integer
2345
+ /// \tparam [in] AT The type of the first value, can only be 32 bit integer
2346
+ /// \tparam [in] BT The type of the second value, can only be 32 bit integer
2347
+ /// \param [in] a The first value
2348
+ /// \param [in] b The second value
2349
+ /// \param [in] c The third value
2350
+ /// \returns The extend vectorized maximum of the two values
2351
+ template <typename RetT, typename AT, typename BT>
2352
+ inline constexpr RetT extend_vmax4(AT a, BT b, RetT c);
2353
+
2354
+ /// Compute vectorized maximum of \p a and \p b, with each value treated as a 4
2355
+ /// elements vector type and extend each element to 9 bit. Then add each half
2356
+ /// of the result and add with \p c.
2357
+ /// \tparam [in] RetT The type of the return value, can only be 32 bit integer
2358
+ /// \tparam [in] AT The type of the first value, can only be 32 bit integer
2359
+ /// \tparam [in] BT The type of the second value, can only be 32 bit integer
2360
+ /// \param [in] a The first value
2361
+ /// \param [in] b The second value
2362
+ /// \param [in] c The third value
2363
+ /// \returns The addition of each half of extend vectorized maximum of the
2364
+ /// two values and the third value
2365
+ template <typename RetT, typename AT, typename BT>
2366
+ inline constexpr RetT extend_vmax4_add(AT a, BT b, RetT c);
2367
+
2368
+ /// Compute vectorized maximum of \p a and \p b with saturation, with each value
2369
+ /// treated as a 4 elements vector type and extend each element to 9 bit.
2370
+ /// \tparam [in] RetT The type of the return value, can only be 32 bit integer
2371
+ /// \tparam [in] AT The type of the first value, can only be 32 bit integer
2372
+ /// \tparam [in] BT The type of the second value, can only be 32 bit integer
2373
+ /// \param [in] a The first value
2374
+ /// \param [in] b The second value
2375
+ /// \param [in] c The third value
2376
+ /// \returns The extend vectorized maximum of the two values with saturation
2377
+ template <typename RetT, typename AT, typename BT>
2378
+ inline constexpr RetT extend_vmax4_sat(AT a, BT b, RetT c);
2379
+
2380
+ /// Compute vectorized average of \p a and \p b, with each value treated as a 4
2381
+ /// elements vector type and extend each element to 9 bit.
2382
+ /// \tparam [in] RetT The type of the return value, can only be 32 bit integer
2383
+ /// \tparam [in] AT The type of the first value, can only be 32 bit integer
2384
+ /// \tparam [in] BT The type of the second value, can only be 32 bit integer
2385
+ /// \param [in] a The first value
2386
+ /// \param [in] b The second value
2387
+ /// \param [in] c The third value
2388
+ /// \returns The extend vectorized average of the two values
2389
+ template <typename RetT, typename AT, typename BT>
2390
+ inline constexpr RetT extend_vavrg4(AT a, BT b, RetT c);
2391
+
2392
+ /// Compute vectorized average of \p a and \p b, with each value treated as a 4
2393
+ /// elements vector type and extend each element to 9 bit. Then add each half
2394
+ /// of the result and add with \p c.
2395
+ /// \tparam [in] RetT The type of the return value, can only be 32 bit integer
2396
+ /// \tparam [in] AT The type of the first value, can only be 32 bit integer
2397
+ /// \tparam [in] BT The type of the second value, can only be 32 bit integer
2398
+ /// \param [in] a The first value
2399
+ /// \param [in] b The second value
2400
+ /// \param [in] c The third value
2401
+ /// \returns The addition of each half of extend vectorized average of the
2402
+ /// two values and the third value
2403
+ template <typename RetT, typename AT, typename BT>
2404
+ inline constexpr RetT extend_vavrg4_add(AT a, BT b, RetT c);
2405
+
2406
+ /// Compute vectorized average of \p a and \p b with saturation, with each value
2407
+ /// treated as a 4 elements vector type and extend each element to 9 bit.
2408
+ /// \tparam [in] RetT The type of the return value, can only be 32 bit integer
2409
+ /// \tparam [in] AT The type of the first value, can only be 32 bit integer
2410
+ /// \tparam [in] BT The type of the second value, can only be 32 bit integer
2411
+ /// \param [in] a The first value
2412
+ /// \param [in] b The second value
2413
+ /// \param [in] c The third value
2414
+ /// \returns The extend vectorized average of the two values with saturation
2415
+ template <typename RetT, typename AT, typename BT>
2416
+ inline constexpr RetT extend_vavrg4_sat(AT a, BT b, RetT c);
2417
+ ```
2418
+
2181
2419
The math header file provides APIs for bit-field insertion (` bfi_safe ` ) and
2182
2420
bit-field extraction (` bfe_safe ` ). These are bounds-checked variants of
2183
2421
underlying ` detail ` APIs (` detail::bfi ` , ` detail::bfe ` ) which, in future
0 commit comments