@@ -114,6 +114,24 @@ pub unsafe fn _mm512_maskz_madd52lo_epu64(
114114 simd_select_bitmask ( k, vpmadd52luq_512 ( a, b, c) , _mm512_setzero_si512 ( ) )
115115}
116116
117+ /// Multiply packed unsigned 52-bit integers in each 64-bit element of
118+ /// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
119+ /// unsigned integer from the intermediate result with the
120+ /// corresponding unsigned 64-bit integer in `a`, and store the
121+ /// results in `dst`.
122+ ///
123+ /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_madd52hi_avx_epu64)
124+ #[ inline]
125+ #[ target_feature( enable = "avxifma" ) ]
126+ #[ unstable( feature = "stdarch_x86_avx512" , issue = "111137" ) ]
127+ #[ cfg_attr(
128+ all( test, any( target_os = "linux" , target_env = "msvc" ) ) ,
129+ assert_instr( vpmadd52huq)
130+ ) ]
131+ pub unsafe fn _mm256_madd52hi_avx_epu64 ( a : __m256i , b : __m256i , c : __m256i ) -> __m256i {
132+ vpmadd52huq_256 ( a, b, c)
133+ }
134+
117135/// Multiply packed unsigned 52-bit integers in each 64-bit element of
118136/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
119137/// unsigned integer from the intermediate result with the
@@ -169,6 +187,24 @@ pub unsafe fn _mm256_maskz_madd52hi_epu64(
169187 simd_select_bitmask ( k, vpmadd52huq_256 ( a, b, c) , _mm256_setzero_si256 ( ) )
170188}
171189
190+ /// Multiply packed unsigned 52-bit integers in each 64-bit element of
191+ /// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
192+ /// unsigned integer from the intermediate result with the
193+ /// corresponding unsigned 64-bit integer in `a`, and store the
194+ /// results in `dst`.
195+ ///
196+ /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_madd52lo_avx_epu64)
197+ #[ inline]
198+ #[ target_feature( enable = "avxifma" ) ]
199+ #[ unstable( feature = "stdarch_x86_avx512" , issue = "111137" ) ]
200+ #[ cfg_attr(
201+ all( test, any( target_os = "linux" , target_env = "msvc" ) ) ,
202+ assert_instr( vpmadd52luq)
203+ ) ]
204+ pub unsafe fn _mm256_madd52lo_avx_epu64 ( a : __m256i , b : __m256i , c : __m256i ) -> __m256i {
205+ vpmadd52luq_256 ( a, b, c)
206+ }
207+
172208/// Multiply packed unsigned 52-bit integers in each 64-bit element of
173209/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
174210/// unsigned integer from the intermediate result with the
@@ -224,6 +260,24 @@ pub unsafe fn _mm256_maskz_madd52lo_epu64(
224260 simd_select_bitmask ( k, vpmadd52luq_256 ( a, b, c) , _mm256_setzero_si256 ( ) )
225261}
226262
263+ /// Multiply packed unsigned 52-bit integers in each 64-bit element of
264+ /// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
265+ /// unsigned integer from the intermediate result with the
266+ /// corresponding unsigned 64-bit integer in `a`, and store the
267+ /// results in `dst`.
268+ ///
269+ /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_madd52hi_avx_epu64)
270+ #[ inline]
271+ #[ target_feature( enable = "avxifma" ) ]
272+ #[ unstable( feature = "stdarch_x86_avx512" , issue = "111137" ) ]
273+ #[ cfg_attr(
274+ all( test, any( target_os = "linux" , target_env = "msvc" ) ) ,
275+ assert_instr( vpmadd52huq)
276+ ) ]
277+ pub unsafe fn _mm_madd52hi_avx_epu64 ( a : __m128i , b : __m128i , c : __m128i ) -> __m128i {
278+ vpmadd52huq_128 ( a, b, c)
279+ }
280+
227281/// Multiply packed unsigned 52-bit integers in each 64-bit element of
228282/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
229283/// unsigned integer from the intermediate result with the
@@ -269,6 +323,24 @@ pub unsafe fn _mm_maskz_madd52hi_epu64(k: __mmask8, a: __m128i, b: __m128i, c: _
269323 simd_select_bitmask ( k, vpmadd52huq_128 ( a, b, c) , _mm_setzero_si128 ( ) )
270324}
271325
326+ /// Multiply packed unsigned 52-bit integers in each 64-bit element of
327+ /// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
328+ /// unsigned integer from the intermediate result with the
329+ /// corresponding unsigned 64-bit integer in `a`, and store the
330+ /// results in `dst`.
331+ ///
332+ /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_madd52lo_avx_epu64)
333+ #[ inline]
334+ #[ target_feature( enable = "avxifma" ) ]
335+ #[ unstable( feature = "stdarch_x86_avx512" , issue = "111137" ) ]
336+ #[ cfg_attr(
337+ all( test, any( target_os = "linux" , target_env = "msvc" ) ) ,
338+ assert_instr( vpmadd52luq)
339+ ) ]
340+ pub unsafe fn _mm_madd52lo_avx_epu64 ( a : __m128i , b : __m128i , c : __m128i ) -> __m128i {
341+ vpmadd52luq_128 ( a, b, c)
342+ }
343+
272344/// Multiply packed unsigned 52-bit integers in each 64-bit element of
273345/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
274346/// unsigned integer from the intermediate result with the
@@ -427,6 +499,20 @@ mod tests {
427499 assert_eq_m512i ( expected, actual) ;
428500 }
429501
502+ #[ simd_test( enable = "avxifma" ) ]
503+ unsafe fn test_mm256_madd52hi_avx_epu64 ( ) {
504+ let a = _mm256_set1_epi64x ( 10 << 40 ) ;
505+ let b = _mm256_set1_epi64x ( ( 11 << 40 ) + 4 ) ;
506+ let c = _mm256_set1_epi64x ( ( 12 << 40 ) + 3 ) ;
507+
508+ let actual = _mm256_madd52hi_avx_epu64 ( a, b, c) ;
509+
510+ // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
511+ let expected = _mm256_set1_epi64x ( 11030549757952 ) ;
512+
513+ assert_eq_m256i ( expected, actual) ;
514+ }
515+
430516 #[ simd_test( enable = "avx512ifma,avx512vl" ) ]
431517 unsafe fn test_mm256_madd52hi_epu64 ( ) {
432518 let a = _mm256_set1_epi64x ( 10 << 40 ) ;
@@ -471,6 +557,20 @@ mod tests {
471557 assert_eq_m256i ( expected, actual) ;
472558 }
473559
560+ #[ simd_test( enable = "avxifma" ) ]
561+ unsafe fn test_mm256_madd52lo_avx_epu64 ( ) {
562+ let a = _mm256_set1_epi64x ( 10 << 40 ) ;
563+ let b = _mm256_set1_epi64x ( ( 11 << 40 ) + 4 ) ;
564+ let c = _mm256_set1_epi64x ( ( 12 << 40 ) + 3 ) ;
565+
566+ let actual = _mm256_madd52lo_avx_epu64 ( a, b, c) ;
567+
568+ // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
569+ let expected = _mm256_set1_epi64x ( 100055558127628 ) ;
570+
571+ assert_eq_m256i ( expected, actual) ;
572+ }
573+
474574 #[ simd_test( enable = "avx512ifma,avx512vl" ) ]
475575 unsafe fn test_mm256_madd52lo_epu64 ( ) {
476576 let a = _mm256_set1_epi64x ( 10 << 40 ) ;
@@ -515,6 +615,20 @@ mod tests {
515615 assert_eq_m256i ( expected, actual) ;
516616 }
517617
618+ #[ simd_test( enable = "avxifma" ) ]
619+ unsafe fn test_mm_madd52hi_avx_epu64 ( ) {
620+ let a = _mm_set1_epi64x ( 10 << 40 ) ;
621+ let b = _mm_set1_epi64x ( ( 11 << 40 ) + 4 ) ;
622+ let c = _mm_set1_epi64x ( ( 12 << 40 ) + 3 ) ;
623+
624+ let actual = _mm_madd52hi_avx_epu64 ( a, b, c) ;
625+
626+ // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
627+ let expected = _mm_set1_epi64x ( 11030549757952 ) ;
628+
629+ assert_eq_m128i ( expected, actual) ;
630+ }
631+
518632 #[ simd_test( enable = "avx512ifma,avx512vl" ) ]
519633 unsafe fn test_mm_madd52hi_epu64 ( ) {
520634 let a = _mm_set1_epi64x ( 10 << 40 ) ;
@@ -559,6 +673,20 @@ mod tests {
559673 assert_eq_m128i ( expected, actual) ;
560674 }
561675
676+ #[ simd_test( enable = "avxifma" ) ]
677+ unsafe fn test_mm_madd52lo_avx_epu64 ( ) {
678+ let a = _mm_set1_epi64x ( 10 << 40 ) ;
679+ let b = _mm_set1_epi64x ( ( 11 << 40 ) + 4 ) ;
680+ let c = _mm_set1_epi64x ( ( 12 << 40 ) + 3 ) ;
681+
682+ let actual = _mm_madd52lo_avx_epu64 ( a, b, c) ;
683+
684+ // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
685+ let expected = _mm_set1_epi64x ( 100055558127628 ) ;
686+
687+ assert_eq_m128i ( expected, actual) ;
688+ }
689+
562690 #[ simd_test( enable = "avx512ifma,avx512vl" ) ]
563691 unsafe fn test_mm_madd52lo_epu64 ( ) {
564692 let a = _mm_set1_epi64x ( 10 << 40 ) ;
0 commit comments