@@ -843,7 +843,7 @@ pub unsafe fn _mm256_extractf128_ps(a: f32x8, imm8: i32) -> __m128 {
843843#[ inline( always) ]
844844#[ target_feature = "+avx" ]
845845#[ cfg_attr( test, assert_instr( vextractf128) ) ]
846- pub unsafe fn _mm256_extractf128_pd ( a : f64x4 , imm8 : i32 ) -> f64x2 {
846+ pub unsafe fn _mm256_extractf128_pd ( a : f64x4 , imm8 : i32 ) -> __m128d {
847847 match imm8 & 1 {
848848 0 => simd_shuffle2 ( a, _mm256_undefined_pd ( ) , [ 0 , 1 ] ) ,
849849 _ => simd_shuffle2 ( a, _mm256_undefined_pd ( ) , [ 2 , 3 ] ) ,
@@ -1068,9 +1068,7 @@ pub unsafe fn _mm256_permute_pd(a: f64x4, imm8: i32) -> f64x4 {
10681068#[ inline( always) ]
10691069#[ target_feature = "+avx,+sse2" ]
10701070#[ cfg_attr( test, assert_instr( vpermilpd, imm8 = 0x1 ) ) ]
1071- pub unsafe fn _mm_permute_pd ( a : f64x2 , imm8 : i32 ) -> f64x2 {
1072- use x86:: i586:: sse2:: _mm_undefined_pd;
1073-
1071+ pub unsafe fn _mm_permute_pd ( a : __m128d , imm8 : i32 ) -> __m128d {
10741072 let imm8 = ( imm8 & 0xFF ) as u8 ;
10751073 macro_rules! shuffle2 {
10761074 ( $a: expr, $b: expr) => {
@@ -1194,7 +1192,7 @@ pub unsafe fn _mm256_insertf128_ps(a: f32x8, b: __m128, imm8: i32) -> f32x8 {
11941192#[ inline( always) ]
11951193#[ target_feature = "+avx" ]
11961194#[ cfg_attr( test, assert_instr( vinsertf128, imm8 = 1 ) ) ]
1197- pub unsafe fn _mm256_insertf128_pd ( a : f64x4 , b : f64x2 , imm8 : i32 ) -> f64x4 {
1195+ pub unsafe fn _mm256_insertf128_pd ( a : f64x4 , b : __m128d , imm8 : i32 ) -> f64x4 {
11981196 match imm8 & 1 {
11991197 0 => simd_shuffle4 ( a, _mm256_castpd128_pd256 ( b) , [ 4 , 5 , 2 , 3 ] ) ,
12001198 _ => simd_shuffle4 ( a, _mm256_castpd128_pd256 ( b) , [ 0 , 1 , 4 , 5 ] ) ,
@@ -2139,7 +2137,7 @@ pub unsafe fn _mm256_castps256_ps128(a: f32x8) -> __m128 {
21392137#[ target_feature = "+avx" ]
21402138// This intrinsic is only used for compilation and does not generate any
21412139// instructions, thus it has zero latency.
2142- pub unsafe fn _mm256_castpd256_pd128 ( a : f64x4 ) -> f64x2 {
2140+ pub unsafe fn _mm256_castpd256_pd128 ( a : f64x4 ) -> __m128d {
21432141 simd_shuffle2 ( a, a, [ 0 , 1 ] )
21442142}
21452143
@@ -2171,7 +2169,7 @@ pub unsafe fn _mm256_castps128_ps256(a: __m128) -> f32x8 {
21712169#[ target_feature = "+avx" ]
21722170// This intrinsic is only used for compilation and does not generate any
21732171// instructions, thus it has zero latency.
2174- pub unsafe fn _mm256_castpd128_pd256 ( a : f64x2 ) -> f64x4 {
2172+ pub unsafe fn _mm256_castpd128_pd256 ( a : __m128d ) -> f64x4 {
21752173 // FIXME simd_shuffle4(a, a, [0, 1, -1, -1])
21762174 simd_shuffle4 ( a, a, [ 0 , 1 , 0 , 0 ] )
21772175}
@@ -2221,8 +2219,7 @@ pub unsafe fn _mm256_zextsi128_si256(a: __m128i) -> __m256i {
22212219#[ target_feature = "+avx,+sse2" ]
22222220// This intrinsic is only used for compilation and does not generate any
22232221// instructions, thus it has zero latency.
2224- pub unsafe fn _mm256_zextpd128_pd256 ( a : f64x2 ) -> f64x4 {
2225- use x86:: i586:: sse2:: _mm_setzero_pd;
2222+ pub unsafe fn _mm256_zextpd128_pd256 ( a : __m128d ) -> f64x4 {
22262223 simd_shuffle4 ( a, _mm_setzero_pd ( ) , [ 0 , 1 , 2 , 3 ] )
22272224}
22282225
@@ -2326,7 +2323,6 @@ pub unsafe fn _mm256_loadu2_m128(
23262323pub unsafe fn _mm256_loadu2_m128d (
23272324 hiaddr : * const f64 , loaddr : * const f64
23282325) -> f64x4 {
2329- use x86:: i586:: sse2:: _mm_loadu_pd;
23302326 let a = _mm256_castpd128_pd256 ( _mm_loadu_pd ( loaddr) ) ;
23312327 _mm256_insertf128_pd ( a, _mm_loadu_pd ( hiaddr) , 1 )
23322328}
@@ -2371,7 +2367,6 @@ pub unsafe fn _mm256_storeu2_m128(
23712367pub unsafe fn _mm256_storeu2_m128d (
23722368 hiaddr : * mut f64 , loaddr : * mut f64 , a : f64x4
23732369) {
2374- use x86:: i586:: sse2:: _mm_storeu_pd;
23752370 let lo = _mm256_castpd256_pd128 ( a) ;
23762371 _mm_storeu_pd ( loaddr, lo) ;
23772372 let hi = _mm256_extractf128_pd ( a, 1 ) ;
@@ -3104,9 +3099,9 @@ mod tests {
31043099 #[ simd_test = "avx" ]
31053100 unsafe fn test_mm256_extractf128_pd ( ) {
31063101 let a = f64x4:: new ( 4. , 3. , 2. , 5. ) ;
3107- let r = avx :: _mm256_extractf128_pd ( a, 0 ) ;
3108- let e = f64x2 :: new ( 4. , 3. ) ;
3109- assert_eq ! ( r, e) ;
3102+ let r = _mm256_extractf128_pd ( a, 0 ) ;
3103+ let e = _mm_setr_pd ( 4. , 3. ) ;
3104+ assert_eq_m128d ( r, e) ;
31103105 }
31113106
31123107 #[ simd_test = "avx" ]
@@ -3189,10 +3184,10 @@ mod tests {
31893184
31903185 #[ simd_test = "avx" ]
31913186 unsafe fn test_mm_permute_pd ( ) {
3192- let a = f64x2 :: new ( 4. , 3. ) ;
3193- let r = avx :: _mm_permute_pd ( a, 1 ) ;
3194- let e = f64x2 :: new ( 3. , 4. ) ;
3195- assert_eq ! ( r, e) ;
3187+ let a = _mm_setr_pd ( 4. , 3. ) ;
3188+ let r = _mm_permute_pd ( a, 1 ) ;
3189+ let e = _mm_setr_pd ( 3. , 4. ) ;
3190+ assert_eq_m128d ( r, e) ;
31963191 }
31973192
31983193 #[ simd_test = "avx" ]
@@ -3271,8 +3266,8 @@ mod tests {
32713266 #[ simd_test = "avx" ]
32723267 unsafe fn test_mm256_insertf128_pd ( ) {
32733268 let a = f64x4:: new ( 1. , 2. , 3. , 4. ) ;
3274- let b = f64x2 :: new ( 5. , 6. ) ;
3275- let r = avx :: _mm256_insertf128_pd ( a, b, 0 ) ;
3269+ let b = _mm_setr_pd ( 5. , 6. ) ;
3270+ let r = _mm256_insertf128_pd ( a, b, 0 ) ;
32763271 let e = f64x4:: new ( 5. , 6. , 3. , 4. ) ;
32773272 assert_eq ! ( r, e) ;
32783273 }
@@ -4078,8 +4073,8 @@ mod tests {
40784073 #[ simd_test = "avx" ]
40794074 unsafe fn test_mm256_castpd256_pd128 ( ) {
40804075 let a = f64x4:: new ( 1. , 2. , 3. , 4. ) ;
4081- let r = avx :: _mm256_castpd256_pd128 ( a) ;
4082- assert_eq ! ( r, f64x2 :: new ( 1. , 2. ) ) ;
4076+ let r = _mm256_castpd256_pd128 ( a) ;
4077+ assert_eq_m128d ( r, _mm_setr_pd ( 1. , 2. ) ) ;
40834078 }
40844079
40854080 #[ simd_test = "avx" ]
@@ -4107,8 +4102,8 @@ mod tests {
41074102
41084103 #[ simd_test = "avx" ]
41094104 unsafe fn test_mm256_zextpd128_pd256 ( ) {
4110- let a = f64x2 :: new ( 1. , 2. ) ;
4111- let r = avx :: _mm256_zextpd128_pd256 ( a) ;
4105+ let a = _mm_setr_pd ( 1. , 2. ) ;
4106+ let r = _mm256_zextpd128_pd256 ( a) ;
41124107 let e = f64x4:: new ( 1. , 2. , 0. , 0. ) ;
41134108 assert_eq ! ( r, e) ;
41144109 }
@@ -4271,8 +4266,8 @@ mod tests {
42714266 & mut lo as * mut _ as * mut f64 ,
42724267 a,
42734268 ) ;
4274- assert_eq ! ( hi, f64x2 :: new ( 3. , 4. ) ) ;
4275- assert_eq ! ( lo, f64x2 :: new ( 1. , 2. ) ) ;
4269+ assert_eq_m128d ( hi, _mm_setr_pd ( 3. , 4. ) ) ;
4270+ assert_eq_m128d ( lo, _mm_setr_pd ( 1. , 2. ) ) ;
42764271 }
42774272
42784273 #[ simd_test = "avx" ]
0 commit comments