@@ -81,12 +81,99 @@ extern "C" {
8181 #[ link_name = "llvm.ppc.altivec.vmsumshm" ]
8282 fn vmsumshm (
8383 a : vector_signed_short , b : vector_signed_short , c : vector_signed_int ) -> vector_signed_int ;
84+ #[ link_name = "llvm.ppc.altivec.vmaddfp" ]
85+ fn vmaddfp (
86+ a : vector_float , b : vector_float , c : vector_float ) -> vector_float ;
87+ #[ link_name = "llvm.ppc.altivec.vnmsubfp" ]
88+ fn vnmsubfp (
89+ a : vector_float , b : vector_float , c : vector_float ) -> vector_float ;
90+ #[ link_name = "llvm.ppc.altivec.vsum2sws" ]
91+ fn vsum2sws ( a : vector_signed_int , b : vector_signed_int ) -> vector_signed_int ;
92+ #[ link_name = "llvm.ppc.altivec.vsum4ubs" ]
93+ fn vsum4ubs ( a : vector_unsigned_char , b : vector_unsigned_int ) -> vector_unsigned_int ;
94+ #[ link_name = "llvm.ppc.altivec.vsum4sbs" ]
95+ fn vsum4sbs ( a : vector_signed_char , b : vector_signed_int ) -> vector_signed_int ;
96+ #[ link_name = "llvm.ppc.altivec.vsum4shs" ]
97+ fn vsum4shs ( a : vector_signed_short , b : vector_signed_int ) -> vector_signed_int ;
8498}
8599
86100mod sealed {
87101
88102 use super :: * ;
89103
104+ #[ inline]
105+ #[ target_feature( enable = "altivec" ) ]
106+ #[ cfg_attr( test, assert_instr( vsum4ubs) ) ]
107+ unsafe fn vec_vsum4ubs ( a : vector_unsigned_char , b : vector_unsigned_int ) -> vector_unsigned_int {
108+ vsum4ubs ( a, b)
109+ }
110+
111+ #[ inline]
112+ #[ target_feature( enable = "altivec" ) ]
113+ #[ cfg_attr( test, assert_instr( vsum4sbs) ) ]
114+ unsafe fn vec_vsum4sbs ( a : vector_signed_char , b : vector_signed_int ) -> vector_signed_int {
115+ vsum4sbs ( a, b)
116+ }
117+
118+ #[ inline]
119+ #[ target_feature( enable = "altivec" ) ]
120+ #[ cfg_attr( test, assert_instr( vsum4shs) ) ]
121+ unsafe fn vec_vsum4shs ( a : vector_signed_short , b : vector_signed_int ) -> vector_signed_int {
122+ vsum4shs ( a, b)
123+ }
124+
125+
/// Dispatch trait behind the public `vec_sum4s` function.
///
/// `Other` is the vector type used both for the second operand and for the
/// result; the implementing type is the first operand.
pub trait VectorSum4s<Other> {
    /// Combines `self` with `b` and returns a vector of type `Other`.
    unsafe fn vec_sum4s(self, b: Other) -> Other;
}
129+
130+ impl VectorSum4s < vector_unsigned_int > for vector_unsigned_char {
131+ #[ inline]
132+ #[ target_feature( enable = "altivec" ) ]
133+ unsafe fn vec_sum4s ( self , b : vector_unsigned_int ) -> vector_unsigned_int {
134+ vsum4ubs ( self , b)
135+ }
136+ }
137+
138+ impl VectorSum4s < vector_signed_int > for vector_signed_char {
139+ #[ inline]
140+ #[ target_feature( enable = "altivec" ) ]
141+ unsafe fn vec_sum4s ( self , b : vector_signed_int ) -> vector_signed_int {
142+ vsum4sbs ( self , b)
143+ }
144+ }
145+
146+ impl VectorSum4s < vector_signed_int > for vector_signed_short {
147+ #[ inline]
148+ #[ target_feature( enable = "altivec" ) ]
149+ unsafe fn vec_sum4s ( self , b : vector_signed_int ) -> vector_signed_int {
150+ vsum4shs ( self , b)
151+ }
152+ }
153+
154+ #[ inline]
155+ #[ target_feature( enable = "altivec" ) ]
156+ #[ cfg_attr( test, assert_instr( vsum2sws) ) ]
157+ unsafe fn vec_vsum2sws ( a : vector_signed_int , b : vector_signed_int ) -> vector_signed_int {
158+ vsum2sws ( a, b)
159+ }
160+
161+ #[ inline]
162+ #[ target_feature( enable = "altivec" ) ]
163+ #[ cfg_attr( test, assert_instr( vnmsubfp) ) ]
164+ unsafe fn vec_vnmsubfp (
165+ a : vector_float , b : vector_float , c : vector_float ) -> vector_float {
166+ vnmsubfp ( a, b, c)
167+ }
168+
169+ #[ inline]
170+ #[ target_feature( enable = "altivec" ) ]
171+ #[ cfg_attr( test, assert_instr( vmaddfp) ) ]
172+ unsafe fn vec_vmaddfp (
173+ a : vector_float , b : vector_float , c : vector_float ) -> vector_float {
174+ vmaddfp ( a, b, c)
175+ }
176+
90177 #[ inline]
91178 #[ target_feature( enable = "altivec" ) ]
92179 #[ cfg_attr( test, assert_instr( vmsumubm) ) ]
@@ -582,6 +669,20 @@ mod endian {
582669
583670 b. vec_vperm ( a, c)
584671 }
672+
673+ /// Vector Sum Across Partial (1/2) Saturated
674+ #[ inline]
675+ #[ target_feature( enable = "altivec" ) ]
676+ pub unsafe fn vec_sum2s ( a : vector_signed_int , b : vector_signed_int ) -> vector_signed_int {
677+ // vsum2sws has big-endian bias
678+ //
679+ // swap the even b elements with the odd ones
680+ let flip = :: mem:: transmute ( u8x16:: new ( 4 , 5 , 6 , 7 , 0 , 1 , 2 , 3 , 12 , 13 , 14 , 15 , 8 , 9 , 10 , 11 ) ) ;
681+ let b = vec_perm ( b, b, flip) ;
682+ let c = vsum2sws ( a, b) ;
683+
684+ vec_perm ( c, c, flip)
685+ }
585686}
586687
587688/// Vector Multiply Add Saturated
@@ -629,6 +730,29 @@ pub unsafe fn vec_msums<T, U>(a: T, b: T, c: U) -> U
629730 a. vec_msums ( b, c)
630731}
631732
733+ /// Vector Multiply Add
734+ #[ inline]
735+ #[ target_feature( enable = "altivec" ) ]
736+ pub unsafe fn vec_madd ( a : vector_float , b : vector_float , c : vector_float ) -> vector_float {
737+ vmaddfp ( a, b, c)
738+ }
739+
740+ /// Vector Negative Multiply Subtract
741+ #[ inline]
742+ #[ target_feature( enable = "altivec" ) ]
743+ pub unsafe fn vec_nmsub ( a : vector_float , b : vector_float , c : vector_float ) -> vector_float {
744+ vnmsubfp ( a, b, c)
745+ }
746+
747+ /// Vector Sum Across Partial (1/4) Saturated
748+ #[ inline]
749+ #[ target_feature( enable = "altivec" ) ]
750+ pub unsafe fn vec_sum4s < T , U > ( a : T , b : U ) -> U
751+ where
752+ T : sealed:: VectorSum4s < U > {
753+ a. vec_sum4s ( b)
754+ }
755+
632756#[ cfg( target_endian = "big" ) ]
633757mod endian {
634758 use super :: * ;
@@ -641,6 +765,13 @@ mod endian {
641765 {
642766 a. vec_vperm ( b, c)
643767 }
768+
769+ /// Vector Sum Across Partial (1/2) Saturated
770+ #[ inline]
771+ #[ target_feature( enable = "altivec" ) ]
772+ pub unsafe fn vec_sum2s ( a : vector_signed_int , b : vector_signed_int ) -> vector_signed_int {
773+ vsum2sws ( a, b)
774+ }
644775}
645776
646777pub use self :: endian:: * ;
@@ -768,6 +899,34 @@ mod tests {
768899 assert_eq ! ( d, :: mem:: transmute( vec_madds( a, b, c) ) ) ;
769900 }
770901
902+ #[ simd_test( enable = "altivec" ) ]
903+ unsafe fn test_vec_madd_float ( ) {
904+ let a: vector_float = :: mem:: transmute ( f32x4:: new ( 0.1 , 0.2 , 0.3 , 0.4 ) ) ;
905+ let b: vector_float = :: mem:: transmute ( f32x4:: new ( 0.1 , 0.2 , 0.3 , 0.4 ) ) ;
906+ let c: vector_float = :: mem:: transmute ( f32x4:: new ( 0.1 , 0.2 , 0.3 , 0.4 ) ) ;
907+ let d = f32x4:: new (
908+ 0.1 * 0.1 + 0.1 ,
909+ 0.2 * 0.2 + 0.2 ,
910+ 0.3 * 0.3 + 0.3 ,
911+ 0.4 * 0.4 + 0.4 ) ;
912+
913+ assert_eq ! ( d, :: mem:: transmute( vec_madd( a, b, c) ) ) ;
914+ }
915+
916+ #[ simd_test( enable = "altivec" ) ]
917+ unsafe fn test_vec_nmsub_float ( ) {
918+ let a: vector_float = :: mem:: transmute ( f32x4:: new ( 0.1 , 0.2 , 0.3 , 0.4 ) ) ;
919+ let b: vector_float = :: mem:: transmute ( f32x4:: new ( 0.1 , 0.2 , 0.3 , 0.4 ) ) ;
920+ let c: vector_float = :: mem:: transmute ( f32x4:: new ( 0.1 , 0.2 , 0.3 , 0.4 ) ) ;
921+ let d = f32x4:: new (
922+ -( 0.1 * 0.1 - 0.1 ) ,
923+ -( 0.2 * 0.2 - 0.2 ) ,
924+ -( 0.3 * 0.3 - 0.3 ) ,
925+ -( 0.4 * 0.4 - 0.4 ) ,
926+ ) ;
927+ assert_eq ! ( d, :: mem:: transmute( vec_nmsub( a, b, c) ) ) ;
928+ }
929+
771930 #[ simd_test( enable = "altivec" ) ]
772931 unsafe fn test_vec_mradds ( ) {
773932 let a: vector_signed_short = :: mem:: transmute ( i16x8:: new (
@@ -991,6 +1150,109 @@ mod tests {
9911150 assert_eq ! ( d, :: mem:: transmute( vec_msums( a, b, c) ) ) ;
9921151 }
9931152
1153+ #[ simd_test( enable = "altivec" ) ]
1154+ unsafe fn test_vec_sum2s ( ) {
1155+ let a: vector_signed_int = :: mem:: transmute ( i32x4:: new ( 0 , 1 , 2 , 3 ) ) ;
1156+ let b: vector_signed_int = :: mem:: transmute ( i32x4:: new ( 0 , 1 , 2 , 3 ) ) ;
1157+ let d = i32x4:: new (
1158+ 0 ,
1159+ 0 + 1 + 1 ,
1160+ 0 ,
1161+ 2 + 3 + 3 ) ;
1162+
1163+ assert_eq ! ( d, :: mem:: transmute( vec_sum2s( a, b) ) ) ;
1164+ }
1165+
1166+ #[ simd_test( enable = "altivec" ) ]
1167+ unsafe fn test_vec_sum4s_unsigned_char ( ) {
1168+ let a: vector_unsigned_char = :: mem:: transmute ( u8x16:: new (
1169+ 0 ,
1170+ 1 ,
1171+ 2 ,
1172+ 3 ,
1173+
1174+ 4 ,
1175+ 5 ,
1176+ 6 ,
1177+ 7 ,
1178+
1179+ 0 ,
1180+ 1 ,
1181+ 2 ,
1182+ 3 ,
1183+
1184+ 4 ,
1185+ 5 ,
1186+ 6 ,
1187+ 7 ,
1188+ ) ) ;
1189+ let b: vector_unsigned_int = :: mem:: transmute ( u32x4:: new ( 0 , 1 , 2 , 3 ) ) ;
1190+ let d = u32x4:: new (
1191+ 0 + 1 + 2 + 3 + 0 ,
1192+ 4 + 5 + 6 + 7 + 1 ,
1193+ 0 + 1 + 2 + 3 + 2 ,
1194+ 4 + 5 + 6 + 7 + 3 ,
1195+ ) ;
1196+
1197+ assert_eq ! ( d, :: mem:: transmute( vec_sum4s( a, b) ) ) ;
1198+ }
1199+ #[ simd_test( enable = "altivec" ) ]
1200+ unsafe fn test_vec_sum4s_signed_char ( ) {
1201+ let a: vector_signed_char = :: mem:: transmute ( i8x16:: new (
1202+ 0 ,
1203+ 1 ,
1204+ 2 ,
1205+ 3 ,
1206+
1207+ 4 ,
1208+ 5 ,
1209+ 6 ,
1210+ 7 ,
1211+
1212+ 0 ,
1213+ 1 ,
1214+ 2 ,
1215+ 3 ,
1216+
1217+ 4 ,
1218+ 5 ,
1219+ 6 ,
1220+ 7 ,
1221+ ) ) ;
1222+ let b: vector_signed_int = :: mem:: transmute ( i32x4:: new ( 0 , 1 , 2 , 3 ) ) ;
1223+ let d = i32x4:: new (
1224+ 0 + 1 + 2 + 3 + 0 ,
1225+ 4 + 5 + 6 + 7 + 1 ,
1226+ 0 + 1 + 2 + 3 + 2 ,
1227+ 4 + 5 + 6 + 7 + 3 ,
1228+ ) ;
1229+
1230+ assert_eq ! ( d, :: mem:: transmute( vec_sum4s( a, b) ) ) ;
1231+ }
1232+ #[ simd_test( enable = "altivec" ) ]
1233+ unsafe fn test_vec_sum4s_signed_short ( ) {
1234+ let a: vector_signed_short = :: mem:: transmute ( i16x8:: new (
1235+ 0 ,
1236+ 1 ,
1237+ 2 ,
1238+ 3 ,
1239+
1240+ 4 ,
1241+ 5 ,
1242+ 6 ,
1243+ 7 ,
1244+ ) ) ;
1245+ let b: vector_signed_int = :: mem:: transmute ( i32x4:: new ( 0 , 1 , 2 , 3 ) ) ;
1246+ let d = i32x4:: new (
1247+ 0 + 1 + 0 ,
1248+ 2 + 3 + 1 ,
1249+ 4 + 5 + 2 ,
1250+ 6 + 7 + 3 ,
1251+ ) ;
1252+
1253+ assert_eq ! ( d, :: mem:: transmute( vec_sum4s( a, b) ) ) ;
1254+ }
1255+
9941256 #[ simd_test( enable = "altivec" ) ]
9951257 unsafe fn vec_add_i32x4_i32x4 ( ) {
9961258 let x = i32x4:: new ( 1 , 2 , 3 , 4 ) ;
0 commit comments