/*
 * Project : SIMD_Utils
 * Version : 0.2.3
 * Author  : JishinMaster
 * Licence : BSD-2
 */
@@ -932,7 +932,19 @@ static inline void magnitudef_C_interleaved(complex32_t *src, float *dst, int le
932
932
#pragma omp simd
933
933
#endif
934
934
for (int i = 0 ; i < len ; i ++ ) {
935
- dst [i ] = sqrtf (src [i ].re * src [i ].re + (src [i ].im * src [i ].im ));
935
+ dst [i ] = sqrtf ((src [i ].re * src [i ].re ) + src [i ].im * src [i ].im );
936
+ }
937
+ }
938
+
939
+ static inline void magnitudef_C_interleaved_precise (complex32_t * src , float * dst , int len )
940
+ {
941
+ #ifdef OMP
942
+ #pragma omp simd
943
+ #endif
944
+ for (int i = 0 ; i < len ; i ++ ) {
945
+ double srcRe_64 = (double ) src [i ].re ;
946
+ double srcIm_64 = (double ) src [i ].im ;
947
+ dst [i ] = (float ) (sqrt ((srcRe_64 * srcRe_64 ) + srcIm_64 * srcIm_64 ));
936
948
}
937
949
}
938
950
@@ -942,18 +954,41 @@ static inline void magnitudef_C_split(float *srcRe, float *srcIm, float *dst, in
942
954
#pragma omp simd
943
955
#endif
944
956
for (int i = 0 ; i < len ; i ++ ) {
945
- dst [i ] = sqrtf (srcRe [i ] * srcRe [i ] + ( srcIm [i ] * srcIm [i ]) );
957
+ dst [i ] = sqrtf (( srcRe [i ] * srcRe [i ]) + srcIm [i ] * srcIm [i ]);
946
958
}
947
959
}
948
960
961
+ static inline void magnitudef_C_split_precise (float * srcRe , float * srcIm , float * dst , int len )
962
+ {
963
+ #ifdef OMP
964
+ #pragma omp simd
965
+ #endif
966
+ for (int i = 0 ; i < len ; i ++ ) {
967
+ double srcRe_64 = (double ) srcRe [i ];
968
+ double srcIm_64 = (double ) srcIm [i ];
969
+ dst [i ] = (float ) (sqrt ((srcRe_64 * srcRe_64 ) + srcIm_64 * srcIm_64 ));
970
+ }
971
+ }
949
972
950
973
/*
 * Power spectrum (squared magnitude) of complex samples stored as separate
 * real / imaginary arrays, computed in single precision.
 *
 * For every i in [0, len):
 *     dst[i] = srcRe[i] * srcRe[i] + srcIm[i] * srcIm[i]
 *
 * srcRe : real parts      (elements 0 .. len-1 are read)
 * srcIm : imaginary parts (elements 0 .. len-1 are read)
 * dst   : output values   (elements 0 .. len-1 are written)
 * len   : number of elements; the loop body does not run when len <= 0
 *
 * When OMP is defined the loop is annotated with "#pragma omp simd".
 */
static inline void powerspectf_C_split(float *srcRe, float *srcIm, float *dst, int len)
{
#ifdef OMP
#pragma omp simd
#endif
    for (int i = 0; i < len; i++) {
        dst[i] = (srcRe[i] * srcRe[i]) + srcIm[i] * srcIm[i];
    }
}
982
+
983
/*
 * Power spectrum (squared magnitude) of complex samples stored as separate
 * real / imaginary arrays, evaluated in double precision.
 *
 * For every i in [0, len):
 *     dst[i] = (float) (re * re + im * im)
 * where re = (double) srcRe[i] and im = (double) srcIm[i]. The value is
 * narrowed to float only on the final store.
 *
 * srcRe : real parts      (elements 0 .. len-1 are read)
 * srcIm : imaginary parts (elements 0 .. len-1 are read)
 * dst   : output values   (elements 0 .. len-1 are written)
 * len   : number of elements; the loop body does not run when len <= 0
 *
 * When OMP is defined the loop is annotated with "#pragma omp simd".
 */
static inline void powerspectf_C_split_precise(float *srcRe, float *srcIm, float *dst, int len)
{
#ifdef OMP
#pragma omp simd
#endif
    for (int i = 0; i < len; i++) {
        /* Convert first so that the squares and their sum are computed as double. */
        double srcRe_64 = (double) srcRe[i];
        double srcIm_64 = (double) srcIm[i];
        dst[i] = (float) ((srcRe_64 * srcRe_64) + srcIm_64 * srcIm_64);
    }
}
959
994
@@ -963,7 +998,19 @@ static inline void powerspectf_C_interleaved(complex32_t *src, float *dst, int l
963
998
#pragma omp simd
964
999
#endif
965
1000
for (int i = 0 ; i < len ; i ++ ) {
966
- dst [i ] = src [i ].re * src [i ].re + (src [i ].im * src [i ].im );
1001
+ dst [i ] = (src [i ].re * src [i ].re ) + src [i ].im * src [i ].im ;
1002
+ }
1003
+ }
1004
+
1005
+ static inline void powerspectf_C_interleaved_precise (complex32_t * src , float * dst , int len )
1006
+ {
1007
+ #ifdef OMP
1008
+ #pragma omp simd
1009
+ #endif
1010
+ for (int i = 0 ; i < len ; i ++ ) {
1011
+ double srcRe_64 = (double ) src [i ].re ;
1012
+ double srcIm_64 = (double ) src [i ].im ;
1013
+ dst [i ] = (float ) ((srcRe_64 * srcRe_64 ) + srcIm_64 * srcIm_64 );
967
1014
}
968
1015
}
969
1016
@@ -1305,6 +1352,22 @@ static inline void cplxvecdiv_C(complex32_t *src1, complex32_t *src2, complex32_
1305
1352
}
1306
1353
}
1307
1354
1355
+ static inline void cplxvecdiv_C_precise (complex32_t * src1 , complex32_t * src2 , complex32_t * dst , int len )
1356
+ {
1357
+ #ifdef OMP
1358
+ #pragma omp simd
1359
+ #endif
1360
+ for (int i = 0 ; i < len ; i ++ ) {
1361
+ double src1Re_64 = (double ) src1 [i ].re ;
1362
+ double src1Im_64 = (double ) src1 [i ].im ;
1363
+ double src2Re_64 = (double ) src2 [i ].re ;
1364
+ double src2Im_64 = (double ) src2 [i ].im ;
1365
+ double c2d2 = src2Re_64 * src2Re_64 + src2Im_64 * src2Im_64 ;
1366
+ dst [i ].re = (float ) ((src1Re_64 * src2Re_64 + (src1Im_64 * src2Im_64 )) / c2d2 );
1367
+ dst [i ].im = (float ) ((- src1Re_64 * src2Im_64 + (src2Re_64 * src1Im_64 )) / c2d2 );
1368
+ }
1369
+ }
1370
+
1308
1371
1309
1372
static inline void cplxvecdiv_C_split (float * src1Re , float * src1Im , float * src2Re , float * src2Im , float * dstRe , float * dstIm , int len )
1310
1373
{
@@ -1318,6 +1381,22 @@ static inline void cplxvecdiv_C_split(float *src1Re, float *src1Im, float *src2R
1318
1381
}
1319
1382
}
1320
1383
1384
/*
 * Element-wise complex division on split (separate real / imaginary) arrays,
 * with the intermediate arithmetic done in double precision.
 *
 * With a = src1Re[i], b = src1Im[i], c = src2Re[i], d = src2Im[i]:
 *     dstRe[i] = (a*c + b*d) / (c*c + d*d)
 *     dstIm[i] = (-a*d + c*b) / (c*c + d*d)
 * All four inputs are converted to double first; each result is narrowed to
 * float only on the final store.
 *
 * src1Re, src1Im : numerator real / imaginary parts   (0 .. len-1 are read)
 * src2Re, src2Im : denominator real / imaginary parts (0 .. len-1 are read)
 * dstRe,  dstIm  : quotient real / imaginary parts    (0 .. len-1 are written)
 * len            : number of elements; the loop body does not run when len <= 0
 *
 * No check is made for a zero denominator (c*c + d*d == 0).
 * When OMP is defined the loop is annotated with "#pragma omp simd".
 */
static inline void cplxvecdiv_C_split_precise(float *src1Re, float *src1Im, float *src2Re, float *src2Im, float *dstRe, float *dstIm, int len)
{
#ifdef OMP
#pragma omp simd
#endif
    for (int i = 0; i < len; i++) {
        double src1Re_64 = (double) src1Re[i];
        double src1Im_64 = (double) src1Im[i];
        double src2Re_64 = (double) src2Re[i];
        double src2Im_64 = (double) src2Im[i];
        /* Squared magnitude of the denominator, shared by both components. */
        double c2d2 = src2Re_64 * src2Re_64 + src2Im_64 * src2Im_64;
        dstRe[i] = (float) ((src1Re_64 * src2Re_64 + (src1Im_64 * src2Im_64)) / c2d2);
        dstIm[i] = (float) ((-src1Re_64 * src2Im_64 + (src2Re_64 * src1Im_64)) / c2d2);
    }
}
1399
+
1321
1400
static inline void cplxvecmul_C (complex32_t * src1 , complex32_t * src2 , complex32_t * dst , int len )
1322
1401
{
1323
1402
#ifdef OMP
@@ -1329,6 +1408,21 @@ static inline void cplxvecmul_C(complex32_t *src1, complex32_t *src2, complex32_
1329
1408
}
1330
1409
}
1331
1410
1411
+ static inline void cplxvecmul_C_precise (complex32_t * src1 , complex32_t * src2 , complex32_t * dst , int len )
1412
+ {
1413
+ #ifdef OMP
1414
+ #pragma omp simd
1415
+ #endif
1416
+ for (int i = 0 ; i < len ; i ++ ) {
1417
+ double src1Re_64 = (double ) src1 [i ].re ;
1418
+ double src1Im_64 = (double ) src1 [i ].im ;
1419
+ double src2Re_64 = (double ) src2 [i ].re ;
1420
+ double src2Im_64 = (double ) src2 [i ].im ;
1421
+ dst [i ].re = (float ) ((src1Re_64 * src2Re_64 ) - src1Im_64 * src2Im_64 );
1422
+ dst [i ].im = (float ) (src1Re_64 * src2Im_64 + (src2Re_64 * src1Im_64 ));
1423
+ }
1424
+ }
1425
+
1332
1426
static inline void cplxvecmul_C_unrolled8 (complex32_t * src1 , complex32_t * src2 , complex32_t * dst , int len )
1333
1427
{
1334
1428
int stop_len = len / 8 ;
@@ -1361,29 +1455,32 @@ static inline void cplxvecmul_C_unrolled8(complex32_t *src1, complex32_t *src2,
1361
1455
}
1362
1456
}
1363
1457
1364
/*
 * Element-wise complex multiplication on split (separate real / imaginary)
 * arrays, computed in single precision.
 *
 * With a = src1Re[i], b = src1Im[i], c = src2Re[i], d = src2Im[i]:
 *     dstRe[i] = a*c - b*d
 *     dstIm[i] = a*d + c*b
 *
 * src1Re, src1Im : first factor real / imaginary parts  (0 .. len-1 are read)
 * src2Re, src2Im : second factor real / imaginary parts (0 .. len-1 are read)
 * dstRe,  dstIm  : product real / imaginary parts       (0 .. len-1 are written)
 * len            : number of elements; the loop body does not run when len <= 0
 *
 * When OMP is defined the loop is annotated with "#pragma omp simd".
 */
static inline void cplxvecmul_C_split(float *src1Re, float *src1Im, float *src2Re, float *src2Im, float *dstRe, float *dstIm, int len)
{
#ifdef OMP
#pragma omp simd
#endif
    for (int i = 0; i < len; i++) {
        dstRe[i] = (src1Re[i] * src2Re[i]) - src1Im[i] * src2Im[i];
        dstIm[i] = src1Re[i] * src2Im[i] + (src2Re[i] * src1Im[i]);
    }
}
1374
1468
1375
/*
 * Element-wise complex multiplication on split (separate real / imaginary)
 * arrays, with the products and sums done in double precision.
 *
 * With a = src1Re[i], b = src1Im[i], c = src2Re[i], d = src2Im[i]:
 *     dstRe[i] = (float) (a*c - b*d)
 *     dstIm[i] = (float) (a*d + c*b)
 * All four inputs are converted to double first; each result is narrowed to
 * float only on the final store.
 *
 * src1Re, src1Im : first factor real / imaginary parts  (0 .. len-1 are read)
 * src2Re, src2Im : second factor real / imaginary parts (0 .. len-1 are read)
 * dstRe,  dstIm  : product real / imaginary parts       (0 .. len-1 are written)
 * len            : number of elements; the loop body does not run when len <= 0
 *
 * When OMP is defined the loop is annotated with "#pragma omp simd".
 */
static inline void cplxvecmul_C_split_precise(float *src1Re, float *src1Im, float *src2Re, float *src2Im, float *dstRe, float *dstIm, int len)
{
#ifdef OMP
#pragma omp simd
#endif
    for (int i = 0; i < len; i++) {
        double src1Re_64 = (double) src1Re[i];
        double src1Im_64 = (double) src1Im[i];
        double src2Re_64 = (double) src2Re[i];
        double src2Im_64 = (double) src2Im[i];
        dstRe[i] = (float) ((src1Re_64 * src2Re_64) - src1Im_64 * src2Im_64);
        dstIm[i] = (float) (src1Re_64 * src2Im_64 + (src2Re_64 * src1Im_64));
    }
}
1385
1483
1386
-
1387
1484
static inline void cplxconjvecmul_C (complex32_t * src1 , complex32_t * src2 , complex32_t * dst , int len )
1388
1485
{
1389
1486
#ifdef OMP
@@ -1395,7 +1492,7 @@ static inline void cplxconjvecmul_C(complex32_t *src1, complex32_t *src2, comple
1395
1492
}
1396
1493
}
1397
1494
1398
- static inline void cplxconjvecmul_C2 (complex32_t * src1 , complex32_t * src2 , complex32_t * dst , int len )
1495
+ static inline void cplxconjvecmul_C_precise (complex32_t * src1 , complex32_t * src2 , complex32_t * dst , int len )
1399
1496
{
1400
1497
#ifdef OMP
1401
1498
#pragma omp simd
@@ -1417,7 +1514,7 @@ static inline void cplxconjvecmul_C_split(float *src1Re, float *src1Im, float *s
1417
1514
}
1418
1515
}
1419
1516
1420
- static inline void cplxconjvecmul_C_split2 (float * src1Re , float * src1Im , float * src2Re , float * src2Im , float * dstRe , float * dstIm , int len )
1517
+ static inline void cplxconjvecmul_C_split_precise (float * src1Re , float * src1Im , float * src2Re , float * src2Im , float * dstRe , float * dstIm , int len )
1421
1518
{
1422
1519
#ifdef OMP
1423
1520
#pragma omp simd
0 commit comments