Skip to content

Commit c6646ba

Browse files
committed
version 0.2.3 : added _precise C functions for cplxvecmul/cplxconjvecmul/cplxvecdiv/powerspectr/magnitude
1 parent d0da887 commit c6646ba

20 files changed

+303
-67
lines changed

README.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -144,8 +144,8 @@ The following table is a work in progress, "X" means there is not yet an impleme
144144
| ceil128f | ceil256f | ceil512f | ceilf_C | ippsCeil_32f | X | X |
145145
| floor128f | floor256f | floor512f | floorf_C | ippsFloor_32f | X | X |
146146
| trunc128f | trunc256f | trunc512f | truncf_C | ippsTrunc_32f | X | X |
147-
| cplxvecmul128f | cplxvecmul256f | cplxvecmul512f | cplxvecmul_C | ippsMul_32fc_A24 | cplxvecmul_vec | X |
148-
| cplxvecmul128f_split | cplxvecmul256f_split | cplxvecmul512f_split | cplxvecmul_C_split | X | cplxvecmul_vec_split | X |
147+
| cplxvecmul128f | cplxvecmul256f | cplxvecmul512f | cplxvecmul_C/precise | ippsMul_32fc_A11/24 | cplxvecmul_vec | X |
148+
| cplxvecmul128f_split | cplxvecmul256f_split | cplxvecmul512f_split | cplxvecmul_C_split/precise| X | cplxvecmul_vec_split | X |
149149
| cplxconjvecmul128f | cplxconjvecmul256f | cplxconjvecmul512f | cplxconjvecmul_C | ippsMulByConj_32fc_A24 | X | X |
150150
| cplxconjvecmul128f_split | cplxconjvecmul256f_split | cplxconjvecmul512f_split | cplxconjvecmul_C_split | X | X | X |
151151
| cplxconj128f | cplxconj256f | cplxconj512f | cplxconj_C | ippsConj_32fc_A24 | X | X |

avx512_mathfun.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
* Project : SIMD_Utils
3-
* Version : 0.2.2
3+
* Version : 0.2.3
44
* Author : JishinMaster
55
* Licence : BSD-2
66
*/

mysincosf.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
* Project : SIMD_Utils
3-
* Version : 0.2.2
3+
* Version : 0.2.3
44
* Author : JishinMaster
55
* Licence : BSD-2
66
*/

simd_test.c

+165-25
Large diffs are not rendered by default.

simd_test_opencl.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
* Project : SIMD_Utils
3-
* Version : 0.2.2
3+
* Version : 0.2.3
44
* Author : JishinMaster
55
* Licence : BSD-2
66
*/

simd_utils.h

+111-14
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
* Project : SIMD_Utils
3-
* Version : 0.2.2
3+
* Version : 0.2.3
44
* Author : JishinMaster
55
* Licence : BSD-2
66
*/
@@ -932,7 +932,19 @@ static inline void magnitudef_C_interleaved(complex32_t *src, float *dst, int le
932932
#pragma omp simd
933933
#endif
934934
for (int i = 0; i < len; i++) {
935-
dst[i] = sqrtf(src[i].re * src[i].re + (src[i].im * src[i].im));
935+
dst[i] = sqrtf((src[i].re * src[i].re) + src[i].im * src[i].im);
936+
}
937+
}
938+
939+
static inline void magnitudef_C_interleaved_precise(complex32_t *src, float *dst, int len)
940+
{
941+
#ifdef OMP
942+
#pragma omp simd
943+
#endif
944+
for (int i = 0; i < len; i++) {
945+
double srcRe_64 = (double) src[i].re;
946+
double srcIm_64 = (double) src[i].im;
947+
dst[i] = (float) (sqrt((srcRe_64 * srcRe_64) + srcIm_64 * srcIm_64));
936948
}
937949
}
938950

@@ -942,18 +954,41 @@ static inline void magnitudef_C_split(float *srcRe, float *srcIm, float *dst, in
942954
#pragma omp simd
943955
#endif
944956
for (int i = 0; i < len; i++) {
945-
dst[i] = sqrtf(srcRe[i] * srcRe[i] + (srcIm[i] * srcIm[i]));
957+
dst[i] = sqrtf((srcRe[i] * srcRe[i]) + srcIm[i] * srcIm[i]);
946958
}
947959
}
948960

961+
static inline void magnitudef_C_split_precise(float *srcRe, float *srcIm, float *dst, int len)
962+
{
963+
#ifdef OMP
964+
#pragma omp simd
965+
#endif
966+
for (int i = 0; i < len; i++) {
967+
double srcRe_64 = (double) srcRe[i];
968+
double srcIm_64 = (double) srcIm[i];
969+
dst[i] = (float) (sqrt((srcRe_64 * srcRe_64) + srcIm_64 * srcIm_64));
970+
}
971+
}
949972

950973
static inline void powerspectf_C_split(float *srcRe, float *srcIm, float *dst, int len)
951974
{
952975
#ifdef OMP
953976
#pragma omp simd
954977
#endif
955978
for (int i = 0; i < len; i++) {
956-
dst[i] = srcRe[i] * srcRe[i] + (srcIm[i] * srcIm[i]);
979+
dst[i] = (srcRe[i] * srcRe[i]) + srcIm[i] * srcIm[i];
980+
}
981+
}
982+
983+
static inline void powerspectf_C_split_precise(float *srcRe, float *srcIm, float *dst, int len)
984+
{
985+
#ifdef OMP
986+
#pragma omp simd
987+
#endif
988+
for (int i = 0; i < len; i++) {
989+
double srcRe_64 = (double) srcRe[i];
990+
double srcIm_64 = (double) srcIm[i];
991+
dst[i] = (float) ((srcRe_64 * srcRe_64) + srcIm_64 * srcIm_64);
957992
}
958993
}
959994

@@ -963,7 +998,19 @@ static inline void powerspectf_C_interleaved(complex32_t *src, float *dst, int l
963998
#pragma omp simd
964999
#endif
9651000
for (int i = 0; i < len; i++) {
966-
dst[i] = src[i].re * src[i].re + (src[i].im * src[i].im);
1001+
dst[i] = (src[i].re * src[i].re) + src[i].im * src[i].im;
1002+
}
1003+
}
1004+
1005+
static inline void powerspectf_C_interleaved_precise(complex32_t *src, float *dst, int len)
1006+
{
1007+
#ifdef OMP
1008+
#pragma omp simd
1009+
#endif
1010+
for (int i = 0; i < len; i++) {
1011+
double srcRe_64 = (double) src[i].re;
1012+
double srcIm_64 = (double) src[i].im;
1013+
dst[i] = (float) ((srcRe_64 * srcRe_64) + srcIm_64 * srcIm_64);
9671014
}
9681015
}
9691016

@@ -1305,6 +1352,22 @@ static inline void cplxvecdiv_C(complex32_t *src1, complex32_t *src2, complex32_
13051352
}
13061353
}
13071354

1355+
static inline void cplxvecdiv_C_precise(complex32_t *src1, complex32_t *src2, complex32_t *dst, int len)
1356+
{
1357+
#ifdef OMP
1358+
#pragma omp simd
1359+
#endif
1360+
for (int i = 0; i < len; i++) {
1361+
double src1Re_64 = (double) src1[i].re;
1362+
double src1Im_64 = (double) src1[i].im;
1363+
double src2Re_64 = (double) src2[i].re;
1364+
double src2Im_64 = (double) src2[i].im;
1365+
double c2d2 = src2Re_64 * src2Re_64 + src2Im_64 * src2Im_64;
1366+
dst[i].re = (float) ((src1Re_64 * src2Re_64 + (src1Im_64 * src2Im_64)) / c2d2);
1367+
dst[i].im = (float) ((-src1Re_64 * src2Im_64 + (src2Re_64 * src1Im_64)) / c2d2);
1368+
}
1369+
}
1370+
13081371

13091372
static inline void cplxvecdiv_C_split(float *src1Re, float *src1Im, float *src2Re, float *src2Im, float *dstRe, float *dstIm, int len)
13101373
{
@@ -1318,6 +1381,22 @@ static inline void cplxvecdiv_C_split(float *src1Re, float *src1Im, float *src2R
13181381
}
13191382
}
13201383

1384+
static inline void cplxvecdiv_C_split_precise(float *src1Re, float *src1Im, float *src2Re, float *src2Im, float *dstRe, float *dstIm, int len)
1385+
{
1386+
#ifdef OMP
1387+
#pragma omp simd
1388+
#endif
1389+
for (int i = 0; i < len; i++) {
1390+
double src1Re_64 = (double) src1Re[i];
1391+
double src1Im_64 = (double) src1Im[i];
1392+
double src2Re_64 = (double) src2Re[i];
1393+
double src2Im_64 = (double) src2Im[i];
1394+
double c2d2 = src2Re_64 * src2Re_64 + src2Im_64 * src2Im_64;
1395+
dstRe[i] = (float) ((src1Re_64 * src2Re_64 + (src1Im_64 * src2Im_64)) / c2d2);
1396+
dstIm[i] = (float) ((-src1Re_64 * src2Im_64 + (src2Re_64 * src1Im_64)) / c2d2);
1397+
}
1398+
}
1399+
13211400
static inline void cplxvecmul_C(complex32_t *src1, complex32_t *src2, complex32_t *dst, int len)
13221401
{
13231402
#ifdef OMP
@@ -1329,6 +1408,21 @@ static inline void cplxvecmul_C(complex32_t *src1, complex32_t *src2, complex32_
13291408
}
13301409
}
13311410

1411+
static inline void cplxvecmul_C_precise(complex32_t *src1, complex32_t *src2, complex32_t *dst, int len)
1412+
{
1413+
#ifdef OMP
1414+
#pragma omp simd
1415+
#endif
1416+
for (int i = 0; i < len; i++) {
1417+
double src1Re_64 = (double) src1[i].re;
1418+
double src1Im_64 = (double) src1[i].im;
1419+
double src2Re_64 = (double) src2[i].re;
1420+
double src2Im_64 = (double) src2[i].im;
1421+
dst[i].re = (float) ((src1Re_64 * src2Re_64) - src1Im_64 * src2Im_64);
1422+
dst[i].im = (float) (src1Re_64 * src2Im_64 + (src2Re_64 * src1Im_64));
1423+
}
1424+
}
1425+
13321426
static inline void cplxvecmul_C_unrolled8(complex32_t *src1, complex32_t *src2, complex32_t *dst, int len)
13331427
{
13341428
int stop_len = len / 8;
@@ -1361,29 +1455,32 @@ static inline void cplxvecmul_C_unrolled8(complex32_t *src1, complex32_t *src2,
13611455
}
13621456
}
13631457

1364-
static inline void cplxvecmul_C2(complex32_t *src1, complex32_t *src2, complex32_t *dst, int len)
1458+
static inline void cplxvecmul_C_split(float *src1Re, float *src1Im, float *src2Re, float *src2Im, float *dstRe, float *dstIm, int len)
13651459
{
13661460
#ifdef OMP
13671461
#pragma omp simd
13681462
#endif
13691463
for (int i = 0; i < len; i++) {
1370-
dst[i].re = (float) ((double) src1[i].re * (double) src2[i].re - (double) src1[i].im * (double) src2[i].im);
1371-
dst[i].im = (float) ((double) src1[i].re * (double) src2[i].im + (double) src2[i].re * (double) src1[i].im);
1464+
dstRe[i] = (src1Re[i] * src2Re[i]) - src1Im[i] * src2Im[i];
1465+
dstIm[i] = src1Re[i] * src2Im[i] + (src2Re[i] * src1Im[i]);
13721466
}
13731467
}
13741468

1375-
static inline void cplxvecmul_C_split(float *src1Re, float *src1Im, float *src2Re, float *src2Im, float *dstRe, float *dstIm, int len)
1469+
static inline void cplxvecmul_C_split_precise(float *src1Re, float *src1Im, float *src2Re, float *src2Im, float *dstRe, float *dstIm, int len)
13761470
{
13771471
#ifdef OMP
13781472
#pragma omp simd
13791473
#endif
13801474
for (int i = 0; i < len; i++) {
1381-
dstRe[i] = (src1Re[i] * src2Re[i]) - src1Im[i] * src2Im[i];
1382-
dstIm[i] = src1Re[i] * src2Im[i] + (src2Re[i] * src1Im[i]);
1475+
double src1Re_64 = (double) src1Re[i];
1476+
double src1Im_64 = (double) src1Im[i];
1477+
double src2Re_64 = (double) src2Re[i];
1478+
double src2Im_64 = (double) src2Im[i];
1479+
dstRe[i] = (float) ((src1Re_64 * src2Re_64) - src1Im_64 * src2Im_64);
1480+
dstIm[i] = (float) (src1Re_64 * src2Im_64 + (src2Re_64 * src1Im_64));
13831481
}
13841482
}
13851483

1386-
13871484
static inline void cplxconjvecmul_C(complex32_t *src1, complex32_t *src2, complex32_t *dst, int len)
13881485
{
13891486
#ifdef OMP
@@ -1395,7 +1492,7 @@ static inline void cplxconjvecmul_C(complex32_t *src1, complex32_t *src2, comple
13951492
}
13961493
}
13971494

1398-
static inline void cplxconjvecmul_C2(complex32_t *src1, complex32_t *src2, complex32_t *dst, int len)
1495+
static inline void cplxconjvecmul_C_precise(complex32_t *src1, complex32_t *src2, complex32_t *dst, int len)
13991496
{
14001497
#ifdef OMP
14011498
#pragma omp simd
@@ -1417,7 +1514,7 @@ static inline void cplxconjvecmul_C_split(float *src1Re, float *src1Im, float *s
14171514
}
14181515
}
14191516

1420-
static inline void cplxconjvecmul_C_split2(float *src1Re, float *src1Im, float *src2Re, float *src2Im, float *dstRe, float *dstIm, int len)
1517+
static inline void cplxconjvecmul_C_split_precise(float *src1Re, float *src1Im, float *src2Re, float *src2Im, float *dstRe, float *dstIm, int len)
14211518
{
14221519
#ifdef OMP
14231520
#pragma omp simd

simd_utils_altivec_float.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
* Project : SIMD_Utils
3-
* Version : 0.2.2
3+
* Version : 0.2.3
44
* Author : JishinMaster
55
* Licence : BSD-2
66
*/

simd_utils_avx512_double.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
* Project : SIMD_Utils
3-
* Version : 0.2.2
3+
* Version : 0.2.3
44
* Author : JishinMaster
55
* Licence : BSD-2
66
*/

simd_utils_avx512_float.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
* Project : SIMD_Utils
3-
* Version : 0.2.2
3+
* Version : 0.2.3
44
* Author : JishinMaster
55
* Licence : BSD-2
66
*/

simd_utils_avx512_int32.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
* Project : SIMD_Utils
3-
* Version : 0.2.2
3+
* Version : 0.2.3
44
* Author : JishinMaster
55
* Licence : BSD-2
66
*/

simd_utils_avx_double.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
* Project : SIMD_Utils
3-
* Version : 0.2.2
3+
* Version : 0.2.3
44
* Author : JishinMaster
55
* Licence : BSD-2
66
*/

simd_utils_avx_float.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
* Project : SIMD_Utils
3-
* Version : 0.2.2
3+
* Version : 0.2.3
44
* Author : JishinMaster
55
* Licence : BSD-2
66
*/

simd_utils_avx_int32.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
* Project : SIMD_Utils
3-
* Version : 0.2.2
3+
* Version : 0.2.3
44
* Author : JishinMaster
55
* Licence : BSD-2
66
*/

simd_utils_constants.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
* Project : SIMD_Utils
3-
* Version : 0.2.2
3+
* Version : 0.2.3
44
* Author : JishinMaster
55
* Licence : BSD-2
66
*/

simd_utils_kernel.cl

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
/*
44
* Project : SIMD_Utils
5-
* Version : 0.2.2
5+
* Version : 0.2.3
66
* Author : JishinMaster
77
* Licence : BSD-2
88
*/

simd_utils_riscv.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
* Project : SIMD_Utils
3-
* Version : 0.2.2
3+
* Version : 0.2.3
44
* Author : JishinMaster
55
* Licence : BSD-2
66
*/

simd_utils_sse_double.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
* Project : SIMD_Utils
3-
* Version : 0.2.2
3+
* Version : 0.2.3
44
* Author : JishinMaster
55
* Licence : BSD-2
66
*/

0 commit comments

Comments
 (0)