Skip to content

Commit 854f3e8

Browse files
committed
added experimental Altivec support and updated to version 0.2.0
1 parent 445ed4d commit 854f3e8

21 files changed

+556
-85
lines changed

README.md

+3-1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ Supported targets are :
2626
- AVX512 (experimental, most of float32 functions)
2727
- ARM Neon (through sse2neon plus some optimized functions).
2828
- RISC-V Vector extension (experimental)
29+
- PowerPC Altivec (experimental)
2930

3031
128 bit functions (SSE and NEON) are named function128type, such as asin128f, which computes the arcsine function on a float32 array. Float64 functions have the "d" suffix.
3132
256 bit functions (AVX/AVX2) have 256 instead of 128 in their name, such as asin256f.
@@ -39,7 +40,7 @@ The project has been tested on :
3940
- Intel Cannonlake Core-i7
4041
- Intel SDE (emulator) for AVX-512
4142
- Spike (emulator) for RISCV Vector
42-
- Qemu 5.X (emulator) )for arm/aarch64
43+
- Qemu 5.X (emulator) for arm/aarch64 and ppc
4344
- Cortex-a53 (Raspberry Pi 3B)
4445
- Cortex-a9 (ZYBO)
4546

@@ -53,6 +54,7 @@ Simply include simd_utils.h in your C/C++ file, and compile with :
5354
- AVX support : gcc -DSSE -DAVX -mavx2 -c file.c -I .
5455
- AVX512 support : gcc -DSSE -DAVX -DAVX512 -march=skylake-avx512 -c file.c -I .
5556
- NEON support : aarch64-linux-gnu-gcc -DARM -DFMA -DSSE -flax-vector-conversions -c file.c -I .
57+
- ALTIVEC support : powerpc64-linux-gnu-gcc -DALTIVEC -DFMA -maltivec -flax-vector-conversions -c file.c -I .
5658

5759
For FMA support you need to add -DFMA and -mfma to x86 targets, and -DFMA to Armv8 targets.
5860

avx512_mathfun.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
* Project : SIMD_Utils
3-
* Version : 0.1.12
3+
* Version : 0.2.0
44
* Author : JishinMaster
55
* Licence : BSD-2
66
*/

mysincosf.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
* Project : SIMD_Utils
3-
* Version : 0.1.12
3+
* Version : 0.2.0
44
* Author : JishinMaster
55
* Licence : BSD-2
66
*/

simd_test.c

+25-28
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
* Project : SIMD_Utils
3-
* Version : 0.1.12
3+
* Version : 0.2.0
44
* Author : JishinMaster
55
* Licence : BSD-2
66
*/
@@ -38,22 +38,18 @@
3838
#include <mkl_vml.h>
3939
#endif
4040

41-
#ifndef RISCV
41+
#if 0
4242
typedef ALIGN16_BEG union {
4343
float f[4];
4444
int i[4];
4545
v4sf v;
4646
} ALIGN16_END V4SF;
4747

48-
#ifdef AVX
4948
typedef ALIGN32_BEG union {
5049
float f[8];
5150
int i[8];
5251
v8sf v;
5352
} ALIGN32_END V8SF;
54-
55-
#endif
56-
5753
#endif
5854

5955
float l2_err(float *test, float *ref, int len)
@@ -65,10 +61,10 @@ float l2_err(float *test, float *ref, int len)
6561
}
6662

6763
#ifdef RELEASE
68-
if (l2_err > 0.00001f)
69-
printf("L2 ERR %0.7f\n", l2_err);
64+
if (l2_err > 0.0000001f)
65+
printf("L2 ERR %0.9f\n", l2_err);
7066
#else
71-
printf("L2 ERR %0.7f\n", l2_err);
67+
printf("L2 ERR %0.9f\n", l2_err);
7268
#endif
7369
return l2_err;
7470
}
@@ -82,10 +78,10 @@ float l2_err_u8(uint8_t *test, uint8_t *ref, int len)
8278
}
8379

8480
#ifdef RELEASE
85-
if (l2_err > 0.00001f)
86-
printf("L2 ERR %0.7f\n", l2_err);
81+
if (l2_err > 0.0000001f)
82+
printf("L2 ERR %0.9f\n", l2_err);
8783
#else
88-
printf("L2 ERR %0.7f\n", l2_err);
84+
printf("L2 ERR %0.9f\n", l2_err);
8985
#endif
9086
return l2_err;
9187
}
@@ -99,10 +95,10 @@ float l2_err_i32(int32_t *test, int32_t *ref, int len)
9995
}
10096

10197
#ifdef RELEASE
102-
if (l2_err > 0.00001f)
103-
printf("L2 ERR %0.7f\n", l2_err);
98+
if (l2_err > 0.000001f)
99+
printf("L2 ERR %0.9f\n", l2_err);
104100
#else
105-
printf("L2 ERR %0.7f\n", l2_err);
101+
printf("L2 ERR %0.9f\n", l2_err);
106102
#endif
107103

108104
return l2_err;
@@ -117,10 +113,10 @@ float l2_err_i16(int16_t *test, int16_t *ref, int len)
117113
}
118114

119115
#ifdef RELEASE
120-
if (l2_err > 0.00001f)
121-
printf("L2 ERR %0.7f\n", l2_err);
116+
if (l2_err > 0.000001f)
117+
printf("L2 ERR %0.9f\n", l2_err);
122118
#else
123-
printf("L2 ERR %0.7f\n", l2_err);
119+
printf("L2 ERR %0.9f\n", l2_err);
124120
#endif
125121

126122
return l2_err;
@@ -135,10 +131,10 @@ float l2_errd(double *test, double *ref, int len)
135131
}
136132

137133
#ifdef RELEASE
138-
if (l2_err > 0.00001)
139-
printf("L2 ERR %0.7f\n", l2_err);
134+
if (l2_err > 0.000000001)
135+
printf("L2 ERR %0.13f\n", l2_err);
140136
#else
141-
printf("L2 ERR %0.7f\n", l2_err);
137+
printf("L2 ERR %0.13f\n", l2_err);
142138
#endif
143139

144140
return l2_err;
@@ -588,7 +584,7 @@ int main(int argc, char **argv)
588584
l2_err(inout, inout_ref, len);
589585
#endif
590586

591-
#ifdef SSE
587+
#if defined(SSE) ||defined(ALTIVEC)
592588
clock_gettime(CLOCK_REALTIME, &start);
593589
zero128f(inout, len);
594590
clock_gettime(CLOCK_REALTIME, &stop);
@@ -1294,7 +1290,7 @@ int main(int argc, char **argv)
12941290
l2_err(inout3, inout_ref, len);
12951291
#endif
12961292

1297-
#ifdef SSE
1293+
#if defined(SSE) || defined(ALTIVEC)
12981294
clock_gettime(CLOCK_REALTIME, &start);
12991295
minevery128f(inout, inout2, inout3, len);
13001296
clock_gettime(CLOCK_REALTIME, &stop);
@@ -2029,7 +2025,7 @@ printf("\n");
20292025
clock_gettime(CLOCK_REALTIME, &stop);
20302026
elapsed = ((stop.tv_sec - start.tv_sec) * 1e6 + (stop.tv_nsec - start.tv_nsec) * 1e-3) / (double) loop;
20312027
printf("cplxvecmul_C %d %lf %0.3lf GFlops/s\n", len, elapsed, flops / (elapsed * 1e3));
2032-
;
2028+
20332029

20342030
#ifdef IPP
20352031
clock_gettime(CLOCK_REALTIME, &start);
@@ -2052,7 +2048,7 @@ printf("\n");
20522048
}*/
20532049
#endif
20542050

2055-
#ifdef SSE
2051+
#if defined(SSE)
20562052
clock_gettime(CLOCK_REALTIME, &start);
20572053
cplxvecmul128f((complex32_t *) inout, (complex32_t *) inout2, (complex32_t *) inout2_ref, len);
20582054
clock_gettime(CLOCK_REALTIME, &stop);
@@ -2233,6 +2229,7 @@ printf("\n");
22332229
printf("cplxconjvecmul128f %d %lf %0.3lf GFlops/s\n", len, elapsed, flops / (elapsed * 1e3));
22342230

22352231
l2_err(inout_ref, inout2_ref, 2 * len);
2232+
22362233
#endif
22372234

22382235
#ifdef AVX
@@ -3295,7 +3292,7 @@ printf("\n");
32953292
printf("MKLvsLn %d %lf\n", len, elapsed);
32963293
#endif
32973294

3298-
#ifdef SSE
3295+
#if defined(SSE) || defined(ALTIVEC)
32993296
clock_gettime(CLOCK_REALTIME, &start);
33003297
ln_128f(inout, inout2, len);
33013298
clock_gettime(CLOCK_REALTIME, &stop);
@@ -3400,7 +3397,7 @@ printf("\n");
34003397
elapsed = ((stop.tv_sec - start.tv_sec) * 1e6 + (stop.tv_nsec - start.tv_nsec) * 1e-3) / (double) loop;
34013398
printf("log2f_C %d %lf\n", len, elapsed);
34023399

3403-
#ifdef SSE
3400+
#if defined(SSE) || defined(ALTIVEC)
34043401
clock_gettime(CLOCK_REALTIME, &start);
34053402
log2_128f(inout, inout2, len);
34063403
clock_gettime(CLOCK_REALTIME, &stop);
@@ -5347,7 +5344,7 @@ for (int i = 0; i < len; i++){
53475344
l2_err(inout4, inout2_ref, len);
53485345
#endif
53495346

5350-
#ifdef SSE
5347+
#if defined(SSE) || defined(ALTIVEC)
53515348
clock_gettime(CLOCK_REALTIME, &start);
53525349
cplxtoreal128f(inout, inout3, inout4, len);
53535350
clock_gettime(CLOCK_REALTIME, &stop);

simd_test_opencl.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
* Project : SIMD_Utils
3-
* Version : 0.1.12
3+
* Version : 0.2.0
44
* Author : JishinMaster
55
* Licence : BSD-2
66
*/

0 commit comments

Comments
 (0)