Skip to content

Commit 6398c29

Browse files
committed
upgraded riscv vector to standard 0.10, and added sinf_vec, magnitudef_split_vec, meanf_vec and sumf_vec
1 parent 56ababe commit 6398c29

16 files changed

+898
-418
lines changed

mysincosf.h

+1-22
Original file line numberDiff line numberDiff line change
@@ -7,27 +7,6 @@
77

88
#pragma once
99

10-
static float FOPI = 1.27323954473516;
11-
static float PIO4F = 0.7853981633974483096;
12-
13-
/* Note, these constants are for a 32-bit significand: */
14-
static float DP1 = 0.7853851318359375;
15-
static float DP2 = 1.30315311253070831298828125e-5;
16-
static float DP3 = 3.03855025325309630e-11;
17-
static float lossth = 65536.;
18-
19-
/* These are for a 24-bit significand: */
20-
/*static float DP1 = 0.78515625;
21-
static float DP2 = 2.4187564849853515625e-4;
22-
static float DP3 = 3.77489497744594108e-8;
23-
static float lossth = 8192.;*/
24-
25-
static float T24M1 = 16777215.;
26-
27-
static float sincof[] = {-1.9515295891E-4, 8.3321608736E-3, -1.6666654611E-1};
28-
static float coscof[] = {2.443315711809948E-005, -1.388731625493765E-003,
29-
4.166664568298827E-002};
30-
3110
static inline int mysincosf(float xx, float *s, float *c)
3211
{
3312
float x, y, y1, y2, z;
@@ -66,7 +45,7 @@ static inline int mysincosf(float xx, float *s, float *c)
6645
x = x - y * PIO4F;
6746
} else {
6847
/* Extended precision modular arithmetic */
69-
x = ((x - y * DP1) - y * DP2) - y * DP3;
48+
x = ((x + y * minus_cephes_DP1) + y * minus_cephes_DP2) + y * minus_cephes_DP3;
7049
}
7150
/*einits();*/
7251
z = x * x;

simd_test.c

+122-3
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
#include <mkl_vml.h>
3939
#endif
4040

41+
#ifndef RISCV
4142
typedef ALIGN16_BEG union {
4243
float f[4];
4344
int i[4];
@@ -53,6 +54,8 @@ typedef ALIGN32_BEG union {
5354

5455
#endif
5556

57+
#endif
58+
5659
float l2_err(float *test, float *ref, int len)
5760
{
5861
float l2_err = 0.0f;
@@ -1140,6 +1143,26 @@ int main(int argc, char **argv)
11401143
l2_err(inout3, inout_ref, len);
11411144
#endif
11421145

1146+
#ifdef RISCV
1147+
clock_gettime(CLOCK_REALTIME, &start);
1148+
maxeveryf_vec(inout, inout2, inout3, len);
1149+
clock_gettime(CLOCK_REALTIME, &stop);
1150+
1151+
elapsed = (stop.tv_sec - start.tv_sec) * 1e6 + (stop.tv_nsec - start.tv_nsec) * 1e-3;
1152+
printf("maxeveryf_vec %d %lf\n", len, elapsed);
1153+
1154+
clock_gettime(CLOCK_REALTIME, &start);
1155+
for (l = 0; l < loop; l++)
1156+
maxeveryf_vec(inout, inout2, inout3, len);
1157+
1158+
clock_gettime(CLOCK_REALTIME, &stop);
1159+
1160+
elapsed = ((stop.tv_sec - start.tv_sec) * 1e6 + (stop.tv_nsec - start.tv_nsec) * 1e-3) / (double) loop;
1161+
printf("maxeveryf_vec %d %lf %0.3lf GFlops/s\n", len, elapsed, flops / (elapsed * 1e3));
1162+
1163+
l2_err(inout3, inout_ref, len);
1164+
#endif
1165+
11431166
printf("\n");
11441167
/////////////////////////////////////////////////////////// MINEVERY //////////////////////////////////////////////////////////////////////////////
11451168
printf("MINEVERY\n");
@@ -1234,6 +1257,26 @@ int main(int argc, char **argv)
12341257
l2_err(inout3, inout_ref, len);
12351258
#endif
12361259

1260+
#ifdef RISCV
1261+
clock_gettime(CLOCK_REALTIME, &start);
1262+
mineveryf_vec(inout, inout2, inout3, len);
1263+
clock_gettime(CLOCK_REALTIME, &stop);
1264+
1265+
elapsed = (stop.tv_sec - start.tv_sec) * 1e6 + (stop.tv_nsec - start.tv_nsec) * 1e-3;
1266+
printf("mineveryf_vec %d %lf\n", len, elapsed);
1267+
1268+
clock_gettime(CLOCK_REALTIME, &start);
1269+
for (l = 0; l < loop; l++)
1270+
mineveryf_vec(inout, inout2, inout3, len);
1271+
1272+
clock_gettime(CLOCK_REALTIME, &stop);
1273+
1274+
elapsed = ((stop.tv_sec - start.tv_sec) * 1e6 + (stop.tv_nsec - start.tv_nsec) * 1e-3) / (double) loop;
1275+
printf("mineveryf_vec %d %lf %0.3lf GFlops/s\n", len, elapsed, flops / (elapsed * 1e3));
1276+
1277+
l2_err(inout3, inout_ref, len);
1278+
#endif
1279+
12371280
/*for (int i = 0; i < len; i++)
12381281
{
12391282
printf("%f %f %f\n",inout[i],inout2[i],inout2_ref[i]);
@@ -1634,6 +1677,26 @@ printf("\n");
16341677
printf("mean %f ref %f\n", mean, mean_ref);
16351678
#endif
16361679

1680+
#ifdef RISCV
1681+
clock_gettime(CLOCK_REALTIME, &start);
1682+
meanf_vec(inout, &mean, len);
1683+
clock_gettime(CLOCK_REALTIME, &stop);
1684+
elapsed = (stop.tv_sec - start.tv_sec) * 1e6 + (stop.tv_nsec - start.tv_nsec) * 1e-3;
1685+
printf("meanf_vec %d %lf\n", len, elapsed);
1686+
printf("mean %f ref %f\n", mean, mean_ref);
1687+
1688+
clock_gettime(CLOCK_REALTIME, &start);
1689+
for (l = 0; l < loop; l++) {
1690+
meanf_vec(inout, &mean, len);
1691+
}
1692+
clock_gettime(CLOCK_REALTIME, &stop);
1693+
elapsed = ((stop.tv_sec - start.tv_sec) * 1e6 + (stop.tv_nsec - start.tv_nsec) * 1e-3) / (double) loop;
1694+
printf("meanf_vec %d %lf\n", len, elapsed);
1695+
1696+
printf("mean %f ref %f\n", mean, mean_ref);
1697+
#endif
1698+
1699+
16371700
printf("\n");
16381701
/////////////////////////////////////////////////////////// MAGNITUDE_SPLIT //////////////////////////////////////////////////////////////////////////////
16391702
printf("MAGNITUDE_SPLIT\n");
@@ -1728,6 +1791,23 @@ printf("\n");
17281791
l2_err(inout_ref, inout2_ref, len);
17291792
#endif
17301793

1794+
#ifdef RISCV
1795+
clock_gettime(CLOCK_REALTIME, &start);
1796+
magnitudef_split_vec(inout, inout2, inout2_ref, len);
1797+
clock_gettime(CLOCK_REALTIME, &stop);
1798+
elapsed = (stop.tv_sec - start.tv_sec) * 1e6 + (stop.tv_nsec - start.tv_nsec) * 1e-3;
1799+
printf("magnitudef_split_vec %d %lf\n", len, elapsed);
1800+
1801+
clock_gettime(CLOCK_REALTIME, &start);
1802+
for (l = 0; l < loop; l++)
1803+
magnitudef_split_vec(inout, inout2, inout2_ref, len);
1804+
clock_gettime(CLOCK_REALTIME, &stop);
1805+
elapsed = ((stop.tv_sec - start.tv_sec) * 1e6 + (stop.tv_nsec - start.tv_nsec) * 1e-3) / (double) loop;
1806+
printf("magnitudef_split_vec %d %lf\n", len, elapsed);
1807+
1808+
l2_err(inout_ref, inout2_ref, len);
1809+
#endif
1810+
17311811
printf("\n");
17321812
/////////////////////////////////////////////////////////// MAGNITUDE_INTERLEAVE //////////////////////////////////////////////////////////////////////////////
17331813
printf("MAGNITUDE_INTERLEAVE\n");
@@ -2042,7 +2122,7 @@ printf("\n");
20422122
elapsed = ((stop.tv_sec - start.tv_sec) * 1e6 + (stop.tv_nsec - start.tv_nsec) * 1e-3) / (double) loop;
20432123
printf("cplxconjvecmul128f %d %lf %0.3lf GFlops/s\n", len, elapsed, flops / (elapsed * 1e3));
20442124

2045-
l2_err(inout_ref, inout2_ref, 2*len);
2125+
l2_err(inout_ref, inout2_ref, 2 * len);
20462126
#endif
20472127

20482128
#ifdef AVX
@@ -2059,7 +2139,7 @@ printf("\n");
20592139
elapsed = ((stop.tv_sec - start.tv_sec) * 1e6 + (stop.tv_nsec - start.tv_nsec) * 1e-3) / (double) loop;
20602140
printf("cplxconjvecmul256f %d %lf %0.3lf GFlops/s\n", len, elapsed, flops / (elapsed * 1e3));
20612141

2062-
l2_err(inout_ref, inout2_ref, 2*len);
2142+
l2_err(inout_ref, inout2_ref, 2 * len);
20632143
#endif
20642144

20652145
#ifdef AVX512
@@ -2076,7 +2156,7 @@ printf("\n");
20762156
elapsed = ((stop.tv_sec - start.tv_sec) * 1e6 + (stop.tv_nsec - start.tv_nsec) * 1e-3) / (double) loop;
20772157
printf("cplxconjvecmul512f %d %lf %0.3lf GFlops/s\n", len, elapsed, flops / (elapsed * 1e3));
20782158

2079-
l2_err(inout_ref, inout2_ref, 2*len);
2159+
l2_err(inout_ref, inout2_ref, 2 * len);
20802160
#endif
20812161
printf("\n");
20822162

@@ -2532,6 +2612,10 @@ printf("\n");
25322612

25332613
flops = 34 * len; //TODO : check the right theoretical value
25342614

2615+
for(int i = 0; i < len; i++){
2616+
inout[i] = -(float)len/16.0f + 0.1f*(float)i;
2617+
}
2618+
25352619
clock_gettime(CLOCK_REALTIME, &start);
25362620
sinf_C(inout, inout2_ref, len);
25372621
clock_gettime(CLOCK_REALTIME, &stop);
@@ -2659,6 +2743,23 @@ printf("\n");
26592743
l2_err(inout2_ref, inout2, len);
26602744
#endif
26612745

2746+
#ifdef RISCV
2747+
clock_gettime(CLOCK_REALTIME, &start);
2748+
sinf_vec(inout, inout2, len);
2749+
clock_gettime(CLOCK_REALTIME, &stop);
2750+
elapsed = (stop.tv_sec - start.tv_sec) * 1e6 + (stop.tv_nsec - start.tv_nsec) * 1e-3;
2751+
printf("sinf_vec %d %lf\n", len, elapsed);
2752+
2753+
clock_gettime(CLOCK_REALTIME, &start);
2754+
for (l = 0; l < loop; l++)
2755+
sinf_vec(inout, inout2, len);
2756+
clock_gettime(CLOCK_REALTIME, &stop);
2757+
elapsed = ((stop.tv_sec - start.tv_sec) * 1e6 + (stop.tv_nsec - start.tv_nsec) * 1e-3) / (double) loop;
2758+
printf("sinf_vec %d %lf %0.3lf GFlops/s\n", len, elapsed, flops / (elapsed * 1e3));
2759+
l2_err(inout2_ref, inout2, len);
2760+
2761+
//for(int i = 0; i < len; i++) printf("%f %f %f\n",inout[i], inout2[i], inout2_ref[i]);
2762+
#endif
26622763

26632764
printf("\n");
26642765
/////////////////////////////////////////////////////////// COS //////////////////////////////////////////////////////////////////////////////
@@ -2935,6 +3036,24 @@ printf("\n");
29353036
l2_err(inout2_ref, inout3, len);
29363037
#endif
29373038

3039+
/*
3040+
#ifdef RISCV
3041+
clock_gettime(CLOCK_REALTIME, &start);
3042+
sincosf_vec(inout, inout2, inout3, len);
3043+
clock_gettime(CLOCK_REALTIME, &stop);
3044+
elapsed = (stop.tv_sec - start.tv_sec) * 1e6 + (stop.tv_nsec - start.tv_nsec) * 1e-3;
3045+
printf("sincosf_vec %d %lf\n", len, elapsed);
3046+
3047+
clock_gettime(CLOCK_REALTIME, &start);
3048+
for (l = 0; l < loop; l++)
3049+
sincosf_vec(inout, inout2, inout3, len);
3050+
clock_gettime(CLOCK_REALTIME, &stop);
3051+
elapsed = ((stop.tv_sec - start.tv_sec) * 1e6 + (stop.tv_nsec - start.tv_nsec) * 1e-3) / (double) loop;
3052+
printf("sincosf_vec %d %lf\n", len, elapsed);
3053+
l2_err(inout_ref, inout2, len);
3054+
l2_err(inout2_ref, inout3, len);
3055+
#endif
3056+
*/
29383057
printf("\n");
29393058
/////////////////////////////////////////////////////////// SINCOSD //////////////////////////////////////////////////////////////////////////////
29403059
printf("SINCOSD\n");

simd_utils.h

+37-12
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,31 @@ extern "C" {
2222
#include <math.h>
2323
#include <stdint.h>
2424

25+
static const float FOPI = 1.27323954473516f;
26+
static const float PIO4F = 0.7853981633974483096f;
27+
28+
/* Note, these constants are for a 32-bit significand: */
29+
/*
30+
static const float DP1 = 0.7853851318359375f;
31+
static const float DP2 = 1.30315311253070831298828125e-5f;
32+
static const float DP3 = 3.03855025325309630e-11f;
33+
static const float lossth = 65536.f;
34+
*/
35+
36+
/* These are for a 24-bit significand: */
37+
static const float minus_cephes_DP1 = -0.78515625f;
38+
static const float minus_cephes_DP2 = -2.4187564849853515625e-4f;
39+
static const float minus_cephes_DP3 = -3.77489497744594108e-8f;
40+
/* Kept read-only and float-typed like the neighbouring constants; 8192 is
 * exactly representable as a float, so the value is unchanged.
 * NOTE(review): presumably the argument magnitude threshold used by
 * mysincosf for the 24-bit significand variant - confirm against its users. */
static const float lossth = 8192.f;
41+
42+
static const float T24M1 = 16777215.f;
43+
44+
static const float sincof[] = {-1.9515295891E-4f, 8.3321608736E-3f, -1.6666654611E-1f};
45+
static const float coscof[] = {2.443315711809948E-5f, -1.388731625493765E-3f,
46+
4.166664568298827E-2f};
47+
static const int32_t sign_mask = 0x80000000;
48+
/* Bitwise complement of sign_mask (0x80000000): every bit set except the
 * sign bit. Written as a literal because an initializer that reads another
 * const object (~sign_mask) is not a constant expression in ISO C. */
static const int32_t inv_sign_mask = 0x7FFFFFFF;
49+
2550
#include "mysincosf.h"
2651

2752
#define INVLN10 0.4342944819032518f //0.4342944819f
@@ -89,7 +114,7 @@ typedef enum {
89114
static inline int isAligned(uintptr_t ptr, size_t alignment)
90115
{
91116
#ifndef ALWAYS_ALIGNED
92-
if (((uintptr_t)(ptr) % alignment) == 0)
117+
if (((uintptr_t) (ptr) % alignment) == 0)
93118
return 1;
94119
return 0;
95120
#else
@@ -100,8 +125,8 @@ static inline int isAligned(uintptr_t ptr, size_t alignment)
100125
static inline int areAligned2(uintptr_t ptr1, uintptr_t ptr2, size_t alignment)
101126
{
102127
#ifndef ALWAYS_ALIGNED
103-
if (((uintptr_t)(ptr1) % alignment) == 0)
104-
if (((uintptr_t)(ptr2) % alignment) == 0)
128+
if (((uintptr_t) (ptr1) % alignment) == 0)
129+
if (((uintptr_t) (ptr2) % alignment) == 0)
105130
return 1;
106131
return 0;
107132
#else
@@ -112,9 +137,9 @@ static inline int areAligned2(uintptr_t ptr1, uintptr_t ptr2, size_t alignment)
112137
static inline int areAligned3(uintptr_t ptr1, uintptr_t ptr2, uintptr_t ptr3, size_t alignment)
113138
{
114139
#ifndef ALWAYS_ALIGNED
115-
if (((uintptr_t)(ptr1) % alignment) == 0)
116-
if (((uintptr_t)(ptr2) % alignment) == 0)
117-
if (((uintptr_t)(ptr3) % alignment) == 0)
140+
if (((uintptr_t) (ptr1) % alignment) == 0)
141+
if (((uintptr_t) (ptr2) % alignment) == 0)
142+
if (((uintptr_t) (ptr3) % alignment) == 0)
118143
return 1;
119144
return 0;
120145
#else
@@ -681,7 +706,7 @@ static inline int posix_memalign(void **pointer, size_t len, int alignement)
681706
void *p, *p0 = malloc(len + alignement);
682707
if (!p0)
683708
return (void *) NULL;
684-
p = (void *) (((size_t) p0 + alignement) & (~((size_t)(alignement - 1))));
709+
p = (void *) (((size_t) p0 + alignement) & (~((size_t) (alignement - 1))));
685710
*((void **) p - 1) = p0;
686711

687712
*pointer = p;
@@ -694,7 +719,7 @@ static inline void *aligned_malloc(size_t len, int alignement)
694719
void *p, *p0 = malloc(len + alignement);
695720
if (!p0)
696721
return (void *) NULL;
697-
p = (void *) (((size_t) p0 + alignement) & (~((size_t)(alignement - 1))));
722+
p = (void *) (((size_t) p0 + alignement) & (~((size_t) (alignement - 1))));
698723
*((void **) p - 1) = p0;
699724
return p;
700725
}
@@ -944,31 +969,31 @@ static inline void convertFloat32ToU8_C(float *src, uint8_t *dst, int len, int r
944969
#endif
945970
for (int i = 0; i < len; i++) {
946971
float tmp = floorf(src[i] * scale_fact_mult);
947-
dst[i] = (uint8_t)(tmp > 255.0f ? 255.0f : tmp);
972+
dst[i] = (uint8_t) (tmp > 255.0f ? 255.0f : tmp);
948973
}
949974
} else if (rounding_mode == RndNear) {
950975
#ifdef OMP
951976
#pragma omp simd
952977
#endif
953978
for (int i = 0; i < len; i++) {
954979
float tmp = roundf(src[i] * scale_fact_mult);
955-
dst[i] = (uint8_t)(tmp > 255.0f ? 255.0f : tmp);
980+
dst[i] = (uint8_t) (tmp > 255.0f ? 255.0f : tmp);
956981
}
957982
} else if (rounding_mode == RndFinancial) {
958983
#ifdef OMP
959984
#pragma omp simd
960985
#endif
961986
for (int i = 0; i < len; i++) {
962987
float tmp = (roundf(src[i] * scale_fact_mult * 0.5f) / 2.0f);
963-
dst[i] = (uint8_t)(tmp > 255.0f ? 255.0f : tmp);
988+
dst[i] = (uint8_t) (tmp > 255.0f ? 255.0f : tmp);
964989
}
965990
} else {
966991
#ifdef OMP
967992
#pragma omp simd
968993
#endif
969994
for (int i = 0; i < len; i++) {
970995
float tmp = src[i] * scale_fact_mult;
971-
dst[i] = (uint8_t)(tmp > 255.0f ? 255.0f : tmp);
996+
dst[i] = (uint8_t) (tmp > 255.0f ? 255.0f : tmp);
972997
}
973998
}
974999
}

0 commit comments

Comments
 (0)