38
38
#include <mkl_vml.h>
39
39
#endif
40
40
41
+ #ifndef RISCV
41
42
typedef ALIGN16_BEG union {
42
43
float f [4 ];
43
44
int i [4 ];
@@ -53,6 +54,8 @@ typedef ALIGN32_BEG union {
53
54
54
55
#endif
55
56
57
+ #endif
58
+
56
59
float l2_err (float * test , float * ref , int len )
57
60
{
58
61
float l2_err = 0.0f ;
@@ -1140,6 +1143,26 @@ int main(int argc, char **argv)
1140
1143
l2_err (inout3 , inout_ref , len );
1141
1144
#endif
1142
1145
1146
+ #ifdef RISCV
1147
+ clock_gettime (CLOCK_REALTIME , & start );
1148
+ maxeveryf_vec (inout , inout2 , inout3 , len );
1149
+ clock_gettime (CLOCK_REALTIME , & stop );
1150
+
1151
+ elapsed = (stop .tv_sec - start .tv_sec ) * 1e6 + (stop .tv_nsec - start .tv_nsec ) * 1e-3 ;
1152
+ printf ("maxeveryf_vec %d %lf\n" , len , elapsed );
1153
+
1154
+ clock_gettime (CLOCK_REALTIME , & start );
1155
+ for (l = 0 ; l < loop ; l ++ )
1156
+ maxeveryf_vec (inout , inout2 , inout3 , len );
1157
+
1158
+ clock_gettime (CLOCK_REALTIME , & stop );
1159
+
1160
+ elapsed = ((stop .tv_sec - start .tv_sec ) * 1e6 + (stop .tv_nsec - start .tv_nsec ) * 1e-3 ) / (double ) loop ;
1161
+ printf ("maxeveryf_vec %d %lf %0.3lf GFlops/s\n" , len , elapsed , flops / (elapsed * 1e3 ));
1162
+
1163
+ l2_err (inout3 , inout_ref , len );
1164
+ #endif
1165
+
1143
1166
printf ("\n" );
1144
1167
/////////////////////////////////////////////////////////// MINEVERY //////////////////////////////////////////////////////////////////////////////
1145
1168
printf ("MINEVERY\n" );
@@ -1234,6 +1257,26 @@ int main(int argc, char **argv)
1234
1257
l2_err (inout3 , inout_ref , len );
1235
1258
#endif
1236
1259
1260
+ #ifdef RISCV
1261
+ clock_gettime (CLOCK_REALTIME , & start );
1262
+ mineveryf_vec (inout , inout2 , inout3 , len );
1263
+ clock_gettime (CLOCK_REALTIME , & stop );
1264
+
1265
+ elapsed = (stop .tv_sec - start .tv_sec ) * 1e6 + (stop .tv_nsec - start .tv_nsec ) * 1e-3 ;
1266
+ printf ("mineveryf_vec %d %lf\n" , len , elapsed );
1267
+
1268
+ clock_gettime (CLOCK_REALTIME , & start );
1269
+ for (l = 0 ; l < loop ; l ++ )
1270
+ mineveryf_vec (inout , inout2 , inout3 , len );
1271
+
1272
+ clock_gettime (CLOCK_REALTIME , & stop );
1273
+
1274
+ elapsed = ((stop .tv_sec - start .tv_sec ) * 1e6 + (stop .tv_nsec - start .tv_nsec ) * 1e-3 ) / (double ) loop ;
1275
+ printf ("mineveryf_vec %d %lf %0.3lf GFlops/s\n" , len , elapsed , flops / (elapsed * 1e3 ));
1276
+
1277
+ l2_err (inout3 , inout_ref , len );
1278
+ #endif
1279
+
1237
1280
/*for (int i = 0; i < len; i++)
1238
1281
{
1239
1282
printf("%f %f %f\n",inout[i],inout2[i],inout2_ref[i]);
@@ -1634,6 +1677,26 @@ printf("\n");
1634
1677
printf ("mean %f ref %f\n" , mean , mean_ref );
1635
1678
#endif
1636
1679
1680
+ #ifdef RISCV
1681
+ clock_gettime (CLOCK_REALTIME , & start );
1682
+ meanf_vec (inout , & mean , len );
1683
+ clock_gettime (CLOCK_REALTIME , & stop );
1684
+ elapsed = (stop .tv_sec - start .tv_sec ) * 1e6 + (stop .tv_nsec - start .tv_nsec ) * 1e-3 ;
1685
+ printf ("meanf_vec %d %lf\n" , len , elapsed );
1686
+ printf ("mean %f ref %f\n" , mean , mean_ref );
1687
+
1688
+ clock_gettime (CLOCK_REALTIME , & start );
1689
+ for (l = 0 ; l < loop ; l ++ ) {
1690
+ meanf_vec (inout , & mean , len );
1691
+ }
1692
+ clock_gettime (CLOCK_REALTIME , & stop );
1693
+ elapsed = ((stop .tv_sec - start .tv_sec ) * 1e6 + (stop .tv_nsec - start .tv_nsec ) * 1e-3 ) / (double ) loop ;
1694
+ printf ("meanf_vec %d %lf\n" , len , elapsed );
1695
+
1696
+ printf ("mean %f ref %f\n" , mean , mean_ref );
1697
+ #endif
1698
+
1699
+
1637
1700
printf ("\n" );
1638
1701
/////////////////////////////////////////////////////////// MAGNITUDE_SPLIT //////////////////////////////////////////////////////////////////////////////
1639
1702
printf ("MAGNITUDE_SPLIT\n" );
@@ -1728,6 +1791,23 @@ printf("\n");
1728
1791
l2_err (inout_ref , inout2_ref , len );
1729
1792
#endif
1730
1793
1794
+ #ifdef RISCV
1795
+ clock_gettime (CLOCK_REALTIME , & start );
1796
+ magnitudef_split_vec (inout , inout2 , inout2_ref , len );
1797
+ clock_gettime (CLOCK_REALTIME , & stop );
1798
+ elapsed = (stop .tv_sec - start .tv_sec ) * 1e6 + (stop .tv_nsec - start .tv_nsec ) * 1e-3 ;
1799
+ printf ("magnitudef_split_vec %d %lf\n" , len , elapsed );
1800
+
1801
+ clock_gettime (CLOCK_REALTIME , & start );
1802
+ for (l = 0 ; l < loop ; l ++ )
1803
+ magnitudef_split_vec (inout , inout2 , inout2_ref , len );
1804
+ clock_gettime (CLOCK_REALTIME , & stop );
1805
+ elapsed = ((stop .tv_sec - start .tv_sec ) * 1e6 + (stop .tv_nsec - start .tv_nsec ) * 1e-3 ) / (double ) loop ;
1806
+ printf ("magnitudef_split_vec %d %lf\n" , len , elapsed );
1807
+
1808
+ l2_err (inout_ref , inout2_ref , len );
1809
+ #endif
1810
+
1731
1811
printf ("\n" );
1732
1812
/////////////////////////////////////////////////////////// MAGNITUDE_INTERLEAVE //////////////////////////////////////////////////////////////////////////////
1733
1813
printf ("MAGNITUDE_INTERLEAVE\n" );
@@ -2042,7 +2122,7 @@ printf("\n");
2042
2122
elapsed = ((stop .tv_sec - start .tv_sec ) * 1e6 + (stop .tv_nsec - start .tv_nsec ) * 1e-3 ) / (double ) loop ;
2043
2123
printf ("cplxconjvecmul128f %d %lf %0.3lf GFlops/s\n" , len , elapsed , flops / (elapsed * 1e3 ));
2044
2124
2045
- l2_err (inout_ref , inout2_ref , 2 * len );
2125
+ l2_err (inout_ref , inout2_ref , 2 * len );
2046
2126
#endif
2047
2127
2048
2128
#ifdef AVX
@@ -2059,7 +2139,7 @@ printf("\n");
2059
2139
elapsed = ((stop .tv_sec - start .tv_sec ) * 1e6 + (stop .tv_nsec - start .tv_nsec ) * 1e-3 ) / (double ) loop ;
2060
2140
printf ("cplxconjvecmul256f %d %lf %0.3lf GFlops/s\n" , len , elapsed , flops / (elapsed * 1e3 ));
2061
2141
2062
- l2_err (inout_ref , inout2_ref , 2 * len );
2142
+ l2_err (inout_ref , inout2_ref , 2 * len );
2063
2143
#endif
2064
2144
2065
2145
#ifdef AVX512
@@ -2076,7 +2156,7 @@ printf("\n");
2076
2156
elapsed = ((stop .tv_sec - start .tv_sec ) * 1e6 + (stop .tv_nsec - start .tv_nsec ) * 1e-3 ) / (double ) loop ;
2077
2157
printf ("cplxconjvecmul512f %d %lf %0.3lf GFlops/s\n" , len , elapsed , flops / (elapsed * 1e3 ));
2078
2158
2079
- l2_err (inout_ref , inout2_ref , 2 * len );
2159
+ l2_err (inout_ref , inout2_ref , 2 * len );
2080
2160
#endif
2081
2161
printf ("\n" );
2082
2162
@@ -2532,6 +2612,10 @@ printf("\n");
2532
2612
2533
2613
flops = 34 * len ; //TODO : check the right theoretical value
2534
2614
2615
+ for (int i = 0 ; i < len ; i ++ ){
2616
+ inout [i ] = - (float )len /16.0f + 0.1f * (float )i ;
2617
+ }
2618
+
2535
2619
clock_gettime (CLOCK_REALTIME , & start );
2536
2620
sinf_C (inout , inout2_ref , len );
2537
2621
clock_gettime (CLOCK_REALTIME , & stop );
@@ -2659,6 +2743,23 @@ printf("\n");
2659
2743
l2_err (inout2_ref , inout2 , len );
2660
2744
#endif
2661
2745
2746
+ #ifdef RISCV
2747
+ clock_gettime (CLOCK_REALTIME , & start );
2748
+ sinf_vec (inout , inout2 , len );
2749
+ clock_gettime (CLOCK_REALTIME , & stop );
2750
+ elapsed = (stop .tv_sec - start .tv_sec ) * 1e6 + (stop .tv_nsec - start .tv_nsec ) * 1e-3 ;
2751
+ printf ("sinf_vec %d %lf\n" , len , elapsed );
2752
+
2753
+ clock_gettime (CLOCK_REALTIME , & start );
2754
+ for (l = 0 ; l < loop ; l ++ )
2755
+ sinf_vec (inout , inout2 , len );
2756
+ clock_gettime (CLOCK_REALTIME , & stop );
2757
+ elapsed = ((stop .tv_sec - start .tv_sec ) * 1e6 + (stop .tv_nsec - start .tv_nsec ) * 1e-3 ) / (double ) loop ;
2758
+ printf ("sinf_vec %d %lf %0.3lf GFlops/s\n" , len , elapsed , flops / (elapsed * 1e3 ));
2759
+ l2_err (inout2_ref , inout2 , len );
2760
+
2761
+ //for(int i = 0; i < len; i++) printf("%f %f %f\n",inout[i], inout2[i], inout2_ref[i]);
2762
+ #endif
2662
2763
2663
2764
printf ("\n" );
2664
2765
/////////////////////////////////////////////////////////// COS //////////////////////////////////////////////////////////////////////////////
@@ -2935,6 +3036,24 @@ printf("\n");
2935
3036
l2_err (inout2_ref , inout3 , len );
2936
3037
#endif
2937
3038
3039
+ /*
3040
+ #ifdef RISCV
3041
+ clock_gettime(CLOCK_REALTIME, &start);
3042
+ sincosf_vec(inout, inout2, inout3, len);
3043
+ clock_gettime(CLOCK_REALTIME, &stop);
3044
+ elapsed = (stop.tv_sec - start.tv_sec) * 1e6 + (stop.tv_nsec - start.tv_nsec) * 1e-3;
3045
+ printf("sincosf_vec %d %lf\n", len, elapsed);
3046
+
3047
+ clock_gettime(CLOCK_REALTIME, &start);
3048
+ for (l = 0; l < loop; l++)
3049
+ sincosf_vec(inout, inout2, inout3, len);
3050
+ clock_gettime(CLOCK_REALTIME, &stop);
3051
+ elapsed = ((stop.tv_sec - start.tv_sec) * 1e6 + (stop.tv_nsec - start.tv_nsec) * 1e-3) / (double) loop;
3052
+ printf("sincosf_vec %d %lf\n", len, elapsed);
3053
+ l2_err(inout_ref, inout2, len);
3054
+ l2_err(inout2_ref, inout3, len);
3055
+ #endif
3056
+ */
2938
3057
printf ("\n" );
2939
3058
/////////////////////////////////////////////////////////// SINCOSD //////////////////////////////////////////////////////////////////////////////
2940
3059
printf ("SINCOSD\n" );
0 commit comments