1
1
/*
2
2
Copyright (c) 2013, Intel Corporation
3
3
4
- Redistribution and use in source and binary forms, with or without
5
- modification, are permitted provided that the following conditions
4
+ Redistribution and use in source and binary forms, with or without
5
+ modification, are permitted provided that the following conditions
6
6
are met:
7
7
8
- * Redistributions of source code must retain the above copyright
8
+ * Redistributions of source code must retain the above copyright
9
9
notice, this list of conditions and the following disclaimer.
10
- * Redistributions in binary form must reproduce the above
11
- copyright notice, this list of conditions and the following
12
- disclaimer in the documentation and/or other materials provided
10
+ * Redistributions in binary form must reproduce the above
11
+ copyright notice, this list of conditions and the following
12
+ disclaimer in the documentation and/or other materials provided
13
13
with the distribution.
14
- * Neither the name of Intel Corporation nor the names of its
15
- contributors may be used to endorse or promote products
16
- derived from this software without specific prior written
14
+ * Neither the name of Intel Corporation nor the names of its
15
+ contributors may be used to endorse or promote products
16
+ derived from this software without specific prior written
17
17
permission.
18
18
19
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22
- FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23
- COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24
- INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25
- BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26
- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27
- CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28
- LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29
- ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
19
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22
+ FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23
+ COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24
+ INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25
+ BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28
+ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29
+ ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30
30
POSSIBILITY OF SUCH DAMAGE.
31
31
*/
32
32
@@ -67,44 +67,44 @@ POSSIBILITY OF SUCH DAMAGE.
67
67
68
68
69
69
/**********************************************************************
70
-
70
+
71
71
NAME: nstream
72
-
72
+
73
73
PURPOSE: To compute memory bandwidth when adding a vector of a given
74
- number of double precision values to the scalar multiple of
74
+ number of double precision values to the scalar multiple of
75
75
another vector of the same length, and storing the result in
76
- a third vector.
77
-
76
+ a third vector.
77
+
78
78
USAGE: The program takes as input the number of iterations to loop
79
- over the triad vectors, the length of the vectors, and the
79
+ over the triad vectors, the length of the vectors, and the
80
80
offset between vectors.
81
-
81
+
82
82
<progname> <# iterations> <vector length> <offset>
83
-
84
- The output consists of diagnostics to make sure the
83
+
84
+ The output consists of diagnostics to make sure the
85
85
algorithm worked, and of timing statistics.
86
-
86
+
87
87
FUNCTIONS CALLED:
88
-
89
- Other than MPI or standard C functions, the following
88
+
89
+ Other than MPI or standard C functions, the following
90
90
external functions are used in this program:
91
-
91
+
92
92
wtime()
93
93
bail_out()
94
94
checkTRIADresults()
95
-
96
- NOTES: Bandwidth is determined as the number of words read, plus the
97
- number of words written, times the size of the words, divided
98
- by the execution time. For a vector length of N, the total
95
+
96
+ NOTES: Bandwidth is determined as the number of words read, plus the
97
+ number of words written, times the size of the words, divided
98
+ by the execution time. For a vector length of N, the total
99
99
number of bytes read and written is 4*N*sizeof(double).
100
-
100
+
101
101
HISTORY: This code is loosely based on the Stream benchmark by John
102
102
McCalpin, but does not follow all the Stream rules. Hence,
103
103
reported results should not be associated with Stream in
104
104
external publications.
105
- REVISION: Modified by Rob Van der Wijngaart, December 2005, to
105
+ REVISION: Modified by Rob Van der Wijngaart, December 2005, to
106
106
parameterize vector size and offsets through compiler flags.
107
- Also removed all Stream cases except TRIAD.
107
+ Also removed all Stream cases except TRIAD.
108
108
REVISION: Modified by Rob Van der Wijngaart, March 2006, to handle MPI.
109
109
REVISION: Modified by Rob Van der Wijngaart, May 2006, to introduce
110
110
dependence between successive triad operations. This is
@@ -115,16 +115,23 @@ REVISION: Modified by Rob Van der Wijngaart, November 2014, replaced
115
115
between iterations (must now be included in timing) with
116
116
accumulation: a[] += b[] + scalar*c[]
117
117
**********************************************************************/
118
-
118
+
119
119
#include <par-res-kern_general.h>
120
120
#include <par-res-kern_mpi.h>
121
-
121
+
122
+ #ifdef __faasm
123
+ #include <faasm/faasm.h>
124
+ #endif
125
+
122
126
#define SCALAR 3.0
123
-
127
+
124
128
static int checkTRIADresults (int , long int , double * );
125
-
126
- int main (int argc , char * * argv )
129
+
130
+ int main (int argc , char * * argv )
127
131
{
132
+ const char * message = "Hello Faasm!" ;
133
+ faasmSetOutput ((uint8_t * )message , 12 );
134
+
128
135
long int j , iter ; /* dummies */
129
136
double scalar ; /* constant used in Triad operation */
130
137
int iterations ; /* number of times vector loop gets repeated */
@@ -134,7 +141,7 @@ int main(int argc, char **argv)
134
141
double bytes ; /* memory IO size */
135
142
size_t space ; /* memory used for a single vector */
136
143
double local_nstream_time ,/* timing parameters */
137
- nstream_time ,
144
+ nstream_time ,
138
145
avgtime ;
139
146
int Num_procs , /* number of ranks */
140
147
my_ID , /* rank */
@@ -143,11 +150,11 @@ int main(int argc, char **argv)
143
150
double * RESTRICT a ; /* main vector */
144
151
double * RESTRICT b ; /* main vector */
145
152
double * RESTRICT c ; /* main vector */
146
-
153
+
147
154
/**********************************************************************************
148
- * process and test input parameters
155
+ * process and test input parameters
149
156
***********************************************************************************/
150
-
157
+
151
158
MPI_Init (& argc ,& argv );
152
159
MPI_Comm_size (MPI_COMM_WORLD ,& Num_procs );
153
160
MPI_Comm_rank (MPI_COMM_WORLD ,& my_ID );
@@ -205,7 +212,7 @@ int main(int argc, char **argv)
205
212
c = b + length + offset ;
206
213
207
214
bytes = 4.0 * sizeof (double ) * length * Num_procs ;
208
-
215
+
209
216
if (my_ID == root ) {
210
217
printf ("Number of ranks = %d\n" , Num_procs );
211
218
printf ("Vector length = %ld\n" , total_length );
@@ -218,31 +225,31 @@ int main(int argc, char **argv)
218
225
b [j ] = 2.0 ;
219
226
c [j ] = 2.0 ;
220
227
}
221
-
228
+
222
229
/* --- MAIN LOOP --- repeat Triad iterations times --- */
223
-
230
+
224
231
scalar = SCALAR ;
225
-
232
+
226
233
for (iter = 0 ; iter <=iterations ; iter ++ ) {
227
-
234
+
228
235
/* start timer after a warmup iteration */
229
- if (iter == 1 ) {
236
+ if (iter == 1 ) {
230
237
MPI_Barrier (MPI_COMM_WORLD );
231
238
local_nstream_time = wtime ();
232
239
}
233
240
234
241
for (j = 0 ; j < length ; j ++ ) a [j ] += b [j ]+ scalar * c [j ];
235
242
236
243
} /* end iterations */
237
-
244
+
238
245
/*********************************************************************
239
246
** Analyze and output results.
240
247
*********************************************************************/
241
248
242
249
local_nstream_time = wtime () - local_nstream_time ;
243
250
MPI_Reduce (& local_nstream_time , & nstream_time , 1 , MPI_DOUBLE , MPI_MAX , root ,
244
251
MPI_COMM_WORLD );
245
-
252
+
246
253
if (my_ID == root ) {
247
254
if (checkTRIADresults (iterations , length , a )) {
248
255
avgtime = nstream_time /iterations ;
@@ -254,34 +261,34 @@ int main(int argc, char **argv)
254
261
bail_out (error );
255
262
MPI_Finalize ();
256
263
}
257
-
258
-
264
+
265
+
259
266
int checkTRIADresults (int iterations , long int length , double * a ) {
260
267
double aj , bj , cj , scalar , asum ;
261
268
double epsilon = 1.e-8 ;
262
269
long int j ;
263
270
int iter ;
264
-
271
+
265
272
/* reproduce initialization */
266
273
aj = 0.0 ;
267
274
bj = 2.0 ;
268
275
cj = 2.0 ;
269
-
276
+
270
277
/* now execute timing loop */
271
278
scalar = SCALAR ;
272
279
for (iter = 0 ; iter <=iterations ; iter ++ ) aj += bj + scalar * cj ;
273
-
280
+
274
281
aj = aj * (double ) (length );
275
-
282
+
276
283
asum = 0.0 ;
277
284
for (j = 0 ; j < length ; j ++ ) asum += a [j ];
278
-
285
+
279
286
#if VERBOSE
280
287
printf ("Results Comparison: \n" );
281
288
printf (" Expected checksum: %f\n" ,aj );
282
289
printf (" Observed checksum: %f\n" ,asum );
283
290
#endif
284
-
291
+
285
292
if (ABS (aj - asum )/asum > epsilon ) {
286
293
printf ("Failed Validation on output array\n" );
287
294
#if !VERBOSE
@@ -295,4 +302,4 @@ int checkTRIADresults (int iterations, long int length, double *a) {
295
302
return (1 );
296
303
}
297
304
}
298
-
305
+
0 commit comments