Skip to content

Commit 67af453

Browse files
363 missing grb wait for collecting execution times (#364)
This MR adds calls to `grb::wait` for performance and other tests where its omission may result in false performance results for backends that perform nonblocking execution. This MR also fixes the following meta-bugs: - the return code of the label propagation algorithm was ignored in the performance benchmark driver, - same for the label propagation smoke test, - some initialisation operations were not error-checked in the BiCGstab smoke test, - same for the CG smoke tests, - re-enabled `sleep(1)` in the HPCG benchmark to detect system variabilities. All of the above changes only affect the ALP test suite. The MR does include one change that cannot be classified as a meta-bug: - the Pregel API constructor did not account for nonblocking behaviour, and should wait before returning to adhere to RAII. This changes the behaviour of all user applications based on ALP/Pregel in combination with the nonblocking backend, though only in their performance characteristics (in particular, some of the initialisation of ALP/Pregel on some particular graph could previously be postponed until the execution of the first Pregel program; after this change, all initialisation is guaranteed to have completed by the end of the constructor call). ALP/Pregel programs based on other backends remain unchanged in behaviour. This MR also improves, on occasion, error reporting within the test suite, and, as always, includes code style fixes. Thank you to @aristeidis-mastoras for flagging these issues and providing the initial fixes!
1 parent d82b61b commit 67af453

25 files changed

+768
-566
lines changed

include/graphblas/interfaces/pregel.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -413,6 +413,9 @@ namespace grb {
413413
) {
414414
throw std::runtime_error( "Could not compute vertex IDs" );
415415
}
416+
if( grb::wait( IDs, indegrees, outdegrees, ones ) != SUCCESS ) {
417+
throw std::runtime_error( "Error during initialisation" );
418+
}
416419
}
417420

418421

tests/performance/dot.cpp

Lines changed: 62 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -90,16 +90,28 @@ void functional_test( const struct test_input &in, struct test_output &out ) {
9090
free( xr );
9191
return;
9292
}
93-
if( grb::set< grb::descriptors::use_index >( xv, yv ) != grb::SUCCESS ) {
93+
if( grb::wait() != grb::SUCCESS ) {
9494
out.error_code = 101;
9595
free( yr );
9696
free( xr );
9797
return;
9898
}
99+
if( grb::set< grb::descriptors::use_index >( xv, yv ) != grb::SUCCESS ) {
100+
out.error_code = 105;
101+
free( yr );
102+
free( xr );
103+
return;
104+
}
105+
if( grb::wait() != grb::SUCCESS ) {
106+
out.error_code = 106;
107+
free( yr );
108+
free( xr );
109+
return;
110+
}
99111
for( size_t i = 0; i < n; ++i ) {
100-
xr[ i ] = (double)i;
112+
xr[ i ] = static_cast< double >( i );
101113
yr[ i ] = 0.5;
102-
check += 0.5 * (double)i;
114+
check += 0.5 * static_cast< double >( i );
103115
}
104116
out.check = check;
105117

@@ -111,7 +123,8 @@ void functional_test( const struct test_input &in, struct test_output &out ) {
111123
> reals;
112124
timer.reset();
113125
double alpha = 0.0;
114-
const RC rc = grb::dot( alpha, xv, yv, reals );
126+
RC rc = grb::dot( alpha, xv, yv, reals );
127+
rc = rc ? rc : grb::wait();
115128
out.time = timer.time();
116129
if( rc != SUCCESS ) {
117130
std::cerr << "Call to grb::dot failed with error " << grb::toString( rc )
@@ -163,10 +176,16 @@ void bench_templated( const struct bench_input &in, struct bench_output &out ) {
163176

164177
// set input
165178
if( grb::set< grb::descriptors::no_operation >( yv, 0.5 ) != grb::SUCCESS ) {
166-
out.error_code = 102;
179+
out.error_code = 110;
180+
}
181+
if( grb::wait() != grb::SUCCESS ) {
182+
out.error_code = 111;
167183
}
168184
if( grb::set< grb::descriptors::use_index >( xv, 0 ) != grb::SUCCESS ) {
169-
out.error_code = 103;
185+
out.error_code = 115;
186+
}
187+
if( grb::wait() != grb::SUCCESS ) {
188+
out.error_code = 116;
170189
}
171190
if( out.error_code ) {
172191
out.times.preamble = timer.time();
@@ -179,7 +198,8 @@ void bench_templated( const struct bench_input &in, struct bench_output &out ) {
179198
grb::identities::zero, grb::identities::one
180199
> reals;
181200
double alpha = 0.0;
182-
const enum RC rc = grb::dot< grb::descriptors::dense >( alpha, xv, yv, reals );
201+
RC rc = grb::dot< grb::descriptors::dense >( alpha, xv, yv, reals );
202+
rc = rc ? rc : grb::wait();
183203
if( rc != SUCCESS ) {
184204
std::cerr << "Call to grb::dot failed with error " << grb::toString( rc )
185205
<< std::endl;
@@ -197,8 +217,11 @@ void bench_templated( const struct bench_input &in, struct bench_output &out ) {
197217
for( size_t i = 0; i < in.rep; ++i ) {
198218
timer.reset();
199219
alpha = 0.0;
200-
const enum RC grc = grb::dot< grb::descriptors::dense >( alpha, xv, yv,
201-
reals );
220+
RC grc = grb::dot< grb::descriptors::dense >( alpha, xv, yv, reals );
221+
// only wait if we must (avoid perfhit if wait is a guaranteed no-op)
222+
if( grb::Properties<>::isNonblockingExecution ) {
223+
grc = grc ? grc : grb::wait();
224+
}
202225
ttime += timer.time() / static_cast< double >( in.rep );
203226

204227
// sanity checks
@@ -237,11 +260,19 @@ void bench_lambda( const struct bench_input &in, struct bench_output &out ) {
237260

238261
// set input
239262
if( grb::set< grb::descriptors::no_operation >( yv, 0.5 ) != grb::SUCCESS ) {
240-
out.error_code = 104;
263+
out.error_code = 120;
264+
return;
265+
}
266+
if( grb::wait() != grb::SUCCESS ) {
267+
out.error_code = 121;
241268
return;
242269
}
243270
if( grb::set< grb::descriptors::use_index >( xv, 0 ) != grb::SUCCESS ) {
244-
out.error_code = 105;
271+
out.error_code = 125;
272+
return;
273+
}
274+
if( grb::wait() != grb::SUCCESS ) {
275+
out.error_code = 126;
245276
return;
246277
}
247278
if( out.error_code ) {
@@ -255,7 +286,7 @@ void bench_lambda( const struct bench_input &in, struct bench_output &out ) {
255286
grb::identities::zero, grb::identities::one
256287
> reals;
257288
double alpha = reals.template getZero< double >();
258-
const RC rc = grb::eWiseLambda< grb::descriptors::dense >(
289+
RC rc = grb::eWiseLambda< grb::descriptors::dense >(
259290
[ &xv, &yv, &alpha, &reals ]( const size_t i ) {
260291
double temp = 0.0;
261292
const auto mul_op = reals.getMultiplicativeOperator();
@@ -265,7 +296,8 @@ void bench_lambda( const struct bench_input &in, struct bench_output &out ) {
265296
},
266297
xv
267298
);
268-
if( rc != SUCCESS ) {
299+
rc = rc ? rc : grb::wait();
300+
if( rc != grb::SUCCESS ) {
269301
std::cerr << "Error during call to grb::eWiseLambda, error: "
270302
<< grb::toString( rc ) << std::endl;
271303
out.times.preamble = timer.time();
@@ -282,7 +314,7 @@ void bench_lambda( const struct bench_input &in, struct bench_output &out ) {
282314
for( size_t k = 0; k < in.rep; ++k ) {
283315
timer.reset();
284316
alpha = reals.template getZero< double >();
285-
const enum RC grc = grb::eWiseLambda(
317+
RC grc = grb::eWiseLambda(
286318
[ &xv, &yv, &alpha, &reals ]( const size_t i ) {
287319
double temp = xv[ i ];
288320
const auto mul_op = reals.getMultiplicativeOperator();
@@ -298,6 +330,10 @@ void bench_lambda( const struct bench_input &in, struct bench_output &out ) {
298330
},
299331
xv
300332
);
333+
// only wait if we have to (avoid minor overhead if not required to wait)
334+
if( grb::Properties<>::isNonblockingExecution ) {
335+
grc = grc ? grc : grb::wait();
336+
}
301337
ltime += timer.time() / static_cast< double >( in.rep );
302338

303339
bool sane = true;
@@ -359,6 +395,7 @@ void bench_raw( const struct bench_input &in, struct bench_output &out ) {
359395
bench_kernels_dot( &alpha, xr, yr, n );
360396

361397
// done with preamble, start useful work
398+
grb::wait();
362399
out.times.preamble = timer.time();
363400
timer.reset();
364401

@@ -410,7 +447,7 @@ int main( int argc, char ** argv ) {
410447
in.n = strtoumax( argv[ 1 ], &end, 10 );
411448
if( argv[ 1 ] == end ) {
412449
std::cerr << "Could not parse argument " << argv[ 1 ] << " for vector "
413-
<< "length.\n Test FAILED." << std::endl;
450+
<< "length.\n Test FAILED\n" << std::endl;
414451
return 10;
415452
}
416453
test_in.n = in.n;
@@ -421,7 +458,7 @@ int main( int argc, char ** argv ) {
421458
in.rep = strtoumax( argv[ 2 ], &end, 10 );
422459
if( argv[ 2 ] == end ) {
423460
std::cerr << "Could not parse argument " << argv[ 2 ] << " for number of "
424-
<< "inner experiment repititions.\n Test FAILED." << std::endl;
461+
<< "inner experiment repititions.\n Test FAILED\n" << std::endl;
425462
return 20;
426463
}
427464
}
@@ -432,7 +469,7 @@ int main( int argc, char ** argv ) {
432469
outer = strtoumax( argv[ 3 ], &end, 10 );
433470
if( argv[ 3 ] == end ) {
434471
std::cerr << "Could not parse argument " << argv[ 3 ] << " for number of "
435-
<< "outer experiment repititions.\n Test FAILED." << std::endl;
472+
<< "outer experiment repititions.\n Test FAILED\n" << std::endl;
436473
return 30;
437474
}
438475
}
@@ -443,12 +480,12 @@ int main( int argc, char ** argv ) {
443480

444481
// start functional test
445482
if( launch.exec( &functional_test, test_in, test_out, true ) != SUCCESS ) {
446-
std::cerr << "Error launching functional test.\n Test FAILED." << std::endl;
483+
std::cerr << "Error launching functional test.\n Test FAILED\n" << std::endl;
447484
return 30;
448485
}
449486
if( test_out.error_code != 0 ) {
450487
std::cerr << "Functional test exits with nonzero exit code " << out.error_code
451-
<< "\nTest FAILED." << std::endl;
488+
<< "\nTest FAILED\n" << std::endl;
452489
return out.error_code;
453490
}
454491

@@ -472,12 +509,13 @@ int main( int argc, char ** argv ) {
472509
std::cout << "compiler-optimised dot product on raw arrays of size " << in.n
473510
<< std::endl;
474511
if( bench.exec( &bench_raw, in, out, 1, outer, true ) != SUCCESS ) {
475-
std::cerr << "Error launching raw benchmark test.\nTest FAILED." << std::endl;
512+
std::cerr << "Error launching raw benchmark test.\nTest FAILED\n"
513+
<< std::endl;
476514
return 60;
477515
}
478516
if( out.error_code != 0 ) {
479517
std::cerr << "Raw benchmark test exits with nonzero exit code "
480-
<< out.error_code << "\nTest FAILED." << std::endl;
518+
<< out.error_code << "\nTest FAILED\n" << std::endl;
481519
return out.error_code;
482520
}
483521

@@ -486,13 +524,13 @@ int main( int argc, char ** argv ) {
486524
<< grb::toString( grb::config::default_backend ) << ") of size " << in.n
487525
<< std::endl;
488526
if( bench.exec( &bench_templated, in, out, 1, outer, true ) != SUCCESS ) {
489-
std::cerr << "Error launching templated benchmark test.\n Test FAILED."
527+
std::cerr << "Error launching templated benchmark test.\n Test FAILED\n"
490528
<< std::endl;
491529
return 40;
492530
}
493531
if( out.error_code != 0 ) {
494532
std::cerr << "Templated benchmark test exits with nonzero exit code "
495-
<< out.error_code << "\nTest FAILED." << std::endl;
533+
<< out.error_code << "\nTest FAILED\n" << std::endl;
496534
return out.error_code;
497535
}
498536

@@ -502,7 +540,7 @@ int main( int argc, char ** argv ) {
502540
<< grb::toString( grb::config::default_backend ) << ") of size " << in.n
503541
<< std::endl;
504542
if( bench.exec( &bench_lambda, in, out, 1, outer, true ) != SUCCESS ) {
505-
std::cerr << "Error launching lambda benchmark test.\nTest FAILED."
543+
std::cerr << "Error launching lambda benchmark test.\nTest FAILED\n"
506544
<< std::endl;
507545
return 50;
508546
}

tests/performance/fma.cpp

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -146,9 +146,13 @@ void test( const struct Input &in, struct Output &out ) {
146146
// WARNING: ALP incurs performance loss unless compiled using the nonblocking
147147
// backend
148148
if( mode == TEMPLATED ) {
149+
// flush any pending ops
150+
out.error = grb::wait();
151+
// start timing using a cold run to get the cache `hot' and get an early
152+
// run-time estimate
149153
double ttime = timer.time();
150-
// get cache `hot'
151-
out.error = grb::set< grb::descriptors::dense >( zv, yv );
154+
out.error = out.error ? out.error :
155+
grb::set< grb::descriptors::dense >( zv, yv );
152156
out.error = out.error ? out.error :
153157
grb::eWiseMul< grb::descriptors::dense >( zv, alpha, xv, reals );
154158
out.error = out.error ? out.error : grb::wait();
@@ -198,9 +202,12 @@ void test( const struct Input &in, struct Output &out ) {
198202
}
199203

200204
if( mode == LAMBDA ) {
205+
// flush any pending ops
206+
out.error = grb::wait();
207+
// start timing using a cold run to get the cache `hot' and get an early
208+
// run-time estimate
201209
double ltime = timer.time();
202-
// get cache `hot'
203-
out.error = grb::eWiseLambda(
210+
out.error = out.error ? out.error : grb::eWiseLambda(
204211
[ &zv, &alpha, &xv, &yv, &reals ]( const size_t i ) {
205212
// zv[ i ] = alpha * xv[ i ] + yv[ i ]
206213
(void) grb::apply( zv[ i ], alpha, xv[ i ],

tests/performance/label.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,7 @@ void grbProgram( const struct input &data_in, struct output &out ) {
255255

256256
// create the symmetric weight matrix W, representing the weighted graph
257257
rc = rc ? rc : resize( W, nz );
258+
rc = rc ? rc : grb::wait();
258259
if( rc != SUCCESS ) {
259260
std::cerr << "\tinitialisation FAILED\n";
260261
out.error_code = rc;
@@ -269,7 +270,11 @@ void grbProgram( const struct input &data_in, struct output &out ) {
269270

270271
// run and time experiment
271272
timer.reset();
272-
algorithms::label( f, y, W, n, l );
273+
rc = algorithms::label( f, y, W, n, l );
274+
// wait only if required (avoid minor overhead if not required)
275+
if( grb::Properties<>::isNonblockingExecution ) {
276+
rc = rc ? rc : grb::wait();
277+
}
273278
out.times.useful = timer.time();
274279

275280
// output result

tests/performance/reduce.cpp

Lines changed: 33 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -60,9 +60,10 @@ void test( const struct Input &in, struct Output &out ) {
6060
{
6161
grb::Vector< int > dummy( in.n );
6262
out.error = grb::set( dummy, 0 );
63-
if( out.error == grb::SUCCESS ) {
64-
out.error = grb::set< grb::descriptors::use_index >( xv, dummy );
65-
}
63+
out.error = out.error ? out.error :
64+
grb::set< grb::descriptors::use_index >( xv, dummy );
65+
out.error = out.error ? out.error :
66+
grb::wait();
6667
}
6768
if( out.error != grb::SUCCESS ) {
6869
return;
@@ -77,6 +78,7 @@ void test( const struct Input &in, struct Output &out ) {
7778
double ttime = timer.time();
7879
// get cache `hot'
7980
out.error = grb::foldl< grb::descriptors::dense >( alpha, xv, realm );
81+
out.error = out.error ? out.error : grb::wait();
8082
if( out.error != SUCCESS ) {
8183
std::cerr << "grb::foldl returns non-SUCCESS exit code "
8284
<< grb::toString( out.error ) << ".\n";
@@ -97,7 +99,16 @@ void test( const struct Input &in, struct Output &out ) {
9799
// benchmark templated axpy
98100
for( size_t i = 0; i < out.reps_used; ++i ) {
99101
alpha = 0.0;
100-
(void) grb::foldl< grb::descriptors::dense >( alpha, xv, realm );
102+
out.error = grb::foldl< grb::descriptors::dense >( alpha, xv, realm );
103+
// avoid overhead of calling wait if not required
104+
if( grb::Properties<>::isNonblockingExecution ) {
105+
out.error = out.error ? out.error : grb::wait();
106+
}
107+
if( out.error != grb::SUCCESS ) {
108+
std::cerr << "grb::foldl returns " << grb::toString( out.error )
109+
<< " during hot benchmark loop; exiting with error!\n";
110+
return;
111+
}
101112
}
102113
out.times.useful = timer.time() / static_cast< double >( out.reps_used );
103114

@@ -122,10 +133,11 @@ void test( const struct Input &in, struct Output &out ) {
122133
// get cache `hot'
123134
alpha = realm.template getIdentity< double >();
124135
out.error = grb::eWiseLambda(
125-
[ &alpha, &xv, &realm ]( const size_t i ) {
126-
(void) grb::foldl( alpha, xv[ i ], realm.getOperator() );
127-
},
136+
[ &alpha, &xv, &realm ]( const size_t i ) {
137+
(void) grb::foldl( alpha, xv[ i ], realm.getOperator() );
138+
},
128139
xv );
140+
out.error = out.error ? out.error : grb::wait();
129141
if( out.error != SUCCESS ) {
130142
std::cerr << "grb::eWiseLambda returns non-SUCCESS exit code "
131143
<< grb::toString( out.error ) << ".\n";
@@ -146,18 +158,27 @@ void test( const struct Input &in, struct Output &out ) {
146158
// benchmark templated axpy
147159
for( size_t i = 0; i < out.reps_used; ++i ) {
148160
alpha = realm.template getIdentity< double >();
149-
(void)grb::eWiseLambda(
150-
[ &alpha, &xv, &realm ]( const size_t i ) {
151-
(void)grb::foldl( alpha, xv[ i ], realm.getOperator() );
152-
},
161+
out.error = grb::eWiseLambda(
162+
[ &alpha, &xv, &realm ]( const size_t i ) {
163+
(void) grb::foldl( alpha, xv[ i ], realm.getOperator() );
164+
},
153165
xv );
166+
// avoid overhead of calling wait if not required
167+
if( grb::Properties<>::isNonblockingExecution ) {
168+
out.error = out.error ? out.error : grb::wait();
169+
}
170+
if( out.error != grb::SUCCESS ) {
171+
std::cerr << "grb::foldl returns " << grb::toString( out.error )
172+
<< " during hot benchmark loop; exiting with error!\n";
173+
return;
174+
}
154175
}
155176
out.times.useful = timer.time() / static_cast< double >( out.reps_used );
156177

157178
// postamble
158179
timer.reset();
159180
for( size_t i = 0; i < in.n; ++i ) {
160-
if( ! grb::utils::equals( expected, alpha, in.n - 1 ) ) {
181+
if( !grb::utils::equals( expected, alpha, in.n - 1 ) ) {
161182
std::cout << expected << " (expected) does not equal " << alpha
162183
<< " (eWiseLambda).\n";
163184
out.error = FAILED;

0 commit comments

Comments
 (0)