Algebraic-Programming
diff --git a/include/graphblas/interfaces/pregel.hpp b/include/graphblas/interfaces/pregel.hpp
Lines changed: 3 additions & 0 deletions
diff --git a/tests/performance/dot.cpp b/tests/performance/dot.cpp
Lines changed: 62 additions & 24 deletions
diff --git a/tests/performance/fma.cpp b/tests/performance/fma.cpp
Lines changed: 11 additions & 4 deletions
diff --git a/tests/performance/label.cpp b/tests/performance/label.cpp
Lines changed: 6 additions & 1 deletion
diff --git a/tests/performance/reduce.cpp b/tests/performance/reduce.cpp
Lines changed: 33 additions & 12 deletions
@@ -413,6 +413,9 @@ namespace grb {
 					) {
 						throw std::runtime_error( "Could not compute vertex IDs" );
 					}
+					if( grb::wait( IDs, indegrees, outdegrees, ones ) != SUCCESS ) {
+						throw std::runtime_error( "Error during initialisation" );
+					}
 				}
 
 
 
@@ -90,16 +90,28 @@ void functional_test( const struct test_input &in, struct test_output &out ) {
 		free( xr );
 		return;
 	}
-	if( grb::set< grb::descriptors::use_index >( xv, yv ) != grb::SUCCESS ) {
+	if( grb::wait() != grb::SUCCESS ) {
 		out.error_code = 101;
 		free( yr );
 		free( xr );
 		return;
 	}
+	if( grb::set< grb::descriptors::use_index >( xv, yv ) != grb::SUCCESS ) {
+		out.error_code = 105;
+		free( yr );
+		free( xr );
+		return;
+	}
+	if( grb::wait() != grb::SUCCESS ) {
+		out.error_code = 106;
+		free( yr );
+		free( xr );
+		return;
+	}
 	for( size_t i = 0; i < n; ++i ) {
-		xr[ i ] = (double)i;
+		xr[ i ] = static_cast< double >( i );
 		yr[ i ] = 0.5;
-		check += 0.5 * (double)i;
+		check += 0.5 * static_cast< double >( i );
 	}
 	out.check = check;
 
@@ -111,7 +123,8 @@ void functional_test( const struct test_input &in, struct test_output &out ) {
 	> reals;
 	timer.reset();
 	double alpha = 0.0;
-	const RC rc = grb::dot( alpha, xv, yv, reals );
+	RC rc = grb::dot( alpha, xv, yv, reals );
+	rc = rc ? rc : grb::wait();
 	out.time = timer.time();
 	if( rc != SUCCESS ) {
 		std::cerr << "Call to grb::dot failed with error " << grb::toString( rc )
@@ -163,10 +176,16 @@ void bench_templated( const struct bench_input &in, struct bench_output &out ) {
 
 	// set input
 	if( grb::set< grb::descriptors::no_operation >( yv, 0.5 ) != grb::SUCCESS ) {
-		out.error_code = 102;
+		out.error_code = 110;
+	}
+	if( grb::wait() != grb::SUCCESS ) {
+		out.error_code = 111;
 	}
 	if( grb::set< grb::descriptors::use_index >( xv, 0 ) != grb::SUCCESS ) {
-		out.error_code = 103;
+		out.error_code = 115;
+	}
+	if( grb::wait() != grb::SUCCESS ) {
+		out.error_code = 116;
 	}
 	if( out.error_code ) {
 		out.times.preamble = timer.time();
@@ -179,7 +198,8 @@ void bench_templated( const struct bench_input &in, struct bench_output &out ) {
 		grb::identities::zero, grb::identities::one
 	> reals;
 	double alpha = 0.0;
-	const enum RC rc = grb::dot< grb::descriptors::dense >( alpha, xv, yv, reals );
+	RC rc = grb::dot< grb::descriptors::dense >( alpha, xv, yv, reals );
+	rc = rc ? rc : grb::wait();
 	if( rc != SUCCESS ) {
 		std::cerr << "Call to grb::dot failed with error " << grb::toString( rc )
 			<< std::endl;
@@ -197,8 +217,11 @@ void bench_templated( const struct bench_input &in, struct bench_output &out ) {
 	for( size_t i = 0; i < in.rep; ++i ) {
 		timer.reset();
 		alpha = 0.0;
-		const enum RC grc = grb::dot< grb::descriptors::dense >( alpha, xv, yv,
-			reals );
+		RC grc = grb::dot< grb::descriptors::dense >( alpha, xv, yv, reals );
+		// only wait if we must (avoid perfhit if wait is a guaranteed no-op)
+		if( grb::Properties<>::isNonblockingExecution ) {
+			grc = grc ? grc : grb::wait();
+		}
 		ttime += timer.time() / static_cast< double >( in.rep );
 
 		// sanity checks
@@ -237,11 +260,19 @@ void bench_lambda( const struct bench_input &in, struct bench_output &out ) {
 
 	// set input
 	if( grb::set< grb::descriptors::no_operation >( yv, 0.5 ) != grb::SUCCESS ) {
-		out.error_code = 104;
+		out.error_code = 120;
+		return;
+	}
+	if( grb::wait() != grb::SUCCESS ) {
+		out.error_code = 121;
 		return;
 	}
 	if( grb::set< grb::descriptors::use_index >( xv, 0 ) != grb::SUCCESS ) {
-		out.error_code = 105;
+		out.error_code = 125;
+		return;
+	}
+	if( grb::wait() != grb::SUCCESS ) {
+		out.error_code = 126;
 		return;
 	}
 	if( out.error_code ) {
@@ -255,7 +286,7 @@ void bench_lambda( const struct bench_input &in, struct bench_output &out ) {
 		grb::identities::zero, grb::identities::one
 	> reals;
 	double alpha = reals.template getZero< double >();
-	const RC rc = grb::eWiseLambda< grb::descriptors::dense >(
+	RC rc = grb::eWiseLambda< grb::descriptors::dense >(
 		[ &xv, &yv, &alpha, &reals ]( const size_t i ) {
 			double temp = 0.0;
 			const auto mul_op = reals.getMultiplicativeOperator();
@@ -265,7 +296,8 @@ void bench_lambda( const struct bench_input &in, struct bench_output &out ) {
 		},
 		xv
 	);
-	if( rc != SUCCESS ) {
+	rc = rc ? rc : grb::wait();
+	if( rc != grb::SUCCESS ) {
 		std::cerr << "Error during call to grb::eWiseLambda, error: "
 			<< grb::toString( rc ) << std::endl;
 		out.times.preamble = timer.time();
@@ -282,7 +314,7 @@ void bench_lambda( const struct bench_input &in, struct bench_output &out ) {
 	for( size_t k = 0; k < in.rep; ++k ) {
 		timer.reset();
 		alpha = reals.template getZero< double >();
-		const enum RC grc = grb::eWiseLambda(
+		RC grc = grb::eWiseLambda(
 			[ &xv, &yv, &alpha, &reals ]( const size_t i ) {
 				double temp = xv[ i ];
 				const auto mul_op = reals.getMultiplicativeOperator();
@@ -298,6 +330,10 @@ void bench_lambda( const struct bench_input &in, struct bench_output &out ) {
 			},
 			xv
 		);
+		// only wait if we have to (avoid minor overhead if not required to wait)
+		if( grb::Properties<>::isNonblockingExecution ) {
+			grc = grc ? grc : grb::wait();
+		}
 		ltime += timer.time() / static_cast< double >( in.rep );
 
 		bool sane = true;
@@ -359,6 +395,7 @@ void bench_raw( const struct bench_input &in, struct bench_output &out ) {
 	bench_kernels_dot( &alpha, xr, yr, n );
 
 	// done with preamble, start useful work
+	grb::wait();
 	out.times.preamble = timer.time();
 	timer.reset();
 
@@ -410,7 +447,7 @@ int main( int argc, char ** argv ) {
 	in.n = strtoumax( argv[ 1 ], &end, 10 );
 	if( argv[ 1 ] == end ) {
 		std::cerr << "Could not parse argument " << argv[ 1 ] << " for vector "
-			<< "length.\n Test FAILED." << std::endl;
+			<< "length.\n Test FAILED\n" << std::endl;
 		return 10;
 	}
 	test_in.n = in.n;
@@ -421,7 +458,7 @@ int main( int argc, char ** argv ) {
 		in.rep = strtoumax( argv[ 2 ], &end, 10 );
 		if( argv[ 2 ] == end ) {
 			std::cerr << "Could not parse argument " << argv[ 2 ] << " for number of "
-				<< "inner experiment repititions.\n Test FAILED." << std::endl;
+				<< "inner experiment repititions.\n Test FAILED\n" << std::endl;
 			return 20;
 		}
 	}
@@ -432,7 +469,7 @@ int main( int argc, char ** argv ) {
 		outer = strtoumax( argv[ 3 ], &end, 10 );
 		if( argv[ 3 ] == end ) {
 			std::cerr << "Could not parse argument " << argv[ 3 ] << " for number of "
-				<< "outer experiment repititions.\n Test FAILED." << std::endl;
+				<< "outer experiment repititions.\n Test FAILED\n" << std::endl;
 			return 30;
 		}
 	}
@@ -443,12 +480,12 @@ int main( int argc, char ** argv ) {
 
 	// start functional test
 	if( launch.exec( &functional_test, test_in, test_out, true ) != SUCCESS ) {
-		std::cerr << "Error launching functional test.\n Test FAILED." << std::endl;
+		std::cerr << "Error launching functional test.\n Test FAILED\n" << std::endl;
 		return 30;
 	}
 	if( test_out.error_code != 0 ) {
 		std::cerr << "Functional test exits with nonzero exit code " << out.error_code
-			<< "\nTest FAILED." << std::endl;
+			<< "\nTest FAILED\n" << std::endl;
 		return out.error_code;
 	}
 
@@ -472,12 +509,13 @@ int main( int argc, char ** argv ) {
 	std::cout << "compiler-optimised dot product on raw arrays of size " << in.n
 		<< std::endl;
 	if( bench.exec( &bench_raw, in, out, 1, outer, true ) != SUCCESS ) {
-		std::cerr << "Error launching raw benchmark test.\nTest FAILED." << std::endl;
+		std::cerr << "Error launching raw benchmark test.\nTest FAILED\n"
+			<< std::endl;
 		return 60;
 	}
 	if( out.error_code != 0 ) {
 		std::cerr << "Raw benchmark test exits with nonzero exit code "
-			<< out.error_code << "\nTest FAILED." << std::endl;
+			<< out.error_code << "\nTest FAILED\n" << std::endl;
 		return out.error_code;
 	}
 
@@ -486,13 +524,13 @@ int main( int argc, char ** argv ) {
 		<< grb::toString( grb::config::default_backend ) << ") of size " << in.n
 		<< std::endl;
 	if( bench.exec( &bench_templated, in, out, 1, outer, true ) != SUCCESS ) {
-		std::cerr << "Error launching templated benchmark test.\n Test FAILED."
+		std::cerr << "Error launching templated benchmark test.\n Test FAILED\n"
 			<< std::endl;
 		return 40;
 	}
 	if( out.error_code != 0 ) {
 		std::cerr << "Templated benchmark test exits with nonzero exit code "
-			<< out.error_code << "\nTest FAILED." << std::endl;
+			<< out.error_code << "\nTest FAILED\n" << std::endl;
 		return out.error_code;
 	}
 
@@ -502,7 +540,7 @@ int main( int argc, char ** argv ) {
 			<< grb::toString( grb::config::default_backend ) << ") of size " << in.n
 			<< std::endl;
 		if( bench.exec( &bench_lambda, in, out, 1, outer, true ) != SUCCESS ) {
-			std::cerr << "Error launching lambda benchmark test.\nTest FAILED."
+			std::cerr << "Error launching lambda benchmark test.\nTest FAILED\n"
 				<< std::endl;
 			return 50;
 		}
 
@@ -146,9 +146,13 @@ void test( const struct Input &in, struct Output &out ) {
 	// WARNING: ALP incurs performance loss unless compiled using the nonblocking
 	//          backend
 	if( mode == TEMPLATED ) {
+		// flush any pending ops
+		out.error = grb::wait();
+		// start timing using a cold run to get the cache `hot' and get an early
+		// run-time estimate
 		double ttime = timer.time();
-		// get cache `hot'
-		out.error = grb::set< grb::descriptors::dense >( zv, yv );
+		out.error = out.error ? out.error :
+			grb::set< grb::descriptors::dense >( zv, yv );
 		out.error = out.error ? out.error :
 			grb::eWiseMul< grb::descriptors::dense >( zv, alpha, xv, reals );
 		out.error = out.error ? out.error : grb::wait();
@@ -198,9 +202,12 @@ void test( const struct Input &in, struct Output &out ) {
 	}
 
 	if( mode == LAMBDA ) {
+		// flush any pending ops
+		out.error = grb::wait();
+		// start timing using a cold run to get the cache `hot' and get an early
+		// run-time estimate
 		double ltime = timer.time();
-		// get cache `hot'
-		out.error = grb::eWiseLambda(
+		out.error = out.error ? out.error : grb::eWiseLambda(
 			[ &zv, &alpha, &xv, &yv, &reals ]( const size_t i ) {
 				// zv[ i ] = alpha * xv[ i ] + yv[ i ]
 				(void) grb::apply( zv[ i ], alpha, xv[ i ],
 
@@ -255,6 +255,7 @@ void grbProgram( const struct input &data_in, struct output &out ) {
 
 	// create the symmetric weight matrix W, representing the weighted graph
 	rc = rc ? rc : resize( W, nz );
+	rc = rc ? rc : grb::wait();
 	if( rc != SUCCESS ) {
 		std::cerr << "\tinitialisation FAILED\n";
 		out.error_code = rc;
@@ -269,7 +270,11 @@ void grbProgram( const struct input &data_in, struct output &out ) {
 
 	// run and time experiment
 	timer.reset();
-	algorithms::label( f, y, W, n, l );
+	rc = algorithms::label( f, y, W, n, l );
+	// wait only if required (avoid minor overhead if not required)
+	if( grb::Properties<>::isNonblockingExecution ) {
+		rc = rc ? rc : grb::wait();
+	}
 	out.times.useful = timer.time();
 
 	// output result
 
@@ -60,9 +60,10 @@ void test( const struct Input &in, struct Output &out ) {
 	{
 		grb::Vector< int > dummy( in.n );
 		out.error = grb::set( dummy, 0 );
-		if( out.error == grb::SUCCESS ) {
-			out.error = grb::set< grb::descriptors::use_index >( xv, dummy );
-		}
+		out.error = out.error ? out.error :
+			grb::set< grb::descriptors::use_index >( xv, dummy );
+		out.error = out.error ? out.error :
+			grb::wait();
 	}
 	if( out.error != grb::SUCCESS ) {
 		return;
@@ -77,6 +78,7 @@ void test( const struct Input &in, struct Output &out ) {
 		double ttime = timer.time();
 		// get cache `hot'
 		out.error = grb::foldl< grb::descriptors::dense >( alpha, xv, realm );
+		out.error = out.error ? out.error : grb::wait();
 		if( out.error != SUCCESS ) {
 			std::cerr << "grb::foldl returns non-SUCCESS exit code "
 				<< grb::toString( out.error ) << ".\n";
@@ -97,7 +99,16 @@ void test( const struct Input &in, struct Output &out ) {
 		// benchmark templated axpy
 		for( size_t i = 0; i < out.reps_used; ++i ) {
 			alpha = 0.0;
-			(void) grb::foldl< grb::descriptors::dense >( alpha, xv, realm );
+			out.error = grb::foldl< grb::descriptors::dense >( alpha, xv, realm );
+			// avoid overhead of calling wait if not required
+			if( grb::Properties<>::isNonblockingExecution ) {
+				out.error = out.error ? out.error : grb::wait();
+			}
+			if( out.error != grb::SUCCESS ) {
+				std::cerr << "grb::foldl returns " << grb::toString( out.error )
+					<< " during hot benchmark loop; exiting with error!\n";
+				return;
+			}
 		}
 		out.times.useful = timer.time() / static_cast< double >( out.reps_used );
 
@@ -122,10 +133,11 @@ void test( const struct Input &in, struct Output &out ) {
 		// get cache `hot'
 		alpha = realm.template getIdentity< double >();
 		out.error = grb::eWiseLambda(
-			[ &alpha, &xv, &realm ]( const size_t i ) {
-				(void) grb::foldl( alpha, xv[ i ], realm.getOperator() );
-			},
+				[ &alpha, &xv, &realm ]( const size_t i ) {
+					(void) grb::foldl( alpha, xv[ i ], realm.getOperator() );
+				},
 			xv );
+		out.error = out.error ? out.error : grb::wait();
 		if( out.error != SUCCESS ) {
 			std::cerr << "grb::eWiseLambda returns non-SUCCESS exit code "
 				<< grb::toString( out.error ) << ".\n";
@@ -146,18 +158,27 @@ void test( const struct Input &in, struct Output &out ) {
 		// benchmark templated axpy
 		for( size_t i = 0; i < out.reps_used; ++i ) {
 			alpha = realm.template getIdentity< double >();
-			(void)grb::eWiseLambda(
-				[ &alpha, &xv, &realm ]( const size_t i ) {
-					(void)grb::foldl( alpha, xv[ i ], realm.getOperator() );
-				},
+			out.error = grb::eWiseLambda(
+					[ &alpha, &xv, &realm ]( const size_t i ) {
+						(void) grb::foldl( alpha, xv[ i ], realm.getOperator() );
+					},
 				xv );
+			// avoid overhead of calling wait if not required
+			if( grb::Properties<>::isNonblockingExecution ) {
+				out.error = out.error ? out.error : grb::wait();
+			}
+			if( out.error != grb::SUCCESS ) {
+				std::cerr << "grb::foldl returns " << grb::toString( out.error )
+					<< " during hot benchmark loop; exiting with error!\n";
+				return;
+			}
 		}
 		out.times.useful = timer.time() / static_cast< double >( out.reps_used );
 
 		// postamble
 		timer.reset();
 		for( size_t i = 0; i < in.n; ++i ) {
-			if( ! grb::utils::equals( expected, alpha, in.n - 1 ) ) {
+			if( !grb::utils::equals( expected, alpha, in.n - 1 ) ) {
 				std::cout << expected << " (expected) does not equal " << alpha
 					<< " (eWiseLambda).\n";
 				out.error = FAILED;
Original file line number	Diff line number	Diff line change
`@@ -413,6 +413,9 @@ namespace grb {`
`413`	`413`	`) {`
`414`	`414`	`throw std::runtime_error( "Could not compute vertex IDs" );`
`415`	`415`	`}`
	`416`	`+ if( grb::wait( IDs, indegrees, outdegrees, ones ) != SUCCESS ) {`
	`417`	`+ throw std::runtime_error( "Error during initialisation" );`
	`418`	`+ }`
`416`	`419`	`}`
`417`	`420`
`418`	`421`