matrix generators
Ravenwater committed Jul 20, 2020
1 parent 784aba0 commit 1240508
Showing 7 changed files with 201 additions and 91 deletions.
11 changes: 11 additions & 0 deletions PURPOSE.md
@@ -0,0 +1,11 @@
# Numerical examples

The Universal library provides parameterized number systems to support research into custom
algorithms tailored to the arithmetic requirements of an application. In high-performance
and real-time applications, tailored number systems have long been used to maximize performance
and/or performance per Watt. More recently, Deep Learning has rekindled interest in tailored
number systems: training DNNs is a lengthy affair, and improving computational performance by
reducing hardware, memory, or network bandwidth has immediate commercial benefits. As Deep
Learning AI is embedded in stand-alone devices, the benefit of multi-precision number system
optimization is broadening to any application that relies on deep computation for knowledge
generation or decision making.
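
A hypothetical illustration of what "parameterized number system" means in practice: the same kernel
template instantiated with a native float and with a Universal posit type. This sketch is not part of
the commit; the dot function and the test values are invented for illustration.

// dot.cpp: same algorithm, different number systems (illustrative sketch only)
#include <iostream>
#include <vector>
#include <universal/posit/posit>

template<typename Scalar>
Scalar dot(const std::vector<Scalar>& x, const std::vector<Scalar>& y) {
	Scalar sum(0);
	for (size_t i = 0; i < x.size(); ++i) sum += x[i] * y[i];
	return sum;
}

int main() {
	std::vector<float> xf{ 0.1f, 0.2f, 0.3f }, yf{ 1.0f, 2.0f, 3.0f };
	// posits are constructible from float, so the same data can be converted directly
	std::vector<sw::unum::posit<16, 1>> xp(xf.begin(), xf.end()), yp(yf.begin(), yf.end());
	std::cout << "float       : " << dot(xf, yf) << '\n';
	std::cout << "posit<16,1> : " << dot(xp, yp) << '\n';
}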
34 changes: 21 additions & 13 deletions examples/blas/matrix_ops.cpp
@@ -17,30 +17,38 @@
#include <universal/posit/posit>
#define BLAS_TRACE_ROUNDING_EVENTS 1
#include <universal/blas/blas.hpp>
#include <universal/blas/generators.hpp>

template<typename Matrix>
void generateIdentity() {
Matrix A(4, 4);
template<typename Scalar>
void generateMatrices() {
using namespace std;
using Matrix = sw::unum::blas::matrix<Scalar>;

Matrix A(5, 5);
// create an Identity matrix
A = 1;
std::cout << A << std::endl;

// create a 2D Laplacian
laplace2D(A, 10, 10);
cout << A << endl;

// create a uniform random matrix
Matrix B(10, 10);
uniform_rand(B, 0.0, 1.0);
cout << setprecision(5) << setw(10) << B << endl;
}

int main(int argc, char** argv)
try {
using namespace std;
using namespace sw::unum::blas;

using p8_0 = sw::unum::posit< 8,0>;
using p16_1 = sw::unum::posit<16,1>;
using p32_2 = sw::unum::posit<32,2>;

using MatrixP08 = sw::unum::blas::matrix<p8_0>;
using MatrixP16 = sw::unum::blas::matrix<p16_1>;
using MatrixP32 = sw::unum::blas::matrix<p32_2>;
using Scalar = sw::unum::posit<16,1>;

generateIdentity<MatrixP08>();
generateIdentity<MatrixP16>();
generateIdentity<MatrixP32>();
generateMatrices< sw::unum::posit< 8, 0> >();
generateMatrices< sw::unum::posit<16, 1> >();
generateMatrices< sw::unum::posit<32, 2> >();

return EXIT_SUCCESS;
}
99 changes: 61 additions & 38 deletions examples/pde/laplace.cpp
@@ -8,20 +8,22 @@
// enable posit arithmetic exceptions
#define POSIT_THROW_ARITHMETIC_EXCEPTION 1
#include <universal/posit/posit>
#include <universal/blas/blas.hpp>
#include <universal/blas/generators.hpp>

/*
Mathematical Expression   C++ Symbol     Decimal Representation
pi M_PI 3.14159265358979323846
pi/2 M_PI_2 1.57079632679489661923
pi/4 M_PI_4 0.785398163397448309616
1/pi M_1_PI 0.318309886183790671538
2/pi M_2_PI 0.636619772367581343076
2/sqrt(pi) M_2_SQRTPI 1.12837916709551257390
sqrt(2) M_SQRT2 1.41421356237309504880
1/sqrt(2) M_SQRT1_2 0.707106781186547524401
e M_E 2.71828182845904523536
log_2(e) M_LOG2E 1.44269504088896340736
log_10(e) M_LOG10E 0.434294481903251827651
log_e(2) M_LN2 0.693147180559945309417
@@ -33,54 +35,75 @@ constexpr double pi = 3.14159265358979323846; // best practice for C++

/*
When you say "an application study on the typical
length of a unum during the solution of a PDE or ODE
or optimization problem", there's a veritable mountain
of papers that could be written on such a broad topic.
But do you mean Type I unums? It sounds like it, since those
are the only ones that vary in length. There's a chapter
of The End of Error that I was not able to finish in time,
but it still needs to be written, regarding Laplace's
equation and other PDEs that can be solved crudely at
first and then refined since the errors do not accumulate
with time-stepping. Start at very low precision and work
up, saving power and energy.
With what we know now about posits, a much better design
for a number format than Type I, it would be very cool
indeed to show an example of a PDE using various posit
precisions to minimize the power consumed to get to an
answer. Do you have the time resources for such an effort?
I feel like I'm pretty swamped right now, but getting
the Draft Standard closer to complete will be one thing
I can take off my plate if everyone thinks it's looking
pretty close to perfect.
If I were starting the experiment, I'd try solving
Laplace's equation on a square. Boundary condition
f(x,y) = 1 on some part of the square like the left half
of the bottom border (to be asymmetrical),
f(x,y) = 0 elsewhere on the border or maybe put a –1 value
somewhere (it wastes the sign bit of a posit
if all the values are nonnegative),
∇²f = 0 on the interior. Try the oldest and simplest
method of relaxation, even though better iterative methods
are known, with a very coarse grid and very low precision posits.
Like, a 4-by-4 grid of points and 4-bit posits with es = 2.
It should converge very quickly, and I think the discretization
error goes as the inverse square of the grid spacing.
The idea is to decrease rounding error by adding bits
to the end of the posit solution and refine the grid,
keeping those two sources of error in the same ballpark.
My hypothesis is that the result will resemble a multigrid solver,
which is actually one of the best ways to solve Laplace's equation.
If that works, then I'd try making the domain L-shaped,
which I believe produces a singularity at the interior corner point
and should make it harder to converge there.
As I said, I had hoped to try this approach with Type I unums
but ran out of time. My manuscript was six months late as it was!
*/

int main(int argc, char** argv)
try {
using namespace std;
using namespace sw::unum;
using namespace sw::unum::blas;

const size_t nbits = 16;
const size_t es = 1;
using Scalar = posit<nbits, es>;
// using Scalar = float;
using Matrix = sw::unum::blas::matrix<Scalar>;

int nrOfFailedTestCases = 0;

posit<nbits, es> p;

Matrix A;
laplace2D(A, 10, 10);
cout << A << endl;

return (nrOfFailedTestCases > 0 ? EXIT_FAILURE : EXIT_SUCCESS);
}
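The relaxation experiment described in the comment above can be sketched in a few lines. The following
is a hypothetical illustration only, not part of this commit: it uses a plain std::vector grid rather
than the blas matrix class, an in-place (Gauss-Seidel style) sweep, and arbitrary grid sizes and
iteration counts; only the posit header is taken from the library.

// relax.cpp: hypothetical sketch of low-precision relaxation for Laplace's equation on a square
#include <iostream>
#include <vector>
#include <universal/posit/posit>

template<typename Scalar>
Scalar relax(size_t N, size_t iterations) {
	// (N+2) x (N+2) grid with the boundary included
	std::vector<std::vector<Scalar>> f(N + 2, std::vector<Scalar>(N + 2, Scalar(0)));
	// boundary condition: f = 1 on the left half of the bottom border, 0 elsewhere
	for (size_t j = 0; j <= N / 2; ++j) f[N + 1][j] = Scalar(1);
	Scalar quarter(0.25);
	for (size_t it = 0; it < iterations; ++it) {
		for (size_t i = 1; i <= N; ++i) {
			for (size_t j = 1; j <= N; ++j) {
				// in-place relaxation sweep: each interior point becomes the average of its four neighbors
				f[i][j] = quarter * (f[i - 1][j] + f[i + 1][j] + f[i][j - 1] + f[i][j + 1]);
			}
		}
	}
	return f[N / 2 + 1][N / 2 + 1];   // sample a value near the center of the domain
}

int main() {
	using sw::unum::posit;
	// start with a coarse grid and a small posit, then refine both together
	std::cout << "posit< 8,0>,  4x4  grid: " << relax<posit< 8, 0>>(4, 50) << '\n';
	std::cout << "posit<16,1>,  8x8  grid: " << relax<posit<16, 1>>(8, 200) << '\n';
	std::cout << "posit<32,2>, 16x16 grid: " << relax<posit<32, 2>>(16, 800) << '\n';
}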
13 changes: 13 additions & 0 deletions include/universal/blas/generators.hpp
@@ -0,0 +1,13 @@
#pragma once
// generators.hpp: matrix generators
//
// Copyright (C) 2017-2020 Stillwater Supercomputing, Inc.
//
// This file is part of the universal numbers project, which is released under an MIT Open Source license.

#include "vector.hpp"
#include "matrix.hpp"

// matrix generators
#include "uniform_random.hpp"
#include "laplace2D.hpp"
31 changes: 31 additions & 0 deletions include/universal/blas/laplace2D.hpp
@@ -0,0 +1,31 @@
#pragma once
// laplace2D.hpp: generate 2D Laplace operator difference matrix on a square domain
//
// Copyright (C) 2017-2020 Stillwater Supercomputing, Inc.
//
// This file is part of the universal numbers project, which is released under an MIT Open Source license.
#include <universal/blas/blas.hpp>
//#include <universal/posit/posit_fwd.hpp>

namespace sw { namespace unum { namespace blas {

// generate a 2D square domain Laplacian difference equation matrix
template<typename Scalar>
void laplace2D(matrix<Scalar>& A, size_t m, size_t n) {
A.resize(m * n, m * n);  // the 2D Laplacian on an m x n grid is an (m*n) x (m*n) matrix
A.setzero();
assert(A.rows() == m * n);
for (size_t i = 0; i < m; ++i) {
for (size_t j = 0; j < n; ++j) {
Scalar four(4.0), minus_one(-1.0);
size_t row = i * n + j;
A(row, row) = four;
if (j < n - 1) A(row, row + 1) = minus_one;
if (i < m - 1) A(row, row + n) = minus_one;
if (j > 0) A(row, row - 1) = minus_one;
if (i > 0) A(row, row - n) = minus_one;
}
}
}

}}} // namespace sw::unum::blas
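
A minimal usage sketch for the generator above; the main program and includes are mine, while the
header names are those introduced by this commit.

// hypothetical usage: build the difference matrix for a 3x3 grid (a 9x9 matrix)
#include <iostream>
#include <universal/posit/posit>
#include <universal/blas/blas.hpp>
#include <universal/blas/generators.hpp>

int main() {
	using Scalar = sw::unum::posit<16, 1>;
	sw::unum::blas::matrix<Scalar> A;
	sw::unum::blas::laplace2D(A, 3, 3);   // 3x3 grid -> 9x9 Laplacian stencil matrix
	std::cout << A << std::endl;          // 4 on the diagonal, -1 on the neighbor couplings
}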
63 changes: 23 additions & 40 deletions include/universal/blas/matrix.hpp
@@ -35,13 +35,14 @@ class RowProxy {
template<typename Scalar>
class matrix {
public:
typedef Scalar value_type;
typedef const value_type& const_reference;
typedef value_type& reference;
typedef const value_type* const_pointer_type;

matrix() {}
matrix(size_t _m, size_t _n) : m{ _m }, n{ _n }, data(m*n, Scalar(0.0)) { }
typedef Scalar value_type;
typedef const value_type& const_reference;
typedef value_type& reference;
typedef const value_type* const_pointer_type;
typedef typename std::vector<Scalar>::size_type size_type;

matrix() : _m{ 0 }, _n{ 0 }, data(0) {}
matrix(size_t m, size_t n) : _m{ m }, _n{ n }, data(m*n, Scalar(0.0)) { }
matrix(std::initializer_list< std::initializer_list<Scalar> > values) {
size_t nrows = values.size();
size_t ncols = values.begin()->size();
@@ -57,10 +58,10 @@ class matrix {
++r;
}
}
m = nrows;
n = ncols;
_m = nrows;
_n = ncols;
}
matrix(const matrix& A) : m{ A.m }, n{ A.n }, data(A.data) {}
matrix(const matrix& A) : _m{ A._m }, _n{ A._n }, data(A.data) {}

// operators
matrix& operator=(const matrix& M) = default;
@@ -69,30 +70,30 @@
// Identity matrix operator
matrix& operator=(const Scalar& one) {
setzero();
size_t smallestDimension = (m < n ? m : n);
for (size_t i = 0; i < smallestDimension; ++i) data[i*n + i] = one;
size_t smallestDimension = (_m < _n ? _m : _n);
for (size_t i = 0; i < smallestDimension; ++i) data[i*_n + i] = one;
return *this;
}

Scalar operator()(size_t i, size_t j) const { return data[i*n + j]; }
Scalar& operator()(size_t i, size_t j) { return data[i*n + j]; }
Scalar operator()(size_t i, size_t j) const { return data[i*_n + j]; }
Scalar& operator()(size_t i, size_t j) { return data[i*_n + j]; }
RowProxy<Scalar> operator[](size_t i) {
typename std::vector<Scalar>::iterator it = data.begin() + i * n;
typename std::vector<Scalar>::iterator it = data.begin() + i * _n;
RowProxy<Scalar> proxy(it);
return proxy;
}
ConstRowProxy<Scalar> operator[](size_t i) const {
typename std::vector<Scalar>::const_iterator it = data.begin() + i * n;
typename std::vector<Scalar>::const_iterator it = data.begin() + i * _n;
ConstRowProxy<Scalar> proxy(it);
return proxy;
}

// modifiers
inline void setzero() { for (auto& elem : data) elem = Scalar(0); }

	inline void resize(size_t m, size_t n) { _m = m; _n = n; data.resize(m * n); } // reshape to m rows, n columns
// selectors
inline size_t rows() const { return m; }
inline size_t cols() const { return n; }
inline size_t rows() const { return _m; }
inline size_t cols() const { return _n; }

// Eigen operators I need to reverse engineer
matrix Zero(size_t m, size_t n) {
@@ -108,7 +109,7 @@ }
}

private:
size_t m, n; // m rows and n columns
size_t _m, _n; // m rows and n columns
std::vector<Scalar> data;
};

@@ -120,12 +121,12 @@ size_t num_cols(const matrix<Scalar>& A) { return A.cols(); }
// ostream operator: no need to declare as friend as it only uses public interfaces
template<typename Scalar>
std::ostream& operator<<(std::ostream& ostr, const matrix<Scalar>& A) {
auto width = ostr.precision() + 2;
auto width = ostr.width();
size_t m = A.rows();
size_t n = A.cols();
for (size_t i = 0; i < m; ++i) {
for (size_t j = 0; j < n; ++j) {
ostr << std::setw(width) << A(i, j) << " ";
ostr << std::fixed << std::setw(width) << A(i, j) << " ";
}
ostr << '\n';
}
@@ -199,22 +200,4 @@ matrix< posit<nbits, es> > operator*(const matrix< posit<nbits, es> >& A, const
return C;
}

// create a 2D difference equation matrix of a Laplacian stencil
template<typename Scalar>
void laplacian_setup(matrix<Scalar>& A, size_t m, size_t n) {
A.setzero();
assert(A.rows() == m * n);
for (size_t i = 0; i < m; ++i) {
for (size_t j = 0; j < n; ++j) {
Scalar four(4.0), minus_one(-1.0);
size_t row = i * n + j;
A(row, row) = four;
if (j < n - 1) A(row, row + 1) = minus_one;
if (i < m - 1) A(row, row + n) = minus_one;
if (j > 0) A(row, row - 1) = minus_one;
if (i > 0) A(row, row - n) = minus_one;
}
}
}

}}} // namespace sw::unum::blas
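
For reference, a small, hypothetical usage sketch of the container interface shown above (identity
assignment via the scalar operator=, element access via operator()); the program itself is not part
of the commit.

// hypothetical usage of sw::unum::blas::matrix
#include <iostream>
#include <universal/posit/posit>
#include <universal/blas/blas.hpp>

int main() {
	using Scalar = sw::unum::posit<16, 1>;
	using Matrix = sw::unum::blas::matrix<Scalar>;

	Matrix A(3, 3);
	A = 1;                      // scalar assignment creates an identity matrix
	A(0, 2) = Scalar(-0.5);     // element access and assignment through operator()
	std::cout << "A is " << A.rows() << " x " << A.cols() << "\n" << A << std::endl;
}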