feat: update Js implementation and add C ndarray implementation for s…

…nrm2
stdlib-js · Sep 19, 2024 · 326e25e · 326e25e
1 parent 69234e5
commit 326e25e
Show file tree

Hide file tree

Showing 17 changed files with 765 additions and 182 deletions.
diff --git a/lib/node_modules/@stdlib/blas/base/snrm2/README.md b/lib/node_modules/@stdlib/blas/base/snrm2/README.md
@@ -160,6 +160,129 @@ console.log( out );
 
 <!-- /.examples -->
 
+<!-- C interface documentation. -->
+
+* * *
+
+<section class="c">
+
+## C APIs
+
+<!-- Section to include introductory text. Make sure to keep an empty line after the intro `section` element and another before the `/section` close. -->
+
+<section class="intro">
+
+</section>
+
+<!-- /.intro -->
+
+<!-- C usage documentation. -->
+
+<section class="usage">
+
+### Usage
+
+```c
+#include "stdlib/blas/base/snrm2.h"
+```
+
+#### c_snrm2( N, \*X, stride )
+
+Computes the L2-norm of a single-precision floating-point vector.
+
+```c
+const float x[] = { 1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, -8.0f };
+
+float norm = c_snrm2( 8, x, 1 );
+// returns 14.3
+```
+
+The function accepts the following arguments:
+
+-   **N**: `[in] CBLAS_INT` number of indexed elements.
+-   **X**: `[in] float*` input array.
+-   **stride**: `[in] CBLAS_INT` index increment for `X`.
+
+```c
+float c_snrm2( const CBLAS_INT N, const float *X, const CBLAS_INT stride );
+```
+
+#### c_snrm2_ndarray( N, \*X, stride, offset )
+
+Computes the L2-norm of a single-precision floating-point vector using alternative indexing semantics.
+
+```c
+const float x[] = { 1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, -8.0f };
+
+float norm = c_snrm2_ndarray( 8, x, 1, 0 );
+// returns 14.3
+```
+
+The function accepts the following arguments:
+
+-   **N**: `[in] CBLAS_INT` number of indexed elements.
+-   **X**: `[in] float*` input array.
+-   **stride**: `[in] CBLAS_INT` index increment for `X`.
+-   **offset**: `[in] CBLAS_INT` starting index for `X`.
+
+```c
+float c_snrm2_ndarray( const CBLAS_INT N, const float *X, const CBLAS_INT stride, const CBLAS_INT offset );
+```
+
+</section>
+
+<!-- /.usage -->
+
+<!-- C API usage notes. Make sure to keep an empty line after the `section` element and another before the `/section` close. -->
+
+<section class="notes">
+
+</section>
+
+<!-- /.notes -->
+
+<!-- C API usage examples. -->
+
+<section class="examples">
+
+### Examples
+
+```c
+#include "stdlib/blas/base/snrm2.h"
+#include <stdio.h>
+
+int main( void ) {
+    // Create a strided array:
+    const float x[] = { 1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, -8.0f };
+
+    // Specify the number of indexed elements:
+    const int N = 8;
+
+    // Specify a stride:
+    const int strideX = 1;
+
+    // Compute the L2-norm:
+    float l2 = c_snrm2( N, x, strideX );
+
+    // Print the result:
+    printf( "L2-norm: %f\n", l2 );
+
+    // Compute the L2-norm:
+    l2 = c_snrm2_ndarray( N, x, strideX, 0 );
+
+    // Print the result:
+    printf( "L2-norm: %f\n", l2 );
+}
+```
+
+</section>
+
+<!-- /.examples -->
+
+</section>
+
+<!-- /.c -->
+
 <!-- Section for related `stdlib` packages. Do not manually edit this section, as it is automatically populated. -->
 
 <section class="related">

diff --git a/lib/node_modules/@stdlib/blas/base/snrm2/docs/repl.txt b/lib/node_modules/@stdlib/blas/base/snrm2/docs/repl.txt
@@ -8,7 +8,7 @@
     Indexing is relative to the first index. To introduce an offset, use a typed
     array view.
 
-    If `N <= 0` or `stride <= 0`, the function returns `0`.
+    If `N <= 0`, the function returns `0`.
 
     Parameters
     ----------

diff --git a/lib/node_modules/@stdlib/blas/base/snrm2/lib/ndarray.js b/lib/node_modules/@stdlib/blas/base/snrm2/lib/ndarray.js
@@ -20,9 +20,20 @@
 
 // MODULES //
 
-var sqrtf = require( '@stdlib/math/base/special/sqrtf' );
+var FLOAT32_MAX = require( '@stdlib/constants/float32/max' );
+var f32 = require( '@stdlib/number/float64/base/to-float32' );
 var absf = require( '@stdlib/math/base/special/absf' );
-var float64ToFloat32 = require( '@stdlib/number/float64/base/to-float32' );
+var abs2f = require( '@stdlib/math/base/special/abs2f' );
+var sqrtf = require( '@stdlib/math/base/special/sqrtf' );
+
+
+// VARIABLES //
+
+// Blue's scaling constants:
+var tsml = 1.08420217E-19;
+var tbig = 4.50359963E+15;
+var ssml = 3.77789319E+22;
+var sbig = 1.32348898E-23;
 
 
 // MAIN //
@@ -45,37 +56,79 @@ var float64ToFloat32 = require( '@stdlib/number/float64/base/to-float32' );
 * // returns 5.0
 */
 function snrm2( N, x, stride, offset ) {
-	var scale;
-	var ssq;
+	var notbig;
+	var sumsq;
+	var abig;
+	var amed;
+	var asml;
+	var ymax;
+	var ymin;
+	var scl;
 	var ax;
 	var ix;
-	var v;
 	var i;
 
 	if ( N <= 0 ) {
 		return 0.0;
 	}
-	if ( N === 1 ) {
-		return absf( x[ offset ] );
-	}
 	ix = offset;
-	scale = 0.0;
-	ssq = 1.0;
+
+	// Initialize loop values for accumulation:
+	notbig = true;
+
+	sumsq = 0.0;
+	abig = 0.0;
+	amed = 0.0;
+	asml = 0.0;
+	scl = 1.0;
+
+	// Compute the sum of squares using 3 accumulators--`abig` (sum of squares scaled down to avoid overflow), `asml` (sum of squares scaled up to avoid underflow), `amed` (sum of squares that do not require scaling)--and thresholds and multipliers--`tbig` (values bigger than this are scaled down by `sbig`) and `tsml` (values smaller than this are scaled up by `ssml`)...
 	for ( i = 0; i < N; i++ ) {
-		if ( x[ ix ] !== 0.0 ) {
-			ax = absf( x[ ix ] );
-			if ( scale < ax ) {
-				v = float64ToFloat32( scale/ax );
-				ssq = float64ToFloat32( 1.0 + float64ToFloat32( ssq * float64ToFloat32( v*v ) ) ); // eslint-disable-line max-len
-				scale = ax;
-			} else {
-				v = float64ToFloat32( ax/scale );
-				ssq = float64ToFloat32( ssq + float64ToFloat32( v*v ) );
+		ax = absf( x[ ix ] );
+		if ( ax > tbig ) {
+			abig = f32( abig + abs2f( ax * sbig ) );
+			notbig = false;
+		} else if ( ax < tsml ) {
+			if ( notbig ) {
+				asml = f32( asml + abs2f( ax * ssml ) );
 			}
+		} else {
+			amed = f32( amed + f32( ax * ax ) );
 		}
 		ix += stride;
 	}
-	return float64ToFloat32( scale * sqrtf( ssq ) );
+	// Combine `abig` and `amed` or `amed` and `asml` if more than one accumulator was used...
+	if ( abig > 0.0 ) {
+		// Combine `abig` and `amed` if `abig` > 0...
+		if ( amed > 0.0 || ( amed > FLOAT32_MAX ) || ( amed !== amed ) ) {
+			abig = f32( abig + f32( f32( amed * sbig ) * sbig ) );
+		}
+		scl = f32( 1.0 / sbig );
+		sumsq = abig;
+	} else if ( asml > 0.0 ) {
+		// Combine `amed` and `asml` if `asml` > 0...
+		if ( amed > 0.0 || amed > FLOAT32_MAX || ( amed !== amed ) ) {
+			amed = sqrtf( amed );
+			asml = f32( sqrtf( asml ) / ssml );
+			if ( asml > amed ) {
+				ymin = amed;
+				ymax = asml;
+			} else {
+				ymin = asml;
+				ymax = amed;
+			}
+			scl = 1.0;
+			sumsq = f32( f32( ymax * ymax ) * f32( 1.0 + abs2f( ymin / ymax ) ) ); // eslint-disable-line max-len
+		} else {
+			scl = f32( 1.0 / ssml );
+			sumsq = asml;
+		}
+	} else {
+		// All values are mid-range...
+		scl = 1.0;
+		sumsq = amed;
+	}
+	return f32( sqrtf( sumsq ) * scl );
 }
 
 

diff --git a/lib/node_modules/@stdlib/blas/base/snrm2/lib/ndarray.native.js b/lib/node_modules/@stdlib/blas/base/snrm2/lib/ndarray.native.js
@@ -20,9 +20,7 @@
 
 // MODULES //
 
-var minViewBufferIndex = require( '@stdlib/strided/base/min-view-buffer-index' );
-var offsetView = require( '@stdlib/strided/base/offset-view' );
-var addon = require( './snrm2.native.js' );
+var addon = require( './../src/addon.node' );
 
 
 // MAIN //
@@ -45,13 +43,7 @@ var addon = require( './snrm2.native.js' );
 * // returns 5.0
 */
 function snrm2( N, x, stride, offset ) {
-	var view;
-	offset = minViewBufferIndex( N, stride, offset );
-	if ( stride < 0 ) {
-		stride *= -1;
-	}
-	view = offsetView( x, offset );
-	return addon( N, view, stride );
+	return addon.ndarray( N, x, stride, offset );
 }
 
 

diff --git a/lib/node_modules/@stdlib/blas/base/snrm2/lib/snrm2.js b/lib/node_modules/@stdlib/blas/base/snrm2/lib/snrm2.js
@@ -20,9 +20,8 @@
 
 // MODULES //
 
-var sqrtf = require( '@stdlib/math/base/special/sqrtf' );
-var absf = require( '@stdlib/math/base/special/absf' );
-var float64ToFloat32 = require( '@stdlib/number/float64/base/to-float32' );
+var stride2offset = require( '@stdlib/strided/base/stride2offset' );
+var ndarray = require( './ndarray.js' );
 
 
 // MAIN //
@@ -32,7 +31,7 @@ var float64ToFloat32 = require( '@stdlib/number/float64/base/to-float32' );
 *
 * @param {PositiveInteger} N - number of indexed elements
 * @param {Float32Array} x - input array
-* @param {PositiveInteger} stride - stride length
+* @param {integer} stride - stride length
 * @returns {number} L2-norm
 *
 * @example
@@ -44,35 +43,8 @@ var float64ToFloat32 = require( '@stdlib/number/float64/base/to-float32' );
 * // returns 3.0
 */
 function snrm2( N, x, stride ) {
-	var scale;
-	var ssq;
-	var ax;
-	var v;
-	var i;
-
-	if ( N <= 0 || stride <= 0 ) {
-		return 0.0;
-	}
-	if ( N === 1 ) {
-		return absf( x[ 0 ] );
-	}
-	scale = 0.0;
-	ssq = 1.0;
-	N *= stride;
-	for ( i = 0; i < N; i += stride ) {
-		if ( x[ i ] !== 0.0 ) {
-			ax = absf( x[ i ] );
-			if ( scale < ax ) {
-				v = float64ToFloat32( scale/ax );
-				ssq = float64ToFloat32( 1.0 + float64ToFloat32( ssq * float64ToFloat32( v*v ) ) ); // eslint-disable-line max-len
-				scale = ax;
-			} else {
-				v = float64ToFloat32( ax/scale );
-				ssq = float64ToFloat32( ssq + float64ToFloat32( v*v ) );
-			}
-		}
-	}
-	return float64ToFloat32( scale * sqrtf( ssq ) );
+	var ox = stride2offset( N, stride );
+	return ndarray( N, x, stride, ox );
 }
 
 

diff --git a/lib/node_modules/@stdlib/blas/base/snrm2/lib/snrm2.native.js b/lib/node_modules/@stdlib/blas/base/snrm2/lib/snrm2.native.js
@@ -30,7 +30,7 @@ var addon = require( './../src/addon.node' );
 *
 * @param {PositiveInteger} N - number of indexed elements
 * @param {Float32Array} x - input array
-* @param {PositiveInteger} stride - stride length
+* @param {integer} stride - stride length
 * @returns {number} L2-norm
 *
 * @example