From d5f90245df32f6f58ede5e352b59c4cdb04dc27f Mon Sep 17 00:00:00 2001 From: Jack Poulson Date: Tue, 2 Apr 2013 13:37:01 -0700 Subject: [PATCH] Better organizing SVD implementation and beginning to work on a REFERENCES file --- AUTHORS | 3 - REFERENCES | 28 + doc/source/core/imports/lapack.rst | 4 +- doc/source/lapack-like/eigen_svd.rst | 6 +- examples/lapack-like/SVD.cpp | 2 +- include/elemental/core/imports/lapack.hpp | 10 +- .../elemental/lapack-like/ConditionNumber.hpp | 4 +- .../lapack-like/HermitianNorm/KyFan.hpp | 10 +- .../lapack-like/HermitianNorm/Schatten.hpp | 10 +- .../lapack-like/HermitianNorm/Two.hpp | 10 +- .../elemental/lapack-like/HermitianSVD.hpp | 4 +- include/elemental/lapack-like/Norm/KyFan.hpp | 4 +- .../elemental/lapack-like/Norm/Schatten.hpp | 4 +- include/elemental/lapack-like/Norm/Two.hpp | 4 +- include/elemental/lapack-like/SVD.hpp | 824 +----------------- include/elemental/lapack-like/SVD/Chan.hpp | 236 +++++ .../lapack-like/SVD/GolubReinsch.hpp | 567 ++++++++++++ include/elemental/lapack-like/SVD/Util.hpp | 104 +++ .../lapack-like/SymmetricNorm/KyFan.hpp | 4 +- .../lapack-like/SymmetricNorm/Schatten.hpp | 4 +- .../lapack-like/SymmetricNorm/Two.hpp | 4 +- src/core/imports/lapack.cpp | 18 +- 22 files changed, 999 insertions(+), 865 deletions(-) create mode 100644 REFERENCES create mode 100644 include/elemental/lapack-like/SVD/Chan.hpp create mode 100644 include/elemental/lapack-like/SVD/GolubReinsch.hpp create mode 100644 include/elemental/lapack-like/SVD/Util.hpp diff --git a/AUTHORS b/AUTHORS index d2b351734c..cf3621cdca 100644 --- a/AUTHORS +++ b/AUTHORS @@ -27,6 +27,3 @@ Jed Brown and Argonne National Laboratory: Jeff Hammond and Argonne National Laboratory: Several bug fixes and toolchain files for BlueGene/P and BlueGene/Q, and an improvement for Elemental's threading support. - -Jack Poulson, Lexing Ying, and The University of Texas at Austin: - The parallel linear congruential generator. diff --git a/REFERENCES b/REFERENCES new file mode 100644 index 0000000000..2abf205f4c --- /dev/null +++ b/REFERENCES @@ -0,0 +1,28 @@ +NOTE: This list of references will hopefully grow very soon. I would like to + move towards making Elemental (and all of my open source efforts) closer + to a publication. + +[1] Tony F. Chan, "An improved algorithm for computing the Singular Value + Decomposition", ACM Transactions on Mathematical Software, Vol. 8, No. 1, + pp. 72--83, 1982. + http://www.stat.uchicago.edu/~lekheng/courses/324/chan.pdf + + NOTE: This paper introduced the idea of using a QR decomposition as a first + step in the SVD of a non-square matrix in order to accelerate the + computation. + +[2] Gene H. Golub and Christian Reinsch, "Singular value decomposition and + least squares solutions", Numerische Mathematik, Vol. 14, No. 5, + pp. 403--420, 1970. + + NOTE: This paper introduced the standard algorithm for computing the SVD. + +[3] Zhongxiao Jia, "Using cross-product matrices to compute the SVD", + Numerical Algorithms, 42:31--61, 2006. + http://faculty.math.tsinghua.edu.cn/~zjia/jianumalgo.pdf + + NOTE: This paper could serve as a foundation for achieving high absolute + accuracy in a cross-product based algorithm for computing the SVD. + Such an approach should be more scalable than the current + bidiagonalization-based approach. + diff --git a/doc/source/core/imports/lapack.rst b/doc/source/core/imports/lapack.rst index ba72b3825c..68db7cd255 100644 --- a/doc/source/core/imports/lapack.rst +++ b/doc/source/core/imports/lapack.rst @@ -92,8 +92,8 @@ QR-based SVD Computes the singular value decomposition of a general matrix by running the QR algorithm on the condensed bidiagonal matrix. -.. cpp:function:: void lapack::SingularValues( int m, int n, R* A, int lda, R* s ) -.. cpp:function:: void lapack::SingularValues( int m, int n, Complex* A, int lda, R* s ) +.. cpp:function:: void lapack::SVD( int m, int n, R* A, int lda, R* s ) +.. cpp:function:: void lapack::SVD( int m, int n, Complex* A, int lda, R* s ) Computes the singular values of a general matrix by running the QR algorithm on the condensed bidiagonal matrix. diff --git a/doc/source/lapack-like/eigen_svd.rst b/doc/source/lapack-like/eigen_svd.rst index 8a2f7d07ff..8ed08ce25e 100644 --- a/doc/source/lapack-like/eigen_svd.rst +++ b/doc/source/lapack-like/eigen_svd.rst @@ -374,7 +374,7 @@ flips introduced by negative eigenvalues. singular vector matrices, :math:`U` and :math:`V`, such that :math:`A=U \mathrm{diag}(s) V^H`. -.. cpp:function:: void HermitianSingularValues( UpperOrLower uplo, DistMatrix& A, DistMatrix::type,VR,STAR>& s ) +.. cpp:function:: void HermitianSVD( UpperOrLower uplo, DistMatrix& A, DistMatrix::type,VR,STAR>& s ) Return the singular values of :math:`A` in `s`. Note that the appropriate triangle of `A` is overwritten during computation. @@ -426,9 +426,9 @@ non-negative entries. Overwrites `A` with :math:`U`, `s` with the diagonal entries of :math:`\Sigma`, and `V` with :math:`V`. -.. cpp:function:: void SingularValues( Matrix& A, Matrix::type>& s ) +.. cpp:function:: void SVD( Matrix& A, Matrix::type>& s ) -.. cpp:function:: void SingularValues( DistMatrix& A, DistMatrix::type,VR,STAR>& s ) +.. cpp:function:: void SVD( DistMatrix& A, DistMatrix::type,VR,STAR>& s ) Forms the singular values of :math:`A` in `s`. Note that `A` is overwritten in order to compute the singular values. diff --git a/examples/lapack-like/SVD.cpp b/examples/lapack-like/SVD.cpp index e57e4e72a5..6f44293e33 100644 --- a/examples/lapack-like/SVD.cpp +++ b/examples/lapack-like/SVD.cpp @@ -54,7 +54,7 @@ main( int argc, char* argv[] ) // Compute just the singular values DistMatrix sOnly( g ); DistMatrix U( A ); - SingularValues( U, sOnly ); + SVD( U, sOnly ); // Compute the SVD of A DistMatrix V( g ); diff --git a/include/elemental/core/imports/lapack.hpp b/include/elemental/core/imports/lapack.hpp index 3a83fa813e..7b993861c0 100644 --- a/include/elemental/core/imports/lapack.hpp +++ b/include/elemental/core/imports/lapack.hpp @@ -127,13 +127,13 @@ void QRSVD double* s, dcomplex* U, int ldu, dcomplex* VAdj, int ldva ); // -// Compute the singular values of a general matrix using the QR algorithm +// Compute the singular values of a general matrix (using the QR algorithm) // -void SingularValues( int m, int n, float* A, int lda, float* s ); -void SingularValues( int m, int n, double* A, int lda, double* s ); -void SingularValues( int m, int n, scomplex* A, int lda, float* s ); -void SingularValues( int m, int n, dcomplex* A, int lda, double* s ); +void SVD( int m, int n, float* A, int lda, float* s ); +void SVD( int m, int n, double* A, int lda, double* s ); +void SVD( int m, int n, scomplex* A, int lda, float* s ); +void SVD( int m, int n, dcomplex* A, int lda, double* s ); // // Compute the SVD of a bidiagonal matrix using the QR algorithm diff --git a/include/elemental/lapack-like/ConditionNumber.hpp b/include/elemental/lapack-like/ConditionNumber.hpp index 9150157ac2..74ca1d0a1d 100644 --- a/include/elemental/lapack-like/ConditionNumber.hpp +++ b/include/elemental/lapack-like/ConditionNumber.hpp @@ -25,7 +25,7 @@ ConditionNumber( const Matrix& A ) Matrix B( A ); Matrix s; - SingularValues( B, s ); + SVD( B, s ); R cond = 1; const int numVals = s.Height(); @@ -48,7 +48,7 @@ ConditionNumber( const DistMatrix& A ) DistMatrix B( A ); DistMatrix s( A.Grid() ); - SingularValues( B, s ); + SVD( B, s ); R cond = 1; const int numVals = s.Height(); diff --git a/include/elemental/lapack-like/HermitianNorm/KyFan.hpp b/include/elemental/lapack-like/HermitianNorm/KyFan.hpp index 45e03679a0..1ad3d3970b 100644 --- a/include/elemental/lapack-like/HermitianNorm/KyFan.hpp +++ b/include/elemental/lapack-like/HermitianNorm/KyFan.hpp @@ -35,14 +35,14 @@ HermitianKyFanNorm( UpperOrLower uplo, const Matrix& A, int k ) // TODO: Enable sequential MRRR /* #ifdef HAVE_PMRRR - HermitianSingularValues( uplo, B, s ); + HermitianSVD( uplo, B, s ); #else MakeHermitian( uplo, B ); - SingularValues( B, s ); + SVD( B, s ); #endif // ifdef HAVE_PMRRR */ MakeHermitian( uplo, B ); - SingularValues( B, s ); + SVD( B, s ); R norm = 0; for( int j=k-1; j>=0; --j ) @@ -67,10 +67,10 @@ HermitianKyFanNorm( UpperOrLower uplo, const DistMatrix& A, int k ) DistMatrix B( A ); DistMatrix s( A.Grid() ); #ifdef HAVE_PMRRR - HermitianSingularValues( uplo, B, s ); + HermitianSVD( uplo, B, s ); #else MakeHermitian( uplo, B ); - SingularValues( B, s ); + SVD( B, s ); #endif // ifdef HAVE_PMRRR R localNorm = 0; diff --git a/include/elemental/lapack-like/HermitianNorm/Schatten.hpp b/include/elemental/lapack-like/HermitianNorm/Schatten.hpp index e2e11bbe8b..7202474203 100644 --- a/include/elemental/lapack-like/HermitianNorm/Schatten.hpp +++ b/include/elemental/lapack-like/HermitianNorm/Schatten.hpp @@ -35,14 +35,14 @@ HermitianSchattenNorm // TODO: Enable sequential MRRR /* #ifdef HAVE_PMRRR - HermitianSingularValues( uplo, B, s ); + HermitianSVD( uplo, B, s ); #else MakeHermitian( uplo, B ); - SingularValues( B, s ); + SVD( B, s ); #endif // ifdef HAVE_PMRRR */ MakeHermitian( uplo, B ); - SingularValues( B, s ); + SVD( B, s ); // TODO: Think of how to make this more stable const int k = s.Height(); @@ -69,10 +69,10 @@ HermitianSchattenNorm DistMatrix B( A ); DistMatrix s( A.Grid() ); #ifdef HAVE_PMRRR - HermitianSingularValues( uplo, B, s ); + HermitianSVD( uplo, B, s ); #else MakeHermitian( uplo, B ); - SingularValues( B, s ); + SVD( B, s ); #endif // ifdef HAVE_PMRRR // TODO: Think of how to make this more stable diff --git a/include/elemental/lapack-like/HermitianNorm/Two.hpp b/include/elemental/lapack-like/HermitianNorm/Two.hpp index 7da32270ee..2c63f93644 100644 --- a/include/elemental/lapack-like/HermitianNorm/Two.hpp +++ b/include/elemental/lapack-like/HermitianNorm/Two.hpp @@ -33,14 +33,14 @@ HermitianTwoNorm( UpperOrLower uplo, const Matrix& A ) // TODO: Enable support for sequential MRRR /* #ifdef HAVE_PMRRR - HermitianSingularValues( uplo, B, s ); + HermitianSVD( uplo, B, s ); #else MakeHermitian( uplo, B ); - SingularValues( B, s ); + SVD( B, s ); #endif */ MakeHermitian( uplo, B ); - SingularValues( B, s ); + SVD( B, s ); const R norm = InfinityNorm( s ); #ifndef RELEASE PopCallStack(); @@ -59,10 +59,10 @@ HermitianTwoNorm( UpperOrLower uplo, const DistMatrix& A ) DistMatrix B( A ); DistMatrix s( A.Grid() ); #ifdef HAVE_PMRRR - HermitianSingularValues( uplo, B, s ); + HermitianSVD( uplo, B, s ); #else MakeHermitian( uplo, B ); - SingularValues( B, s ); + SVD( B, s ); #endif const R norm = InfinityNorm( s ); #ifndef RELEASE diff --git a/include/elemental/lapack-like/HermitianSVD.hpp b/include/elemental/lapack-like/HermitianSVD.hpp index 14add8995e..a80a74f782 100644 --- a/include/elemental/lapack-like/HermitianSVD.hpp +++ b/include/elemental/lapack-like/HermitianSVD.hpp @@ -69,12 +69,12 @@ inline void HermitianSVD } template -inline void HermitianSingularValues +inline void HermitianSVD ( UpperOrLower uplo, DistMatrix& A, DistMatrix::type,VR,STAR>& s ) { #ifndef RELEASE - PushCallStack("HermitianSingularValues"); + PushCallStack("HermitianSVD"); #endif typedef typename Base::type R; diff --git a/include/elemental/lapack-like/Norm/KyFan.hpp b/include/elemental/lapack-like/Norm/KyFan.hpp index 90ef7f79b4..d9c9231eed 100644 --- a/include/elemental/lapack-like/Norm/KyFan.hpp +++ b/include/elemental/lapack-like/Norm/KyFan.hpp @@ -27,7 +27,7 @@ KyFanNorm( const Matrix& A, int k ) typedef typename Base::type R; Matrix B( A ); Matrix s; - SingularValues( B, s ); + SVD( B, s ); R norm = 0; for( int j=k-1; j>=0; --j ) @@ -51,7 +51,7 @@ KyFanNorm( const DistMatrix& A, int k ) typedef typename Base::type R; DistMatrix B( A ); DistMatrix s( A.Grid() ); - SingularValues( B, s ); + SVD( B, s ); R localNorm = 0; DistMatrix sTop( A.Grid() ); diff --git a/include/elemental/lapack-like/Norm/Schatten.hpp b/include/elemental/lapack-like/Norm/Schatten.hpp index 42de4dd612..193c20a68f 100644 --- a/include/elemental/lapack-like/Norm/Schatten.hpp +++ b/include/elemental/lapack-like/Norm/Schatten.hpp @@ -24,7 +24,7 @@ SchattenNorm( const Matrix& A, typename Base::type p ) typedef typename Base::type R; Matrix B( A ); Matrix s; - SingularValues( B, s ); + SVD( B, s ); // TODO: Think of how to make this more stable const int k = s.Height(); @@ -48,7 +48,7 @@ SchattenNorm( const DistMatrix& A, typename Base::type p ) typedef typename Base::type R; DistMatrix B( A ); DistMatrix s( A.Grid() ); - SingularValues( B, s ); + SVD( B, s ); // TODO: Think of how to make this more stable const int kLocal = s.LocalHeight(); diff --git a/include/elemental/lapack-like/Norm/Two.hpp b/include/elemental/lapack-like/Norm/Two.hpp index 13cadaf840..a5c4e98ee9 100644 --- a/include/elemental/lapack-like/Norm/Two.hpp +++ b/include/elemental/lapack-like/Norm/Two.hpp @@ -25,7 +25,7 @@ TwoNorm( const Matrix& A ) typedef typename Base::type R; Matrix B( A ); Matrix s; - SingularValues( B, s ); + SVD( B, s ); const R norm = InfinityNorm( s ); #ifndef RELEASE PopCallStack(); @@ -43,7 +43,7 @@ TwoNorm( const DistMatrix& A ) typedef typename Base::type R; DistMatrix B( A ); DistMatrix s( A.Grid() ); - SingularValues( B, s ); + SVD( B, s ); const R norm = InfinityNorm( s ); #ifndef RELEASE PopCallStack(); diff --git a/include/elemental/lapack-like/SVD.hpp b/include/elemental/lapack-like/SVD.hpp index 16d978f85a..89f8f9d64a 100644 --- a/include/elemental/lapack-like/SVD.hpp +++ b/include/elemental/lapack-like/SVD.hpp @@ -10,757 +10,10 @@ #ifndef LAPACK_SVD_HPP #define LAPACK_SVD_HPP -#include "elemental/blas-like/level1/Adjoint.hpp" -#include "elemental/blas-like/level1/MakeTriangular.hpp" -#include "elemental/blas-like/level1/Scale.hpp" -#include "elemental/blas-like/level1/Transpose.hpp" -#include "elemental/blas-like/level3/Gemm.hpp" -#include "elemental/lapack-like/ApplyPackedReflectors.hpp" -#include "elemental/lapack-like/Bidiag.hpp" -#include "elemental/lapack-like/ExplicitQR.hpp" -#include "elemental/lapack-like/Norm/One.hpp" -#include "elemental/lapack-like/QR.hpp" -#include "elemental/matrices/Identity.hpp" -#include "elemental/matrices/Zeros.hpp" +#include "elemental/lapack-like/SVD/Chan.hpp" namespace elem { -namespace svd { - -template -inline void -SimpleSVDUpper -( DistMatrix& A, - DistMatrix& s, - DistMatrix& V ) -{ -#ifndef RELEASE - PushCallStack("svd::SimpleSVDUpper"); -#endif - const int m = A.Height(); - const int n = A.Width(); - const int k = std::min( m, n ); - const int offdiagonal = ( m>=n ? 1 : -1 ); - const char uplo = ( m>=n ? 'U' : 'L' ); - const Grid& g = A.Grid(); - - // Bidiagonalize A - Bidiag( A ); - - // Grab copies of the diagonal and sub/super-diagonal of A - DistMatrix d_MD_STAR( g ), - e_MD_STAR( g ); - A.GetDiagonal( d_MD_STAR ); - A.GetDiagonal( e_MD_STAR, offdiagonal ); - - // In order to use serial QR kernels, we need the full bidiagonal matrix - // on each process. - // - // NOTE: lapack::BidiagQRAlg expects e to be of length k - DistMatrix d_STAR_STAR( d_MD_STAR ); - DistMatrix eHat_STAR_STAR( k, 1, g ); - DistMatrix e_STAR_STAR( g ); - View( e_STAR_STAR, eHat_STAR_STAR, 0, 0, k-1, 1 ); - e_STAR_STAR = e_MD_STAR; - - // Initialize U and VTrans to the appropriate identity matrices. - DistMatrix U_VC_STAR( g ); - DistMatrix VTrans_STAR_VC( g ); - U_VC_STAR.AlignWith( A ); - VTrans_STAR_VC.AlignWith( V ); - Identity( m, k, U_VC_STAR ); - Identity( k, n, VTrans_STAR_VC ); - - // Compute the SVD of the bidiagonal matrix and accumulate the Givens - // rotations into our local portion of U and VTrans - Matrix& ULocal = U_VC_STAR.Matrix(); - Matrix& VTransLocal = VTrans_STAR_VC.Matrix(); - lapack::BidiagQRAlg - ( uplo, k, VTransLocal.Width(), ULocal.Height(), - d_STAR_STAR.Buffer(), e_STAR_STAR.Buffer(), - VTransLocal.Buffer(), VTransLocal.LDim(), - ULocal.Buffer(), ULocal.LDim() ); - - // Make a copy of A (for the Householder vectors) and pull the necessary - // portions of U and VTrans into a standard matrix dist. - DistMatrix B( A ); - if( m >= n ) - { - DistMatrix AT( g ), - AB( g ); - DistMatrix UT_VC_STAR( g ), - UB_VC_STAR( g ); - PartitionDown( A, AT, - AB, n ); - PartitionDown( U_VC_STAR, UT_VC_STAR, - UB_VC_STAR, n ); - AT = UT_VC_STAR; - MakeZeros( AB ); - Transpose( VTrans_STAR_VC, V ); - } - else - { - DistMatrix VT( g ), - VB( g ); - DistMatrix VTransL_STAR_VC( g ), VTransR_STAR_VC( g ); - PartitionDown( V, VT, - VB, m ); - PartitionRight( VTrans_STAR_VC, VTransL_STAR_VC, VTransR_STAR_VC, m ); - Transpose( VTransL_STAR_VC, VT ); - MakeZeros( VB ); - } - - // Backtransform U and V - if( m >= n ) - { - ApplyPackedReflectors( LEFT, LOWER, VERTICAL, BACKWARD, 0, B, A ); - ApplyPackedReflectors( LEFT, UPPER, HORIZONTAL, BACKWARD, 1, B, V ); - } - else - { - ApplyPackedReflectors( LEFT, LOWER, VERTICAL, BACKWARD, -1, B, A ); - ApplyPackedReflectors( LEFT, UPPER, HORIZONTAL, BACKWARD, 0, B, V ); - } - - // Copy out the appropriate subset of the singular values - s = d_STAR_STAR; -#ifndef RELEASE - PopCallStack(); -#endif -} - -template -inline void -SimpleSVDUpper -( DistMatrix >& A, - DistMatrix& s, - DistMatrix >& V ) -{ -#ifndef RELEASE - PushCallStack("svd::SimpleSVDUpper"); -#endif - typedef Complex C; - const int m = A.Height(); - const int n = A.Width(); - const int k = std::min( m, n ); - const int offdiagonal = ( m>=n ? 1 : -1 ); - const char uplo = ( m>=n ? 'U' : 'L' ); - const Grid& g = A.Grid(); - - // Bidiagonalize A - DistMatrix tP( g ), tQ( g ); - Bidiag( A, tP, tQ ); - - // Grab copies of the diagonal and sub/super-diagonal of A - DistMatrix d_MD_STAR( g ), - e_MD_STAR( g ); - A.GetRealPartOfDiagonal( d_MD_STAR ); - A.GetRealPartOfDiagonal( e_MD_STAR, offdiagonal ); - - // NOTE: lapack::BidiagQRAlg expects e to be of length k - DistMatrix d_STAR_STAR( d_MD_STAR ); - DistMatrix eHat_STAR_STAR( k, 1, g ); - DistMatrix e_STAR_STAR( g ); - View( e_STAR_STAR, eHat_STAR_STAR, 0, 0, k-1, 1 ); - e_STAR_STAR = e_MD_STAR; - - // Initialize U and VAdj to the appropriate identity matrices - DistMatrix U_VC_STAR( g ); - DistMatrix VAdj_STAR_VC( g ); - U_VC_STAR.AlignWith( A ); - VAdj_STAR_VC.AlignWith( V ); - Identity( m, k, U_VC_STAR ); - Identity( k, n, VAdj_STAR_VC ); - - // Compute the SVD of the bidiagonal matrix and accumulate the Givens - // rotations into our local portion of U and VAdj - Matrix& ULocal = U_VC_STAR.Matrix(); - Matrix& VAdjLocal = VAdj_STAR_VC.Matrix(); - lapack::BidiagQRAlg - ( uplo, k, VAdjLocal.Width(), ULocal.Height(), - d_STAR_STAR.Buffer(), e_STAR_STAR.Buffer(), - VAdjLocal.Buffer(), VAdjLocal.LDim(), - ULocal.Buffer(), ULocal.LDim() ); - - // Make a copy of A (for the Householder vectors) and pull the necessary - // portions of U and VAdj into a standard matrix dist. - DistMatrix B( A ); - if( m >= n ) - { - DistMatrix AT( g ), - AB( g ); - DistMatrix UT_VC_STAR( g ), - UB_VC_STAR( g ); - PartitionDown( A, AT, - AB, n ); - PartitionDown( U_VC_STAR, UT_VC_STAR, - UB_VC_STAR, n ); - AT = UT_VC_STAR; - MakeZeros( AB ); - Adjoint( VAdj_STAR_VC, V ); - } - else - { - DistMatrix VT( g ), - VB( g ); - DistMatrix VAdjL_STAR_VC( g ), VAdjR_STAR_VC( g ); - PartitionDown( V, VT, - VB, m ); - PartitionRight( VAdj_STAR_VC, VAdjL_STAR_VC, VAdjR_STAR_VC, m ); - Adjoint( VAdjL_STAR_VC, VT ); - MakeZeros( VB ); - } - - // Backtransform U and V - if( m >= n ) - { - ApplyPackedReflectors - ( LEFT, LOWER, VERTICAL, BACKWARD, UNCONJUGATED, 0, B, tQ, A ); - ApplyPackedReflectors - ( LEFT, UPPER, HORIZONTAL, BACKWARD, UNCONJUGATED, 1, B, tP, V ); - } - else - { - ApplyPackedReflectors - ( LEFT, LOWER, VERTICAL, BACKWARD, UNCONJUGATED, -1, B, tQ, A ); - ApplyPackedReflectors - ( LEFT, UPPER, HORIZONTAL, BACKWARD, UNCONJUGATED, 0, B, tP, V ); - } - - // Copy out the appropriate subset of the singular values - s = d_STAR_STAR; -#ifndef RELEASE - PopCallStack(); -#endif -} - -#ifdef HAVE_FLA_BSVD -template<> -inline void -SimpleSVDUpper -( DistMatrix& A, - DistMatrix& s, - DistMatrix& V ) -{ -#ifndef RELEASE - PushCallStack("svd::SimpleSVDUpper"); -#endif - typedef double Real; - typedef Complex C; - - const int m = A.Height(); - const int n = A.Width(); - const int k = std::min( m, n ); - const int offdiagonal = ( m>=n ? 1 : -1 ); - const char uplo = ( m>=n ? 'U' : 'L' ); - const Grid& g = A.Grid(); - - // Bidiagonalize A - Bidiag( A ); - - // Grab copies of the diagonal and sub/super-diagonal of A - DistMatrix d_MD_STAR( g ), - e_MD_STAR( g ); - A.GetDiagonal( d_MD_STAR ); - A.GetDiagonal( e_MD_STAR, offdiagonal ); - - // In order to use serial QR kernels, we need the full bidiagonal matrix - // on each process - DistMatrix d_STAR_STAR( d_MD_STAR ), - e_STAR_STAR( e_MD_STAR ); - - // Initialize U and V to the appropriate identity matrices. - DistMatrix U_VC_STAR( g ); - DistMatrix V_VC_STAR( g ); - U_VC_STAR.AlignWith( A ); - V_VC_STAR.AlignWith( V ); - Identity( m, k, U_VC_STAR ); - Identity( n, k, V_VC_STAR ); - - // Compute the SVD of the bidiagonal matrix and accumulate the Givens - // rotations into our local portion of U and V - // NOTE: This _only_ works in the case where m >= n - const int numAccum = 32; - const int maxNumIts = 30; - const int bAlg = 512; - std::vector GBuffer( (k-1)*numAccum ), - HBuffer( (k-1)*numAccum ); - FLA_Bsvd_v_opd_var1 - ( k, U_VC_STAR.LocalHeight(), V_VC_STAR.LocalHeight(), - numAccum, maxNumIts, - d_STAR_STAR.Buffer(), 1, - e_STAR_STAR.Buffer(), 1, - &GBuffer[0], 1, k-1, - &HBuffer[0], 1, k-1, - U_VC_STAR.Buffer(), 1, U_VC_STAR.LDim(), - V_VC_STAR.Buffer(), 1, V_VC_STAR.LDim(), - bAlg ); - - // Make a copy of A (for the Householder vectors) and pull the necessary - // portions of U and V into a standard matrix dist. - DistMatrix B( A ); - if( m >= n ) - { - DistMatrix AT( g ), - AB( g ); - DistMatrix UT_VC_STAR( g ), - UB_VC_STAR( g ); - PartitionDown( A, AT, - AB, n ); - PartitionDown( U_VC_STAR, UT_VC_STAR, - UB_VC_STAR, n ); - AT = UT_VC_STAR; - MakeZeros( AB ); - V = V_VC_STAR; - } - else - { - DistMatrix VT( g ), - VB( g ); - DistMatrix VT_VC_STAR( g ), - VB_VC_STAR( g ); - PartitionDown( V, VT, - VB, m ); - PartitionDown - ( V_VC_STAR, VT_VC_STAR, - VB_VC_STAR, m ); - VT = VT_VC_STAR; - MakeZeros( VB ); - } - - // Backtransform U and V - if( m >= n ) - { - ApplyPackedReflectors( LEFT, LOWER, VERTICAL, BACKWARD, 0, B, A ); - ApplyPackedReflectors( LEFT, UPPER, HORIZONTAL, BACKWARD, 1, B, V ); - } - else - { - ApplyPackedReflectors( LEFT, LOWER, VERTICAL, BACKWARD, -1, B, A ); - ApplyPackedReflectors( LEFT, UPPER, HORIZONTAL, BACKWARD, 0, B, V ); - } - - // Copy out the appropriate subset of the singular values - s = d_STAR_STAR; -#ifndef RELEASE - PopCallStack(); -#endif -} - -template<> -inline void -SimpleSVDUpper -( DistMatrix >& A, - DistMatrix& s, - DistMatrix >& V ) -{ -#ifndef RELEASE - PushCallStack("svd::SimpleSVDUpper"); -#endif - typedef double Real; - typedef Complex C; - - const int m = A.Height(); - const int n = A.Width(); - const int k = std::min( m, n ); - const int offdiagonal = ( m>=n ? 1 : -1 ); - const char uplo = ( m>=n ? 'U' : 'L' ); - const Grid& g = A.Grid(); - - // Bidiagonalize A - DistMatrix tP( g ), tQ( g ); - Bidiag( A, tP, tQ ); - - // Grab copies of the diagonal and sub/super-diagonal of A - DistMatrix d_MD_STAR( g ), - e_MD_STAR( g ); - A.GetRealPartOfDiagonal( d_MD_STAR ); - A.GetRealPartOfDiagonal( e_MD_STAR, offdiagonal ); - - // In order to use serial QR kernels, we need the full bidiagonal matrix - // on each process - DistMatrix d_STAR_STAR( d_MD_STAR ), - e_STAR_STAR( e_MD_STAR ); - - // Initialize U and VAdj to the appropriate identity matrices - DistMatrix U_VC_STAR( g ); - DistMatrix V_VC_STAR( g ); - U_VC_STAR.AlignWith( A ); - V_VC_STAR.AlignWith( V ); - Identity( m, k, U_VC_STAR ); - Identity( n, k, V_VC_STAR ); - - // Compute the SVD of the bidiagonal matrix and accumulate the Givens - // rotations into our local portion of U and V - // NOTE: This _only_ works in the case where m >= n - const int numAccum = 32; - const int maxNumIts = 30; - const int bAlg = 512; - std::vector GBuffer( (k-1)*numAccum ), - HBuffer( (k-1)*numAccum ); - FLA_Bsvd_v_opz_var1 - ( k, U_VC_STAR.LocalHeight(), V_VC_STAR.LocalHeight(), - numAccum, maxNumIts, - d_STAR_STAR.Buffer(), 1, - e_STAR_STAR.Buffer(), 1, - &GBuffer[0], 1, k-1, - &HBuffer[0], 1, k-1, - U_VC_STAR.Buffer(), 1, U_VC_STAR.LDim(), - V_VC_STAR.Buffer(), 1, V_VC_STAR.LDim(), - bAlg ); - - // Make a copy of A (for the Householder vectors) and pull the necessary - // portions of U and V into a standard matrix dist. - DistMatrix B( A ); - if( m >= n ) - { - DistMatrix AT( g ), - AB( g ); - DistMatrix UT_VC_STAR( g ), - UB_VC_STAR( g ); - PartitionDown( A, AT, - AB, n ); - PartitionDown( U_VC_STAR, UT_VC_STAR, - UB_VC_STAR, n ); - AT = UT_VC_STAR; - MakeZeros( AB ); - V = V_VC_STAR; - } - else - { - DistMatrix VT( g ), - VB( g ); - DistMatrix VT_VC_STAR( g ), - VB_VC_STAR( g ); - PartitionDown( V, VT, - VB, m ); - PartitionDown - ( V_VC_STAR, VT_VC_STAR, - VB_VC_STAR, m ); - VT = VT_VC_STAR; - MakeZeros( VB ); - } - - // Backtransform U and V - if( m >= n ) - { - ApplyPackedReflectors - ( LEFT, LOWER, VERTICAL, BACKWARD, UNCONJUGATED, 0, B, tQ, A ); - ApplyPackedReflectors - ( LEFT, UPPER, HORIZONTAL, BACKWARD, UNCONJUGATED, 1, B, tP, V ); - } - else - { - ApplyPackedReflectors - ( LEFT, LOWER, VERTICAL, BACKWARD, UNCONJUGATED, -1, B, tQ, A ); - ApplyPackedReflectors - ( LEFT, UPPER, HORIZONTAL, BACKWARD, UNCONJUGATED, 0, B, tP, V ); - } - - // Copy out the appropriate subset of the singular values - s = d_STAR_STAR; -#ifndef RELEASE - PopCallStack(); -#endif -} -#endif // HAVE_FLA_BSVD - -template -inline void -SVDUpper -( DistMatrix& A, - DistMatrix::type,VR,STAR>& s, - DistMatrix& V, - double heightRatio=1.5 ) -{ -#ifndef RELEASE - PushCallStack("svd::SVDUpper"); - if( A.Height() < A.Width() ) - throw std::logic_error("A must be at least as tall as it is wide"); - if( heightRatio <= 1.0 ) - throw std::logic_error("Nonsensical switchpoint for SVD"); -#endif - const Grid& g = A.Grid(); - const int m = A.Height(); - const int n = A.Width(); - if( m > heightRatio*n ) - { - DistMatrix R(g); - ExplicitQR( A, R ); - svd::SimpleSVDUpper( R, s, V ); - // Unfortunately, extra memory is used in forming A := A R, - // where A has been overwritten with the Q from the QR factorization - // of the original state of A, and R has been overwritten with the U - // from the SVD of the R from the QR factorization of A - // - // Perhaps this should be broken into pieces. - DistMatrix ACopy( A ); - Gemm( NORMAL, NORMAL, F(1), ACopy, R, F(0), A ); - } - else - { - svd::SimpleSVDUpper( A, s, V ); - } -#ifndef RELEASE - PopCallStack(); -#endif -} - -template -inline void -SimpleSingularValuesUpper -( DistMatrix& A, - DistMatrix& s ) -{ -#ifndef RELEASE - PushCallStack("svd::SimpleSingularValuesUpper"); -#endif - const int m = A.Height(); - const int n = A.Width(); - const int k = std::min( m, n ); - const int offdiagonal = ( m>=n ? 1 : -1 ); - const char uplo = ( m>=n ? 'U' : 'L' ); - const Grid& g = A.Grid(); - - // Bidiagonalize A - Bidiag( A ); - - // Grab copies of the diagonal and sub/super-diagonal of A - DistMatrix d_MD_STAR( g ), - e_MD_STAR( g ); - A.GetDiagonal( d_MD_STAR ); - A.GetDiagonal( e_MD_STAR, offdiagonal ); - - - // In order to use serial QR kernels, we need the full bidiagonal matrix - // on each process - // - // NOTE: lapack::BidiagQRAlg expects e to be of length k - DistMatrix d_STAR_STAR( d_MD_STAR ); - DistMatrix eHat_STAR_STAR( k, 1, g ); - DistMatrix e_STAR_STAR( g ); - View( e_STAR_STAR, eHat_STAR_STAR, 0, 0, k-1, 1 ); - e_STAR_STAR = e_MD_STAR; - - // Compute the singular values of the bidiagonal matrix - lapack::BidiagQRAlg - ( uplo, k, 0, 0, - d_STAR_STAR.Buffer(), e_STAR_STAR.Buffer(), - (Real*)0, 1, (Real*)0, 1 ); - - // Copy out the appropriate subset of the singular values - s = d_STAR_STAR; -#ifndef RELEASE - PopCallStack(); -#endif -} - -template -inline void -SimpleSingularValuesUpper -( DistMatrix >& A, - DistMatrix& s ) -{ -#ifndef RELEASE - PushCallStack("svd::SimpleSingularValuesUpper"); -#endif - typedef Complex C; - const int m = A.Height(); - const int n = A.Width(); - const int k = std::min( m, n ); - const int offdiagonal = ( m>=n ? 1 : -1 ); - const char uplo = ( m>=n ? 'U' : 'L' ); - const Grid& g = A.Grid(); - - // Bidiagonalize A - DistMatrix tP(g), tQ(g); - Bidiag( A, tP, tQ ); - - // Grab copies of the diagonal and sub/super-diagonal of A - DistMatrix d_MD_STAR( g ), - e_MD_STAR( g ); - A.GetRealPartOfDiagonal( d_MD_STAR ); - A.GetRealPartOfDiagonal( e_MD_STAR, offdiagonal ); - - // In order to use serial QR kernels, we need the full bidiagonal matrix - // on each process - // - // NOTE: lapack::BidiagQRAlg expects e to be of length k - DistMatrix d_STAR_STAR( d_MD_STAR ); - DistMatrix eHat_STAR_STAR( k, 1, g ); - DistMatrix e_STAR_STAR( g ); - View( e_STAR_STAR, eHat_STAR_STAR, 0, 0, k-1, 1 ); - e_STAR_STAR = e_MD_STAR; - - // Compute the singular values of the bidiagonal matrix - lapack::BidiagQRAlg - ( uplo, k, 0, 0, - d_STAR_STAR.Buffer(), e_STAR_STAR.Buffer(), - (C*)0, 1, (C*)0, 1 ); - - // Copy out the appropriate subset of the singular values - s = d_STAR_STAR; -#ifndef RELEASE - PopCallStack(); -#endif -} - -template -inline void -SingularValuesUpper -( DistMatrix& A, - DistMatrix& s, - double heightRatio=1.2 ) -{ -#ifndef RELEASE - PushCallStack("svd::SingularValuesUpper"); - if( heightRatio <= 1.0 ) - throw std::logic_error("Nonsensical switchpoint for SingularValues"); -#endif - const Grid& g = A.Grid(); - const int m = A.Height(); - const int n = A.Width(); - if( m >= heightRatio*n ) - { - QR( A ); - DistMatrix AT(g), - AB(g); - PartitionDown - ( A, AT, - AB, n ); - MakeTriangular( UPPER, AT ); - SimpleSingularValuesUpper( AT, s ); - } - else - { - SimpleSingularValuesUpper( A, s ); - } -#ifndef RELEASE - PopCallStack(); -#endif -} - -template -inline void -SingularValuesUpper -( DistMatrix >& A, - DistMatrix& s, - double heightRatio=1.2 ) -{ -#ifndef RELEASE - PushCallStack("svd::SingularValuesUpper"); - if( heightRatio <= 1.0 ) - throw std::logic_error("Nonsensical switchpoint for SingularValues"); -#endif - typedef Complex C; - const Grid& g = A.Grid(); - const int m = A.Height(); - const int n = A.Width(); - if( m >= heightRatio*n ) - { - DistMatrix t(g); - QR( A, t ); - DistMatrix AT(g), - AB(g); - PartitionDown - ( A, AT, - AB, n ); - MakeTriangular( UPPER, AT ); - SimpleSingularValuesUpper( AT, s ); - } - else - { - SimpleSingularValuesUpper( A, s ); - } -#ifndef RELEASE - PopCallStack(); -#endif -} - -template -inline void -CheckScale -( DistMatrix& A, bool& needRescaling, typename Base::type& scale ) -{ - typedef typename Base::type R; - - scale = 1; - needRescaling = false; - const R oneNormOfA = OneNorm( A ); - const R safeMin = lapack::MachineSafeMin(); - const R precision = lapack::MachinePrecision(); - const R smallNumber = safeMin/precision; - const R bigNumber = 1/smallNumber; - const R rhoMin = Sqrt(smallNumber); - const R rhoMax = std::min( Sqrt(bigNumber), 1/Sqrt(Sqrt(safeMin)) ); - - if( oneNormOfA > 0 && oneNormOfA < rhoMin ) - { - needRescaling = true; - scale = rhoMin/oneNormOfA; - } - else if( oneNormOfA > rhoMax ) - { - needRescaling = true; - scale = rhoMax/oneNormOfA; - } -} - -template -inline void -DivideAndConquerSVD -( Matrix& A, Matrix::type>& s, Matrix& V ) -{ -#ifndef RELEASE - PushCallStack("svd::DivideAndConquerSVD"); -#endif - typedef typename Base::type R; - - const int m = A.Height(); - const int n = A.Width(); - const int k = std::min(m,n); - s.ResizeTo( k, 1 ); - Matrix U( m, k ); - Matrix VAdj( k, n ); - lapack::DivideAndConquerSVD - ( m, n, A.Buffer(), A.LDim(), s.Buffer(), U.Buffer(), U.LDim(), - VAdj.Buffer(), VAdj.LDim() ); - - A = U; - Adjoint( VAdj, V ); -#ifndef RELEASE - PopCallStack(); -#endif -} - -template -inline void -QRSVD( Matrix& A, Matrix::type>& s, Matrix& V ) -{ -#ifndef RELEASE - PushCallStack("svd::QRSVD"); -#endif - typedef typename Base::type R; - - const int m = A.Height(); - const int n = A.Width(); - const int k = std::min(m,n); - s.ResizeTo( k, 1 ); - Matrix U( m, k ); - Matrix VAdj( k, n ); - lapack::QRSVD - ( m, n, A.Buffer(), A.LDim(), s.Buffer(), U.Buffer(), U.LDim(), - VAdj.Buffer(), VAdj.LDim() ); - - A = U; - Adjoint( VAdj, V ); -#ifndef RELEASE - PopCallStack(); -#endif -} - -} // namespace svd - //----------------------------------------------------------------------------// // Grab the full SVD of the general matrix A, A = U diag(s) V^H. // // On exit, A is overwritten with U. // @@ -794,49 +47,24 @@ SVD { #ifndef RELEASE PushCallStack("SVD"); - if( heightRatio <= 1.0 ) - throw std::logic_error("Nonsensical switchpoint for SVD"); #endif - typedef typename Base::type Real; - - // Check if we need to rescale the matrix, and do so if necessary - bool needRescaling; - Real scale; - svd::CheckScale( A, needRescaling, scale ); - if( needRescaling ) - Scale( scale, A ); - - // TODO: Switch between different algorithms. For instance, starting - // with a QR decomposition of tall-skinny matrices. - if( A.Height() >= A.Width() ) - { - svd::SVDUpper( A, s, V, heightRatio ); - } - else - { - // Lower bidiagonalization is not yet supported, so we instead play a - // trick to get the SVD of A. - Adjoint( A, V ); - svd::SVDUpper( V, s, A, heightRatio ); - } - - // Rescale the singular values if necessary - if( needRescaling ) - Scale( 1/scale, s ); + // TODO: Add more options + svd::Chan( A, s, V, heightRatio ); #ifndef RELEASE PopCallStack(); #endif } //----------------------------------------------------------------------------// -// Grab the singular values of the general matrix A. // +// Grab the singular values of the general matrix A using the QR algorithm. // //----------------------------------------------------------------------------// + template inline void -SingularValues( Matrix& A, Matrix::type>& s ) +SVD( Matrix& A, Matrix::type>& s ) { #ifndef RELEASE - PushCallStack("SingularValues"); + PushCallStack("SVD"); #endif typedef typename Base::type R; @@ -844,7 +72,7 @@ SingularValues( Matrix& A, Matrix::type>& s ) const int n = A.Width(); const int k = std::min(m,n); s.ResizeTo( k, 1 ); - lapack::SingularValues( m, n, A.Buffer(), A.LDim(), s.Buffer() ); + lapack::SVD( m, n, A.Buffer(), A.LDim(), s.Buffer() ); #ifndef RELEASE PopCallStack(); #endif @@ -852,41 +80,15 @@ SingularValues( Matrix& A, Matrix::type>& s ) template inline void -SingularValues -( DistMatrix& A, - DistMatrix::type,VR,STAR>& s, +SVD +( DistMatrix& A, DistMatrix::type,VR,STAR>& s, double heightRatio=1.2 ) { #ifndef RELEASE - PushCallStack("SingularValues"); + PushCallStack("SVD"); #endif - typedef typename Base::type R; - - // Check if we need to rescale the matrix, and do so if necessary - bool needRescaling; - R scale; - svd::CheckScale( A, needRescaling, scale ); - if( needRescaling ) - Scale( scale, A ); - - // TODO: Switch between different algorithms. For instance, starting - // with a QR decomposition of tall-skinny matrices. - if( A.Height() >= A.Width() ) - { - svd::SingularValuesUpper( A, s, heightRatio ); - } - else - { - // Lower bidiagonalization is not yet supported, so we instead play a - // trick to get the SVD of A. - DistMatrix AAdj( A.Grid() ); - Adjoint( A, AAdj ); - svd::SingularValuesUpper( AAdj, s, heightRatio ); - } - - // Rescale the singular values if necessary - if( needRescaling ) - Scale( 1/scale, s ); + // TODO: Add more options + svd::Chan( A, s, heightRatio ); #ifndef RELEASE PopCallStack(); #endif diff --git a/include/elemental/lapack-like/SVD/Chan.hpp b/include/elemental/lapack-like/SVD/Chan.hpp new file mode 100644 index 0000000000..82b18c8db8 --- /dev/null +++ b/include/elemental/lapack-like/SVD/Chan.hpp @@ -0,0 +1,236 @@ +/* + Copyright (c) 2009-2013, Jack Poulson + All rights reserved. + + This file is part of Elemental and is under the BSD 2-Clause License, + which can be found in the LICENSE file in the root directory, or at + http://opensource.org/licenses/BSD-2-Clause +*/ +#pragma once +#ifndef LAPACK_SVD_CHAN_HPP +#define LAPACK_SVD_CHAN_HPP + +#include "elemental/blas-like/level1/Adjoint.hpp" +#include "elemental/blas-like/level1/MakeTriangular.hpp" +#include "elemental/blas-like/level1/Scale.hpp" +#include "elemental/blas-like/level3/Gemm.hpp" +#include "elemental/lapack-like/ExplicitQR.hpp" +#include "elemental/lapack-like/QR.hpp" + +#include "elemental/lapack-like/SVD/GolubReinsch.hpp" + +namespace elem { +namespace svd { + +template +inline void +ChanUpper +( DistMatrix& A, + DistMatrix::type,VR,STAR>& s, + DistMatrix& V, + double heightRatio=1.5 ) +{ +#ifndef RELEASE + PushCallStack("svd::ChanUpper"); + if( A.Height() < A.Width() ) + throw std::logic_error("A must be at least as tall as it is wide"); + if( heightRatio <= 1.0 ) + throw std::logic_error("Nonsensical switchpoint for SVD"); +#endif + const Grid& g = A.Grid(); + const int m = A.Height(); + const int n = A.Width(); + if( m > heightRatio*n ) + { + DistMatrix R(g); + ExplicitQR( A, R ); + svd::GolubReinschUpper( R, s, V ); + // Unfortunately, extra memory is used in forming A := A R, + // where A has been overwritten with the Q from the QR factorization + // of the original state of A, and R has been overwritten with the U + // from the SVD of the R from the QR factorization of A + // + // Perhaps this should be broken into pieces. + DistMatrix ACopy( A ); + Gemm( NORMAL, NORMAL, F(1), ACopy, R, F(0), A ); + } + else + { + svd::GolubReinschUpper( A, s, V ); + } +#ifndef RELEASE + PopCallStack(); +#endif +} + +template +inline void +ChanUpper +( DistMatrix& A, + DistMatrix& s, + double heightRatio=1.2 ) +{ +#ifndef RELEASE + PushCallStack("svd::ChanUpper"); + if( heightRatio <= 1.0 ) + throw std::logic_error("Nonsensical switchpoint"); +#endif + const Grid& g = A.Grid(); + const int m = A.Height(); + const int n = A.Width(); + if( m >= heightRatio*n ) + { + QR( A ); + DistMatrix AT(g), + AB(g); + PartitionDown + ( A, AT, + AB, n ); + MakeTriangular( UPPER, AT ); + GolubReinschUpper( AT, s ); + } + else + { + GolubReinschUpper( A, s ); + } +#ifndef RELEASE + PopCallStack(); +#endif +} + +template +inline void +ChanUpper +( DistMatrix >& A, + DistMatrix& s, + double heightRatio=1.2 ) +{ +#ifndef RELEASE + PushCallStack("svd::ChanUpper"); + if( heightRatio <= 1.0 ) + throw std::logic_error("Nonsensical switchpoint"); +#endif + typedef Complex C; + const Grid& g = A.Grid(); + const int m = A.Height(); + const int n = A.Width(); + if( m >= heightRatio*n ) + { + DistMatrix t(g); + QR( A, t ); + DistMatrix AT(g), + AB(g); + PartitionDown + ( A, AT, + AB, n ); + MakeTriangular( UPPER, AT ); + GolubReinschUpper( AT, s ); + } + else + { + GolubReinschUpper( A, s ); + } +#ifndef RELEASE + PopCallStack(); +#endif +} + +//----------------------------------------------------------------------------// +// Grab the full SVD of the general matrix A, A = U diag(s) V^H using Chan's // +// algorithm. On exit, A is overwritten with U. // +//----------------------------------------------------------------------------// + +template +inline void +Chan +( DistMatrix& A, + DistMatrix::type,VR,STAR>& s, + DistMatrix& V, + double heightRatio=1.5 ) +{ +#ifndef RELEASE + PushCallStack("svd::Chan"); + if( heightRatio <= 1.0 ) + throw std::logic_error("Nonsensical switchpoint for SVD"); +#endif + typedef typename Base::type Real; + + // Check if we need to rescale the matrix, and do so if necessary + bool needRescaling; + Real scale; + svd::CheckScale( A, needRescaling, scale ); + if( needRescaling ) + Scale( scale, A ); + + // TODO: Switch between different algorithms. For instance, starting + // with a QR decomposition of tall-skinny matrices. + if( A.Height() >= A.Width() ) + { + svd::ChanUpper( A, s, V, heightRatio ); + } + else + { + // Lower bidiagonalization is not yet supported, so we instead play a + // trick to get the SVD of A. + Adjoint( A, V ); + svd::ChanUpper( V, s, A, heightRatio ); + } + + // Rescale the singular values if necessary + if( needRescaling ) + Scale( 1/scale, s ); +#ifndef RELEASE + PopCallStack(); +#endif +} + +//----------------------------------------------------------------------------// +// Grab the singular values of the general matrix A. // +//----------------------------------------------------------------------------// + +template +inline void +Chan +( DistMatrix& A, + DistMatrix::type,VR,STAR>& s, + double heightRatio=1.2 ) +{ +#ifndef RELEASE + PushCallStack("svd::Chan"); +#endif + typedef typename Base::type R; + + // Check if we need to rescale the matrix, and do so if necessary + bool needRescaling; + R scale; + svd::CheckScale( A, needRescaling, scale ); + if( needRescaling ) + Scale( scale, A ); + + // TODO: Switch between different algorithms. For instance, starting + // with a QR decomposition of tall-skinny matrices. + if( A.Height() >= A.Width() ) + { + svd::ChanUpper( A, s, heightRatio ); + } + else + { + // Lower bidiagonalization is not yet supported, so we instead play a + // trick to get the SVD of A. + DistMatrix AAdj( A.Grid() ); + Adjoint( A, AAdj ); + svd::ChanUpper( AAdj, s, heightRatio ); + } + + // Rescale the singular values if necessary + if( needRescaling ) + Scale( 1/scale, s ); +#ifndef RELEASE + PopCallStack(); +#endif +} + +} // namespace svd +} // namespace elem + +#endif // ifndef LAPACK_SVD_CHAN_HPP diff --git a/include/elemental/lapack-like/SVD/GolubReinsch.hpp b/include/elemental/lapack-like/SVD/GolubReinsch.hpp new file mode 100644 index 0000000000..1b0aa03ce0 --- /dev/null +++ b/include/elemental/lapack-like/SVD/GolubReinsch.hpp @@ -0,0 +1,567 @@ +/* + Copyright (c) 2009-2013, Jack Poulson + All rights reserved. + + This file is part of Elemental and is under the BSD 2-Clause License, + which can be found in the LICENSE file in the root directory, or at + http://opensource.org/licenses/BSD-2-Clause +*/ +#pragma once +#ifndef LAPACK_SVD_GOLUBREINSCH_HPP +#define LAPACK_SVD_GOLUBREINSCH_HPP + +#include "elemental/blas-like/level1/Adjoint.hpp" +#include "elemental/blas-like/level1/Transpose.hpp" +#include "elemental/lapack-like/ApplyPackedReflectors.hpp" +#include "elemental/lapack-like/Bidiag.hpp" +#include "elemental/matrices/Identity.hpp" +#include "elemental/matrices/Zeros.hpp" + +#include "elemental/lapack-like/SVD/Util.hpp" + +namespace elem { +namespace svd { + +// TODO: Wrap below routines to produce svd::GolubReinsch? + +template +inline void +GolubReinschUpper +( DistMatrix& A, + DistMatrix& s, + DistMatrix& V ) +{ +#ifndef RELEASE + PushCallStack("svd::GolubReinschUpper"); +#endif + const int m = A.Height(); + const int n = A.Width(); + const int k = std::min( m, n ); + const int offdiagonal = ( m>=n ? 1 : -1 ); + const char uplo = ( m>=n ? 'U' : 'L' ); + const Grid& g = A.Grid(); + + // Bidiagonalize A + Bidiag( A ); + + // Grab copies of the diagonal and sub/super-diagonal of A + DistMatrix d_MD_STAR( g ), + e_MD_STAR( g ); + A.GetDiagonal( d_MD_STAR ); + A.GetDiagonal( e_MD_STAR, offdiagonal ); + + // In order to use serial QR kernels, we need the full bidiagonal matrix + // on each process. + // + // NOTE: lapack::BidiagQRAlg expects e to be of length k + DistMatrix d_STAR_STAR( d_MD_STAR ); + DistMatrix eHat_STAR_STAR( k, 1, g ); + DistMatrix e_STAR_STAR( g ); + View( e_STAR_STAR, eHat_STAR_STAR, 0, 0, k-1, 1 ); + e_STAR_STAR = e_MD_STAR; + + // Initialize U and VTrans to the appropriate identity matrices. + DistMatrix U_VC_STAR( g ); + DistMatrix VTrans_STAR_VC( g ); + U_VC_STAR.AlignWith( A ); + VTrans_STAR_VC.AlignWith( V ); + Identity( m, k, U_VC_STAR ); + Identity( k, n, VTrans_STAR_VC ); + + // Compute the SVD of the bidiagonal matrix and accumulate the Givens + // rotations into our local portion of U and VTrans + Matrix& ULocal = U_VC_STAR.Matrix(); + Matrix& VTransLocal = VTrans_STAR_VC.Matrix(); + lapack::BidiagQRAlg + ( uplo, k, VTransLocal.Width(), ULocal.Height(), + d_STAR_STAR.Buffer(), e_STAR_STAR.Buffer(), + VTransLocal.Buffer(), VTransLocal.LDim(), + ULocal.Buffer(), ULocal.LDim() ); + + // Make a copy of A (for the Householder vectors) and pull the necessary + // portions of U and VTrans into a standard matrix dist. + DistMatrix B( A ); + if( m >= n ) + { + DistMatrix AT( g ), + AB( g ); + DistMatrix UT_VC_STAR( g ), + UB_VC_STAR( g ); + PartitionDown( A, AT, + AB, n ); + PartitionDown( U_VC_STAR, UT_VC_STAR, + UB_VC_STAR, n ); + AT = UT_VC_STAR; + MakeZeros( AB ); + Transpose( VTrans_STAR_VC, V ); + } + else + { + DistMatrix VT( g ), + VB( g ); + DistMatrix VTransL_STAR_VC( g ), VTransR_STAR_VC( g ); + PartitionDown( V, VT, + VB, m ); + PartitionRight( VTrans_STAR_VC, VTransL_STAR_VC, VTransR_STAR_VC, m ); + Transpose( VTransL_STAR_VC, VT ); + MakeZeros( VB ); + } + + // Backtransform U and V + if( m >= n ) + { + ApplyPackedReflectors( LEFT, LOWER, VERTICAL, BACKWARD, 0, B, A ); + ApplyPackedReflectors( LEFT, UPPER, HORIZONTAL, BACKWARD, 1, B, V ); + } + else + { + ApplyPackedReflectors( LEFT, LOWER, VERTICAL, BACKWARD, -1, B, A ); + ApplyPackedReflectors( LEFT, UPPER, HORIZONTAL, BACKWARD, 0, B, V ); + } + + // Copy out the appropriate subset of the singular values + s = d_STAR_STAR; +#ifndef RELEASE + PopCallStack(); +#endif +} + +template +inline void +GolubReinschUpper +( DistMatrix >& A, + DistMatrix& s, + DistMatrix >& V ) +{ +#ifndef RELEASE + PushCallStack("svd::GolubReinschUpper"); +#endif + typedef Complex C; + const int m = A.Height(); + const int n = A.Width(); + const int k = std::min( m, n ); + const int offdiagonal = ( m>=n ? 1 : -1 ); + const char uplo = ( m>=n ? 'U' : 'L' ); + const Grid& g = A.Grid(); + + // Bidiagonalize A + DistMatrix tP( g ), tQ( g ); + Bidiag( A, tP, tQ ); + + // Grab copies of the diagonal and sub/super-diagonal of A + DistMatrix d_MD_STAR( g ), + e_MD_STAR( g ); + A.GetRealPartOfDiagonal( d_MD_STAR ); + A.GetRealPartOfDiagonal( e_MD_STAR, offdiagonal ); + + // NOTE: lapack::BidiagQRAlg expects e to be of length k + DistMatrix d_STAR_STAR( d_MD_STAR ); + DistMatrix eHat_STAR_STAR( k, 1, g ); + DistMatrix e_STAR_STAR( g ); + View( e_STAR_STAR, eHat_STAR_STAR, 0, 0, k-1, 1 ); + e_STAR_STAR = e_MD_STAR; + + // Initialize U and VAdj to the appropriate identity matrices + DistMatrix U_VC_STAR( g ); + DistMatrix VAdj_STAR_VC( g ); + U_VC_STAR.AlignWith( A ); + VAdj_STAR_VC.AlignWith( V ); + Identity( m, k, U_VC_STAR ); + Identity( k, n, VAdj_STAR_VC ); + + // Compute the SVD of the bidiagonal matrix and accumulate the Givens + // rotations into our local portion of U and VAdj + Matrix& ULocal = U_VC_STAR.Matrix(); + Matrix& VAdjLocal = VAdj_STAR_VC.Matrix(); + lapack::BidiagQRAlg + ( uplo, k, VAdjLocal.Width(), ULocal.Height(), + d_STAR_STAR.Buffer(), e_STAR_STAR.Buffer(), + VAdjLocal.Buffer(), VAdjLocal.LDim(), + ULocal.Buffer(), ULocal.LDim() ); + + // Make a copy of A (for the Householder vectors) and pull the necessary + // portions of U and VAdj into a standard matrix dist. + DistMatrix B( A ); + if( m >= n ) + { + DistMatrix AT( g ), + AB( g ); + DistMatrix UT_VC_STAR( g ), + UB_VC_STAR( g ); + PartitionDown( A, AT, + AB, n ); + PartitionDown( U_VC_STAR, UT_VC_STAR, + UB_VC_STAR, n ); + AT = UT_VC_STAR; + MakeZeros( AB ); + Adjoint( VAdj_STAR_VC, V ); + } + else + { + DistMatrix VT( g ), + VB( g ); + DistMatrix VAdjL_STAR_VC( g ), VAdjR_STAR_VC( g ); + PartitionDown( V, VT, + VB, m ); + PartitionRight( VAdj_STAR_VC, VAdjL_STAR_VC, VAdjR_STAR_VC, m ); + Adjoint( VAdjL_STAR_VC, VT ); + MakeZeros( VB ); + } + + // Backtransform U and V + if( m >= n ) + { + ApplyPackedReflectors + ( LEFT, LOWER, VERTICAL, BACKWARD, UNCONJUGATED, 0, B, tQ, A ); + ApplyPackedReflectors + ( LEFT, UPPER, HORIZONTAL, BACKWARD, UNCONJUGATED, 1, B, tP, V ); + } + else + { + ApplyPackedReflectors + ( LEFT, LOWER, VERTICAL, BACKWARD, UNCONJUGATED, -1, B, tQ, A ); + ApplyPackedReflectors + ( LEFT, UPPER, HORIZONTAL, BACKWARD, UNCONJUGATED, 0, B, tP, V ); + } + + // Copy out the appropriate subset of the singular values + s = d_STAR_STAR; +#ifndef RELEASE + PopCallStack(); +#endif +} + +#ifdef HAVE_FLA_BSVD +template<> +inline void +GolubReinschUpper +( DistMatrix& A, + DistMatrix& s, + DistMatrix& V ) +{ +#ifndef RELEASE + PushCallStack("svd::GolubReinschUpper"); +#endif + typedef double Real; + typedef Complex C; + + const int m = A.Height(); + const int n = A.Width(); + const int k = std::min( m, n ); + const int offdiagonal = ( m>=n ? 1 : -1 ); + const char uplo = ( m>=n ? 'U' : 'L' ); + const Grid& g = A.Grid(); + + // Bidiagonalize A + Bidiag( A ); + + // Grab copies of the diagonal and sub/super-diagonal of A + DistMatrix d_MD_STAR( g ), + e_MD_STAR( g ); + A.GetDiagonal( d_MD_STAR ); + A.GetDiagonal( e_MD_STAR, offdiagonal ); + + // In order to use serial QR kernels, we need the full bidiagonal matrix + // on each process + DistMatrix d_STAR_STAR( d_MD_STAR ), + e_STAR_STAR( e_MD_STAR ); + + // Initialize U and V to the appropriate identity matrices. + DistMatrix U_VC_STAR( g ); + DistMatrix V_VC_STAR( g ); + U_VC_STAR.AlignWith( A ); + V_VC_STAR.AlignWith( V ); + Identity( m, k, U_VC_STAR ); + Identity( n, k, V_VC_STAR ); + + // Compute the SVD of the bidiagonal matrix and accumulate the Givens + // rotations into our local portion of U and V + // NOTE: This _only_ works in the case where m >= n + const int numAccum = 32; + const int maxNumIts = 30; + const int bAlg = 512; + std::vector GBuffer( (k-1)*numAccum ), + HBuffer( (k-1)*numAccum ); + FLA_Bsvd_v_opd_var1 + ( k, U_VC_STAR.LocalHeight(), V_VC_STAR.LocalHeight(), + numAccum, maxNumIts, + d_STAR_STAR.Buffer(), 1, + e_STAR_STAR.Buffer(), 1, + &GBuffer[0], 1, k-1, + &HBuffer[0], 1, k-1, + U_VC_STAR.Buffer(), 1, U_VC_STAR.LDim(), + V_VC_STAR.Buffer(), 1, V_VC_STAR.LDim(), + bAlg ); + + // Make a copy of A (for the Householder vectors) and pull the necessary + // portions of U and V into a standard matrix dist. + DistMatrix B( A ); + if( m >= n ) + { + DistMatrix AT( g ), + AB( g ); + DistMatrix UT_VC_STAR( g ), + UB_VC_STAR( g ); + PartitionDown( A, AT, + AB, n ); + PartitionDown( U_VC_STAR, UT_VC_STAR, + UB_VC_STAR, n ); + AT = UT_VC_STAR; + MakeZeros( AB ); + V = V_VC_STAR; + } + else + { + DistMatrix VT( g ), + VB( g ); + DistMatrix VT_VC_STAR( g ), + VB_VC_STAR( g ); + PartitionDown( V, VT, + VB, m ); + PartitionDown + ( V_VC_STAR, VT_VC_STAR, + VB_VC_STAR, m ); + VT = VT_VC_STAR; + MakeZeros( VB ); + } + + // Backtransform U and V + if( m >= n ) + { + ApplyPackedReflectors( LEFT, LOWER, VERTICAL, BACKWARD, 0, B, A ); + ApplyPackedReflectors( LEFT, UPPER, HORIZONTAL, BACKWARD, 1, B, V ); + } + else + { + ApplyPackedReflectors( LEFT, LOWER, VERTICAL, BACKWARD, -1, B, A ); + ApplyPackedReflectors( LEFT, UPPER, HORIZONTAL, BACKWARD, 0, B, V ); + } + + // Copy out the appropriate subset of the singular values + s = d_STAR_STAR; +#ifndef RELEASE + PopCallStack(); +#endif +} + +template<> +inline void +GolubReinschUpper +( DistMatrix >& A, + DistMatrix& s, + DistMatrix >& V ) +{ +#ifndef RELEASE + PushCallStack("svd::GolubReinschUpper"); +#endif + typedef double Real; + typedef Complex C; + + const int m = A.Height(); + const int n = A.Width(); + const int k = std::min( m, n ); + const int offdiagonal = ( m>=n ? 1 : -1 ); + const char uplo = ( m>=n ? 'U' : 'L' ); + const Grid& g = A.Grid(); + + // Bidiagonalize A + DistMatrix tP( g ), tQ( g ); + Bidiag( A, tP, tQ ); + + // Grab copies of the diagonal and sub/super-diagonal of A + DistMatrix d_MD_STAR( g ), + e_MD_STAR( g ); + A.GetRealPartOfDiagonal( d_MD_STAR ); + A.GetRealPartOfDiagonal( e_MD_STAR, offdiagonal ); + + // In order to use serial QR kernels, we need the full bidiagonal matrix + // on each process + DistMatrix d_STAR_STAR( d_MD_STAR ), + e_STAR_STAR( e_MD_STAR ); + + // Initialize U and VAdj to the appropriate identity matrices + DistMatrix U_VC_STAR( g ); + DistMatrix V_VC_STAR( g ); + U_VC_STAR.AlignWith( A ); + V_VC_STAR.AlignWith( V ); + Identity( m, k, U_VC_STAR ); + Identity( n, k, V_VC_STAR ); + + // Compute the SVD of the bidiagonal matrix and accumulate the Givens + // rotations into our local portion of U and V + // NOTE: This _only_ works in the case where m >= n + const int numAccum = 32; + const int maxNumIts = 30; + const int bAlg = 512; + std::vector GBuffer( (k-1)*numAccum ), + HBuffer( (k-1)*numAccum ); + FLA_Bsvd_v_opz_var1 + ( k, U_VC_STAR.LocalHeight(), V_VC_STAR.LocalHeight(), + numAccum, maxNumIts, + d_STAR_STAR.Buffer(), 1, + e_STAR_STAR.Buffer(), 1, + &GBuffer[0], 1, k-1, + &HBuffer[0], 1, k-1, + U_VC_STAR.Buffer(), 1, U_VC_STAR.LDim(), + V_VC_STAR.Buffer(), 1, V_VC_STAR.LDim(), + bAlg ); + + // Make a copy of A (for the Householder vectors) and pull the necessary + // portions of U and V into a standard matrix dist. + DistMatrix B( A ); + if( m >= n ) + { + DistMatrix AT( g ), + AB( g ); + DistMatrix UT_VC_STAR( g ), + UB_VC_STAR( g ); + PartitionDown( A, AT, + AB, n ); + PartitionDown( U_VC_STAR, UT_VC_STAR, + UB_VC_STAR, n ); + AT = UT_VC_STAR; + MakeZeros( AB ); + V = V_VC_STAR; + } + else + { + DistMatrix VT( g ), + VB( g ); + DistMatrix VT_VC_STAR( g ), + VB_VC_STAR( g ); + PartitionDown( V, VT, + VB, m ); + PartitionDown + ( V_VC_STAR, VT_VC_STAR, + VB_VC_STAR, m ); + VT = VT_VC_STAR; + MakeZeros( VB ); + } + + // Backtransform U and V + if( m >= n ) + { + ApplyPackedReflectors + ( LEFT, LOWER, VERTICAL, BACKWARD, UNCONJUGATED, 0, B, tQ, A ); + ApplyPackedReflectors + ( LEFT, UPPER, HORIZONTAL, BACKWARD, UNCONJUGATED, 1, B, tP, V ); + } + else + { + ApplyPackedReflectors + ( LEFT, LOWER, VERTICAL, BACKWARD, UNCONJUGATED, -1, B, tQ, A ); + ApplyPackedReflectors + ( LEFT, UPPER, HORIZONTAL, BACKWARD, UNCONJUGATED, 0, B, tP, V ); + } + + // Copy out the appropriate subset of the singular values + s = d_STAR_STAR; +#ifndef RELEASE + PopCallStack(); +#endif +} +#endif // HAVE_FLA_BSVD + +template +inline void +GolubReinschUpper +( DistMatrix& A, + DistMatrix& s ) +{ +#ifndef RELEASE + PushCallStack("svd::GolubReinschUpper"); +#endif + const int m = A.Height(); + const int n = A.Width(); + const int k = std::min( m, n ); + const int offdiagonal = ( m>=n ? 1 : -1 ); + const char uplo = ( m>=n ? 'U' : 'L' ); + const Grid& g = A.Grid(); + + // Bidiagonalize A + Bidiag( A ); + + // Grab copies of the diagonal and sub/super-diagonal of A + DistMatrix d_MD_STAR( g ), + e_MD_STAR( g ); + A.GetDiagonal( d_MD_STAR ); + A.GetDiagonal( e_MD_STAR, offdiagonal ); + + + // In order to use serial QR kernels, we need the full bidiagonal matrix + // on each process + // + // NOTE: lapack::BidiagQRAlg expects e to be of length k + DistMatrix d_STAR_STAR( d_MD_STAR ); + DistMatrix eHat_STAR_STAR( k, 1, g ); + DistMatrix e_STAR_STAR( g ); + View( e_STAR_STAR, eHat_STAR_STAR, 0, 0, k-1, 1 ); + e_STAR_STAR = e_MD_STAR; + + // Compute the singular values of the bidiagonal matrix + lapack::BidiagQRAlg + ( uplo, k, 0, 0, + d_STAR_STAR.Buffer(), e_STAR_STAR.Buffer(), + (Real*)0, 1, (Real*)0, 1 ); + + // Copy out the appropriate subset of the singular values + s = d_STAR_STAR; +#ifndef RELEASE + PopCallStack(); +#endif +} + +template +inline void +GolubReinschUpper +( DistMatrix >& A, + DistMatrix& s ) +{ +#ifndef RELEASE + PushCallStack("svd::GolubReinschUpper"); +#endif + typedef Complex C; + const int m = A.Height(); + const int n = A.Width(); + const int k = std::min( m, n ); + const int offdiagonal = ( m>=n ? 1 : -1 ); + const char uplo = ( m>=n ? 'U' : 'L' ); + const Grid& g = A.Grid(); + + // Bidiagonalize A + DistMatrix tP(g), tQ(g); + Bidiag( A, tP, tQ ); + + // Grab copies of the diagonal and sub/super-diagonal of A + DistMatrix d_MD_STAR( g ), + e_MD_STAR( g ); + A.GetRealPartOfDiagonal( d_MD_STAR ); + A.GetRealPartOfDiagonal( e_MD_STAR, offdiagonal ); + + // In order to use serial QR kernels, we need the full bidiagonal matrix + // on each process + // + // NOTE: lapack::BidiagQRAlg expects e to be of length k + DistMatrix d_STAR_STAR( d_MD_STAR ); + DistMatrix eHat_STAR_STAR( k, 1, g ); + DistMatrix e_STAR_STAR( g ); + View( e_STAR_STAR, eHat_STAR_STAR, 0, 0, k-1, 1 ); + e_STAR_STAR = e_MD_STAR; + + // Compute the singular values of the bidiagonal matrix + lapack::BidiagQRAlg + ( uplo, k, 0, 0, + d_STAR_STAR.Buffer(), e_STAR_STAR.Buffer(), + (C*)0, 1, (C*)0, 1 ); + + // Copy out the appropriate subset of the singular values + s = d_STAR_STAR; +#ifndef RELEASE + PopCallStack(); +#endif +} + +} // namespace svd +} // namespace elem + +#endif // ifndef LAPACK_SVD_GOLUBREINSCH_HPP diff --git a/include/elemental/lapack-like/SVD/Util.hpp b/include/elemental/lapack-like/SVD/Util.hpp new file mode 100644 index 0000000000..ea2a9e568b --- /dev/null +++ b/include/elemental/lapack-like/SVD/Util.hpp @@ -0,0 +1,104 @@ +/* + Copyright (c) 2009-2013, Jack Poulson + All rights reserved. + + This file is part of Elemental and is under the BSD 2-Clause License, + which can be found in the LICENSE file in the root directory, or at + http://opensource.org/licenses/BSD-2-Clause +*/ +#pragma once +#ifndef LAPACK_SVD_UTIL_HPP +#define LAPACK_SVD_UTIL_HPP + +#include "elemental/blas-like/level1/Adjoint.hpp" +#include "elemental/lapack-like/Norm/One.hpp" + +namespace elem { +namespace svd { + +template +inline void +CheckScale +( DistMatrix& A, bool& needRescaling, typename Base::type& scale ) +{ + typedef typename Base::type R; + + scale = 1; + needRescaling = false; + const R oneNormOfA = OneNorm( A ); + const R safeMin = lapack::MachineSafeMin(); + const R precision = lapack::MachinePrecision(); + const R smallNumber = safeMin/precision; + const R bigNumber = 1/smallNumber; + const R rhoMin = Sqrt(smallNumber); + const R rhoMax = std::min( Sqrt(bigNumber), 1/Sqrt(Sqrt(safeMin)) ); + + if( oneNormOfA > 0 && oneNormOfA < rhoMin ) + { + needRescaling = true; + scale = rhoMin/oneNormOfA; + } + else if( oneNormOfA > rhoMax ) + { + needRescaling = true; + scale = rhoMax/oneNormOfA; + } +} + +template +inline void +DivideAndConquerSVD +( Matrix& A, Matrix::type>& s, Matrix& V ) +{ +#ifndef RELEASE + PushCallStack("svd::DivideAndConquerSVD"); +#endif + typedef typename Base::type R; + + const int m = A.Height(); + const int n = A.Width(); + const int k = std::min(m,n); + s.ResizeTo( k, 1 ); + Matrix U( m, k ); + Matrix VAdj( k, n ); + lapack::DivideAndConquerSVD + ( m, n, A.Buffer(), A.LDim(), s.Buffer(), U.Buffer(), U.LDim(), + VAdj.Buffer(), VAdj.LDim() ); + + A = U; + Adjoint( VAdj, V ); +#ifndef RELEASE + PopCallStack(); +#endif +} + +template +inline void +QRSVD( Matrix& A, Matrix::type>& s, Matrix& V ) +{ +#ifndef RELEASE + PushCallStack("svd::QRSVD"); +#endif + typedef typename Base::type R; + + const int m = A.Height(); + const int n = A.Width(); + const int k = std::min(m,n); + s.ResizeTo( k, 1 ); + Matrix U( m, k ); + Matrix VAdj( k, n ); + lapack::QRSVD + ( m, n, A.Buffer(), A.LDim(), s.Buffer(), U.Buffer(), U.LDim(), + VAdj.Buffer(), VAdj.LDim() ); + + A = U; + Adjoint( VAdj, V ); +#ifndef RELEASE + PopCallStack(); +#endif +} + +} // namespace svd +} // namespace elem + +#endif // ifndef LAPACK_SVD_UTIL_HPP diff --git a/include/elemental/lapack-like/SymmetricNorm/KyFan.hpp b/include/elemental/lapack-like/SymmetricNorm/KyFan.hpp index 28a3d30c43..4ec2c413d2 100644 --- a/include/elemental/lapack-like/SymmetricNorm/KyFan.hpp +++ b/include/elemental/lapack-like/SymmetricNorm/KyFan.hpp @@ -29,7 +29,7 @@ SymmetricKyFanNorm( UpperOrLower uplo, const Matrix& A, int k ) Matrix B( A ); Matrix s; MakeSymmetric( uplo, B ); - SingularValues( B, s ); + SVD( B, s ); R norm = 0; for( int j=k-1; j>=0; --j ) @@ -54,7 +54,7 @@ SymmetricKyFanNorm( UpperOrLower uplo, const DistMatrix& A, int k ) DistMatrix B( A ); DistMatrix s( A.Grid() ); MakeSymmetric( uplo, B ); - SingularValues( B, s ); + SVD( B, s ); R localNorm = 0; DistMatrix sTop( A.Grid() ); diff --git a/include/elemental/lapack-like/SymmetricNorm/Schatten.hpp b/include/elemental/lapack-like/SymmetricNorm/Schatten.hpp index 5f70d5d914..cc62791832 100644 --- a/include/elemental/lapack-like/SymmetricNorm/Schatten.hpp +++ b/include/elemental/lapack-like/SymmetricNorm/Schatten.hpp @@ -27,7 +27,7 @@ SymmetricSchattenNorm Matrix B( A ); Matrix s; MakeSymmetric( uplo, B ); - SingularValues( B, s ); + SVD( B, s ); // TODO: Think of how to make this more stable const int k = s.Height(); @@ -53,7 +53,7 @@ SymmetricSchattenNorm DistMatrix B( A ); DistMatrix s( A.Grid() ); MakeSymmetric( uplo, B ); - SingularValues( B, s ); + SVD( B, s ); // TODO: Think of how to make this more stable const int kLocal = s.LocalHeight(); diff --git a/include/elemental/lapack-like/SymmetricNorm/Two.hpp b/include/elemental/lapack-like/SymmetricNorm/Two.hpp index d3f24d2e8f..24faa4311b 100644 --- a/include/elemental/lapack-like/SymmetricNorm/Two.hpp +++ b/include/elemental/lapack-like/SymmetricNorm/Two.hpp @@ -27,7 +27,7 @@ SymmetricTwoNorm( UpperOrLower uplo, const Matrix& A ) Matrix B( A ); Matrix s; MakeSymmetric( uplo, B ); - SingularValues( B, s ); + SVD( B, s ); const R norm = MaxNorm( s ); #ifndef RELEASE PopCallStack(); @@ -46,7 +46,7 @@ SymmetricTwoNorm( UpperOrLower uplo, const DistMatrix& A ) DistMatrix B( A ); DistMatrix s( A.Grid() ); MakeSymmetric( uplo, B ); - SingularValues( B, s ); + SVD( B, s ); const R norm = MaxNorm( s ); #ifndef RELEASE PopCallStack(); diff --git a/src/core/imports/lapack.cpp b/src/core/imports/lapack.cpp index a9d584144f..c834cee0a7 100644 --- a/src/core/imports/lapack.cpp +++ b/src/core/imports/lapack.cpp @@ -817,13 +817,13 @@ void QRSVD } // -// Compute singular values with QR algorithm +// Compute singular values (with the QR algorithm) // -void SingularValues( int m, int n, float* A, int lda, float* s ) +void SVD( int m, int n, float* A, int lda, float* s ) { #ifndef RELEASE - PushCallStack("lapack::SingularValues"); + PushCallStack("lapack::SVD"); #endif if( m==0 || n==0 ) { @@ -861,10 +861,10 @@ void SingularValues( int m, int n, float* A, int lda, float* s ) #endif } -void SingularValues( int m, int n, double* A, int lda, double* s ) +void SVD( int m, int n, double* A, int lda, double* s ) { #ifndef RELEASE - PushCallStack("lapack::SingularValues"); + PushCallStack("lapack::SVD"); #endif if( m==0 || n==0 ) { @@ -902,10 +902,10 @@ void SingularValues( int m, int n, double* A, int lda, double* s ) #endif } -void SingularValues( int m, int n, scomplex* A, int lda, float* s ) +void SVD( int m, int n, scomplex* A, int lda, float* s ) { #ifndef RELEASE - PushCallStack("lapack::SingularValues"); + PushCallStack("lapack::SVD"); #endif if( m==0 || n==0 ) { @@ -945,10 +945,10 @@ void SingularValues( int m, int n, scomplex* A, int lda, float* s ) #endif } -void SingularValues( int m, int n, dcomplex* A, int lda, double* s ) +void SVD( int m, int n, dcomplex* A, int lda, double* s ) { #ifndef RELEASE - PushCallStack("lapack::SingularValues"); + PushCallStack("lapack::SVD"); #endif if( m==0 || n==0 ) {