diff --git a/include/core.hpp b/include/core.hpp index 283ab4504..665f961d7 100644 --- a/include/core.hpp +++ b/include/core.hpp @@ -43,6 +43,8 @@ void parallelSort(int size, int rank, MPI_Comm comm, void matrixRightSolve(int NrowsA, int NcolsA, double *A, int NrowsB, int NcolsB, double *B, double *C); void matrixRightSolve(int NrowsA, int NcolsA, float *A, int NrowsB, int NcolsB, float *B, float *C); +void matrixUnderdeterminedRightSolveMinNorm(int NrowsA, int NcolsA, dfloat *A, dfloat *b, dfloat *x); +void matrixUnderdeterminedRightSolveCPQR(int NrowsA, int NcolsA, dfloat *A, dfloat *b, dfloat *x); void matrixEigenVectors(int N, double *A, double *VR, double *WR, double *WI); void matrixEigenVectors(int N, float *A, float *VR, float *WR, float *WI); @@ -70,4 +72,4 @@ void matrixTranspose(const int M, const int N, const long long int *A, const int LDA, long long int *AT, const int LDAT); -#endif \ No newline at end of file +#endif diff --git a/include/initialGuess.hpp b/include/initialGuess.hpp new file mode 100644 index 000000000..cd1af40be --- /dev/null +++ b/include/initialGuess.hpp @@ -0,0 +1,175 @@ +/* + +The MIT License (MIT) + +Copyright (c) 2017 Tim Warburton, Noel Chalmers, Jesse Chan, Ali Karakus + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +*/ + +#ifndef INITIALGUESS_HPP +#define INITIALGUESS_HPP + +#include "linearSolver.hpp" + +// Abstract base class for different initial guess strategies. +class initialGuessStrategy_t { +protected: + platform_t& platform; + settings_t& settings; + MPI_Comm comm; + + dlong Ntotal; // Degrees of freedom + +public: + initialGuessStrategy_t(dlong _N, platform_t& _platform, settings_t& _settings, MPI_Comm _comm, int _weighted, occa::memory& _o_weight); + virtual ~initialGuessStrategy_t(); + + virtual void FormInitialGuess(occa::memory& o_x, occa::memory& o_rhs) = 0; + virtual void Update(solver_t& solver, occa::memory& o_x, occa::memory& o_rhs) = 0; +}; + +// Default initial guess strategy: use whatever the user gave us. +class igDefaultStrategy : public initialGuessStrategy_t { +public: + igDefaultStrategy(dlong _N, platform_t& _platform, settings_t& _settings, MPI_Comm _comm, int _weighted, occa::memory& _o_weight); + + void FormInitialGuess(occa::memory& o_x, occa::memory& o_rhs); + void Update(solver_t &solver, occa::memory& o_x, occa::memory& o_rhs); +}; + +// Zero initial guess strategy: use a zero initial guess. +class igZeroStrategy : public initialGuessStrategy_t { +public: + igZeroStrategy(dlong _N, platform_t& _platform, settings_t& _settings, MPI_Comm _comm, int _weighted, occa::memory& _o_weight); + + void FormInitialGuess(occa::memory& o_x, occa::memory& o_rhs); + void Update(solver_t &solver, occa::memory& o_x, occa::memory& o_rhs); +}; + +// Initial guess strategies based on RHS projection. +class igProjectionStrategy : public initialGuessStrategy_t { +protected: + dlong curDim; // Current dimension of the initial guess space + dlong maxDim; // Maximum dimension of the initial guess space + + occa::memory o_btilde; // vector (e.g., to be added to space) + occa::memory o_xtilde; // Solution vector corresponding to o_btilde + occa::memory o_Btilde; // space (orthogonalized) + occa::memory o_Xtilde; // Solution space corresponding to space + + occa::memory o_w; // Inverse-degree weights for inner products. + + // temporary buffer for basis inner product output + dlong ctmpNblocks; + dfloat *ctmp; + occa::memory o_ctmp; + + dfloat *alphas; // Buffers for storing inner products. + dfloat *alphasThisRank; + occa::memory o_alphas; + + occa::kernel igBasisInnerProductsKernel; + occa::kernel igReconstructKernel; + occa::kernel igScaleKernel; + occa::kernel igUpdateKernel; + + void igBasisInnerProducts(occa::memory& o_x, occa::memory& o_Q, occa::memory& o_c, dfloat *c, dfloat *cThisRank); + void igReconstruct(occa::memory& o_u, dfloat a, occa::memory& o_c, occa::memory& o_Q, occa::memory& o_unew); + +public: + igProjectionStrategy(dlong _N, platform_t& _platform, settings_t& _settings, MPI_Comm _comm, int _weighted, occa::memory& _o_weight); + virtual ~igProjectionStrategy(); + + virtual void FormInitialGuess(occa::memory& o_x, occa::memory& o_rhs); + virtual void Update(solver_t& solver, occa::memory& o_x, occa::memory& o_rhs) = 0; +}; + +// "Classic" initial guess strategy from Fischer's 1998 paper. +class igClassicProjectionStrategy : public igProjectionStrategy { +public: + igClassicProjectionStrategy(dlong _N, platform_t& _platform, settings_t& _settings, MPI_Comm _comm, int _weighted, occa::memory& _o_weight); + + void Update(solver_t &solver, occa::memory& o_x, occa::memory& o_rhs); +}; + +// Rolling QR update for projection history space a la Christensen's thesis. +class igRollingQRProjectionStrategy : public igProjectionStrategy { +private: + dfloat *R; // R factor in QR decomposition (row major) + occa::memory o_R; + + occa::kernel igDropQRFirstColumnKernel; + + void givensRotation(dfloat a, dfloat b, dfloat *c, dfloat *s); + +public: + igRollingQRProjectionStrategy(dlong _N, platform_t& _platform, settings_t& _settings, MPI_Comm _comm, int _weighted, occa::memory& _o_weight); + ~igRollingQRProjectionStrategy(); + + void Update(solver_t &solver, occa::memory& o_x, occa::memory& o_rhs); +}; + +// Extrapolation initial guess strategy. +class igExtrapStrategy : public initialGuessStrategy_t { +private: + int Nhistory; + int shift; + int entry; + occa::memory o_xh; + occa::memory o_coeffs; + occa::kernel igExtrapKernel; + occa::kernel igExtrapSparseKernel; + + int Nsparse; + occa::memory o_sparseIds; + occa::memory o_sparseCoeffs; + + void extrapCoeffs(int m, int M, dfloat *c); + +public: + igExtrapStrategy(dlong _N, platform_t& _platform, settings_t& _settings, MPI_Comm _comm, int _weighted, occa::memory& _o_weight); + + void FormInitialGuess(occa::memory& o_x, occa::memory& o_rhs); + void Update(solver_t &solver, occa::memory& o_x, occa::memory& o_rhs); +}; + +// Linear solver with successive-RHS initial-guess generation. +class initialGuessSolver_t : public linearSolver_t { +protected: + initialGuessStrategy_t* igStrategy; // The initial guess strategy. + linearSolver_t* linearSolver; // The linearSolver_t that does the solve. + +public: + initialGuessSolver_t(dlong _N, dlong _Nhalo, platform_t& _platform, settings_t& _settings, MPI_Comm _comm); + ~initialGuessSolver_t(); + + static initialGuessSolver_t* Setup(dlong _N, dlong _Nhalo, + platform_t& platform, settings_t& settings, MPI_Comm _comm, + int _weighted, occa::memory& _o_weight); + + int Solve(solver_t& solver, precon_t& precon, + occa::memory& o_x, occa::memory& o_rhs, + const dfloat tol, const int MAXIT, const int verbose); +}; + +void initialGuessAddSettings(settings_t& settings, const string prefix = ""); + +#endif /* INITIALGUESS_HPP */ diff --git a/include/mesh.hpp b/include/mesh.hpp index 6762baf7d..cde12ec60 100644 --- a/include/mesh.hpp +++ b/include/mesh.hpp @@ -354,159 +354,163 @@ class mesh_t { void DegreeRaiseMatrix1D(int Nc, int Nf, dfloat *P); void DegreeRaiseMatrixTri2D(int Nc, int Nf, dfloat *P); void DegreeRaiseMatrixTet3D(int Nc, int Nf, dfloat *P); -protected: + + /***************************************************************************/ + // Basic codes for generating nodes, polynomials, matrices, etc. + +public: //1D - void Nodes1D(int N, dfloat *r); - void EquispacedNodes1D(int _N, dfloat *_r); - void OrthonormalBasis1D(dfloat a, int i, dfloat *P); - void GradOrthonormalBasis1D(dfloat a, int i, dfloat *Pr); - void Vandermonde1D(int N, int Npoints, dfloat *r, dfloat *V); - void GradVandermonde1D(int N, int Npoints, dfloat *r, dfloat *Vr); - - void MassMatrix1D(int _Np, dfloat *V, dfloat *MM); - void Dmatrix1D(int _N, int NpointsIn, dfloat *_rIn, int NpointsOut, dfloat *_rOut, dfloat *_Dr); - void InterpolationMatrix1D(int _N,int NpointsIn, dfloat *rIn, int NpointsOut, dfloat *rOut, dfloat *I); - void CubatureWeakDmatrix1D(int _Nq, int _cubNq, dfloat *_cubProject, dfloat *_cubD, dfloat *_cubPDT); + static void Nodes1D(int N, dfloat *r); + static void EquispacedNodes1D(int _N, dfloat *_r); + static void OrthonormalBasis1D(dfloat a, int i, dfloat *P); + static void GradOrthonormalBasis1D(dfloat a, int i, dfloat *Pr); + static void Vandermonde1D(int N, int Npoints, dfloat *r, dfloat *V); + static void GradVandermonde1D(int N, int Npoints, dfloat *r, dfloat *Vr); + + static void MassMatrix1D(int _Np, dfloat *V, dfloat *MM); + static void Dmatrix1D(int _N, int NpointsIn, dfloat *_rIn, int NpointsOut, dfloat *_rOut, dfloat *_Dr); + static void InterpolationMatrix1D(int _N,int NpointsIn, dfloat *rIn, int NpointsOut, dfloat *rOut, dfloat *I); + static void CubatureWeakDmatrix1D(int _Nq, int _cubNq, dfloat *_cubProject, dfloat *_cubD, dfloat *_cubPDT); //Jacobi polynomial evaluation - dfloat JacobiP(dfloat a, dfloat alpha, dfloat beta, int N); - dfloat GradJacobiP(dfloat a, dfloat alpha, dfloat beta, int N); + static dfloat JacobiP(dfloat a, dfloat alpha, dfloat beta, int N); + static dfloat GradJacobiP(dfloat a, dfloat alpha, dfloat beta, int N); //Gauss-Legendre-Lobatto quadrature nodes - void JacobiGLL(int N, dfloat *x, dfloat *w=NULL); + static void JacobiGLL(int N, dfloat *x, dfloat *w=NULL); //Nth order Gauss-Jacobi quadrature nodes and weights - void JacobiGQ(dfloat alpha, dfloat beta, int N, dfloat *x, dfloat *w); + static void JacobiGQ(dfloat alpha, dfloat beta, int N, dfloat *x, dfloat *w); //Tris - void NodesTri2D(int _N, dfloat *_r, dfloat *_s); - void FaceNodesTri2D(int _N, dfloat *_r, dfloat *_s, int *_faceNodes); - void VertexNodesTri2D(int _N, dfloat *_r, dfloat *_s, int *_vertexNodes); - void EquispacedNodesTri2D(int _N, dfloat *_r, dfloat *_s); - void EquispacedEToVTri2D(int _N, int *_EToV); - void SEMFEMNodesTri2D(int _N, int *_Np, dfloat **_r, dfloat **_s); - void SEMFEMEToVTri2D(int _N, int *_NelFEM, int **_EToV); - void OrthonormalBasisTri2D(dfloat a, dfloat b, int i, int j, dfloat *P); - void GradOrthonormalBasisTri2D(dfloat a, dfloat b, int i, int j, dfloat *Pr, dfloat *Ps); - void VandermondeTri2D(int N, int Npoints, dfloat *r, dfloat *s, dfloat *V); - void GradVandermondeTri2D(int N, int Npoints, dfloat *r, dfloat *s, dfloat *Vr, dfloat *Vs); - void MassMatrixTri2D(int _Np, dfloat *V, dfloat *_MM); - void invMassMatrixTri2D(int _Np, dfloat *V, dfloat *_invMM); - void DmatrixTri2D(int _N, int Npoints, dfloat *_r, dfloat *_s, - dfloat *_Dr, dfloat *_Ds); - void LIFTmatrixTri2D(int _N, int *_faceNodes, - dfloat *_r, dfloat *_s, dfloat *_LIFT); - void SurfaceMassMatrixTri2D(int _N, dfloat *_MM, dfloat *_LIFT, dfloat *_sM); - void SmatrixTri2D(int _N, dfloat *_Dr, dfloat *_Ds, dfloat *_MM, - dfloat *_Srr, dfloat *_Srs, dfloat *_Sss); - void InterpolationMatrixTri2D(int _N, - int NpointsIn, dfloat *rIn, dfloat *sIn, - int NpointsOut, dfloat *rOut, dfloat *sOut, - dfloat *I); - void CubatureNodesTri2D(int cubTriN, int*cubNp, dfloat **cubTrir, dfloat **cubTris, dfloat **cubTriw); - void CubaturePmatrixTri2D(int _N, int _Np, dfloat *_r, dfloat *_s, - int _cubNp, dfloat *_cubr, dfloat *_cubs, dfloat *_cubProject); - void CubatureWeakDmatricesTri2D(int _N, int _Np, dfloat *_r, dfloat *_s, - int _cubNp, dfloat *_cubr, dfloat *_cubs, - dfloat *_cubPDrT, dfloat *_cubPDsT); - void CubatureSurfaceMatricesTri2D(int _N, int _Np, dfloat *_r, dfloat *_s, int *_faceNodes, - int _intNfp, dfloat *_intr, dfloat *_intw, - dfloat *_intInterp, dfloat *_intLIFT); - void SEMFEMInterpMatrixTri2D(int _N, - int _Np, dfloat *_r, dfloat *_s, - int _NpFEM, dfloat *rFEM, dfloat *sFEM, - dfloat *I); - - void Warpfactor(int _N, int Npoints, dfloat *r, dfloat *w); - void WarpBlendTransformTri2D(int _N, int _Npoints, dfloat *_r, dfloat *_s, dfloat alphaIn=-1); + static void NodesTri2D(int _N, dfloat *_r, dfloat *_s); + static void FaceNodesTri2D(int _N, dfloat *_r, dfloat *_s, int *_faceNodes); + static void VertexNodesTri2D(int _N, dfloat *_r, dfloat *_s, int *_vertexNodes); + static void EquispacedNodesTri2D(int _N, dfloat *_r, dfloat *_s); + static void EquispacedEToVTri2D(int _N, int *_EToV); + static void SEMFEMNodesTri2D(int _N, int *_Np, dfloat **_r, dfloat **_s); + static void SEMFEMEToVTri2D(int _N, int *_NelFEM, int **_EToV); + static void OrthonormalBasisTri2D(dfloat a, dfloat b, int i, int j, dfloat *P); + static void GradOrthonormalBasisTri2D(dfloat a, dfloat b, int i, int j, dfloat *Pr, dfloat *Ps); + static void VandermondeTri2D(int N, int Npoints, dfloat *r, dfloat *s, dfloat *V); + static void GradVandermondeTri2D(int N, int Npoints, dfloat *r, dfloat *s, dfloat *Vr, dfloat *Vs); + static void MassMatrixTri2D(int _Np, dfloat *V, dfloat *_MM); + static void invMassMatrixTri2D(int _Np, dfloat *V, dfloat *_invMM); + static void DmatrixTri2D(int _N, int Npoints, dfloat *_r, dfloat *_s, + dfloat *_Dr, dfloat *_Ds); + static void LIFTmatrixTri2D(int _N, int *_faceNodes, + dfloat *_r, dfloat *_s, dfloat *_LIFT); + static void SurfaceMassMatrixTri2D(int _N, dfloat *_MM, dfloat *_LIFT, dfloat *_sM); + static void SmatrixTri2D(int _N, dfloat *_Dr, dfloat *_Ds, dfloat *_MM, + dfloat *_Srr, dfloat *_Srs, dfloat *_Sss); + static void InterpolationMatrixTri2D(int _N, + int NpointsIn, dfloat *rIn, dfloat *sIn, + int NpointsOut, dfloat *rOut, dfloat *sOut, + dfloat *I); + static void CubatureNodesTri2D(int cubTriN, int*cubNp, dfloat **cubTrir, dfloat **cubTris, dfloat **cubTriw); + static void CubaturePmatrixTri2D(int _N, int _Np, dfloat *_r, dfloat *_s, + int _cubNp, dfloat *_cubr, dfloat *_cubs, dfloat *_cubProject); + static void CubatureWeakDmatricesTri2D(int _N, int _Np, dfloat *_r, dfloat *_s, + int _cubNp, dfloat *_cubr, dfloat *_cubs, + dfloat *_cubPDrT, dfloat *_cubPDsT); + static void CubatureSurfaceMatricesTri2D(int _N, int _Np, dfloat *_r, dfloat *_s, int *_faceNodes, + int _intNfp, dfloat *_intr, dfloat *_intw, + dfloat *_intInterp, dfloat *_intLIFT); + static void SEMFEMInterpMatrixTri2D(int _N, + int _Np, dfloat *_r, dfloat *_s, + int _NpFEM, dfloat *rFEM, dfloat *sFEM, + dfloat *I); + + static void Warpfactor(int _N, int Npoints, dfloat *r, dfloat *w); + static void WarpBlendTransformTri2D(int _N, int _Npoints, dfloat *_r, dfloat *_s, dfloat alphaIn=-1); //Quads - void NodesQuad2D(int _N, dfloat *_r, dfloat *_s); - void FaceNodesQuad2D(int _N, dfloat *_r, dfloat *_s, int *_faceNodes); - void VertexNodesQuad2D(int _N, dfloat *_r, dfloat *_s, int *_vertexNodes); - void EquispacedNodesQuad2D(int _N, dfloat *_r, dfloat *_s); - void EquispacedEToVQuad2D(int _N, int *_EToV); - void SEMFEMEToVQuad2D(int _N, int *_EToV); - void OrthonormalBasisQuad2D(dfloat a, dfloat b, int i, int j, dfloat *P); - void GradOrthonormalBasisQuad2D(dfloat a, dfloat b, int i, int j, dfloat *Pr, dfloat *Ps); - void VandermondeQuad2D(int N, int Npoints, dfloat *r, dfloat *s, dfloat *V); - void GradVandermondeQuad2D(int N, int Npoints, dfloat *r, dfloat *s, dfloat *Vr, dfloat *Vs); - void MassMatrixQuad2D(int _Np, dfloat *V, dfloat *_MM); - void LumpedMassMatrixQuad2D(int _N, dfloat *_gllw, dfloat *_MM); - void invLumpedMassMatrixQuad2D(int _N, dfloat *_gllw, dfloat *_invMM); - void DmatrixQuad2D(int _N, int Npoints, dfloat *_r, dfloat *_s, + static void NodesQuad2D(int _N, dfloat *_r, dfloat *_s); + static void FaceNodesQuad2D(int _N, dfloat *_r, dfloat *_s, int *_faceNodes); + static void VertexNodesQuad2D(int _N, dfloat *_r, dfloat *_s, int *_vertexNodes); + static void EquispacedNodesQuad2D(int _N, dfloat *_r, dfloat *_s); + static void EquispacedEToVQuad2D(int _N, int *_EToV); + static void SEMFEMEToVQuad2D(int _N, int *_EToV); + static void OrthonormalBasisQuad2D(dfloat a, dfloat b, int i, int j, dfloat *P); + static void GradOrthonormalBasisQuad2D(dfloat a, dfloat b, int i, int j, dfloat *Pr, dfloat *Ps); + static void VandermondeQuad2D(int N, int Npoints, dfloat *r, dfloat *s, dfloat *V); + static void GradVandermondeQuad2D(int N, int Npoints, dfloat *r, dfloat *s, dfloat *Vr, dfloat *Vs); + static void MassMatrixQuad2D(int _Np, dfloat *V, dfloat *_MM); + static void LumpedMassMatrixQuad2D(int _N, dfloat *_gllw, dfloat *_MM); + static void invLumpedMassMatrixQuad2D(int _N, dfloat *_gllw, dfloat *_invMM); + static void DmatrixQuad2D(int _N, int Npoints, dfloat *_r, dfloat *_s, dfloat *_Dr, dfloat *_Ds); - void InterpolationMatrixQuad2D(int _N, - int NpointsIn, dfloat *rIn, dfloat *sIn, - int NpointsOut, dfloat *rOut, dfloat *sOut, - dfloat *I); + static void InterpolationMatrixQuad2D(int _N, + int NpointsIn, dfloat *rIn, dfloat *sIn, + int NpointsOut, dfloat *rOut, dfloat *sOut, + dfloat *I); //Tets - void NodesTet3D(int _N, dfloat *_r, dfloat *_s, dfloat *_t); - void FaceNodesTet3D(int _N, dfloat *_r, dfloat *_s, dfloat *_t, int *_faceNodes); - void VertexNodesTet3D(int _N, dfloat *_r, dfloat *_s, dfloat *_t, int *_vertexNodes); - void EquispacedNodesTet3D(int _N, dfloat *_r, dfloat *_s, dfloat *_t); - void EquispacedEToVTet3D(int _N, int *_EToV); - void SEMFEMEToVTet3D(int _N, int *_EToV); - void OrthonormalBasisTet3D(dfloat a, dfloat b, dfloat c, int i, int j, int k, dfloat *P); - void GradOrthonormalBasisTet3D(dfloat a, dfloat b, dfloat c, int i, int j, int k, dfloat *Pr, dfloat *Ps, dfloat *Pt); - void VandermondeTet3D(int N, int Npoints, dfloat *r, dfloat *s, dfloat *t, dfloat *V); - void GradVandermondeTet3D(int N, int Npoints, dfloat *r, dfloat *s, dfloat *t, dfloat *Vr, dfloat *Vs, dfloat *Vt); - void MassMatrixTet3D(int _Np, dfloat *V, dfloat *_MM); - void invMassMatrixTet3D(int _Np, dfloat *V, dfloat *_invMM); - void DmatrixTet3D(int _N, int Npoints, dfloat *_r, dfloat *_s, dfloat *_t, - dfloat *_Dr, dfloat *_Ds, dfloat *_Dt); - void LIFTmatrixTet3D(int _N, int *_faceNodes, - dfloat *_r, dfloat *_s, dfloat *_t, dfloat *_LIFT); - void SurfaceMassMatrixTet3D(int _N, dfloat *_MM, dfloat *_LIFT, dfloat *_sM); - void SmatrixTet3D(int _N, dfloat *_Dr, dfloat *_Ds, dfloat *_Dt, dfloat *_MM, - dfloat *_Srr, dfloat *_Srs, dfloat *_Srt, - dfloat *_Sss, dfloat *_Sst, dfloat *_Stt); - void InterpolationMatrixTet3D(int _N, - int NpointsIn, dfloat *rIn, dfloat *sIn, dfloat *tIn, - int NpointsOut, dfloat *rOut, dfloat *sOut, dfloat *tOut, - dfloat *I); - void CubatureNodesTet3D(int cubN, int*cubNp, dfloat **cubr, dfloat **cubs, dfloat **cubt, dfloat **cubw); - void CubaturePmatrixTet3D(int _N, int _Np, dfloat *_r, dfloat *_s, dfloat *_t, - int _cubNp, dfloat *_cubr, dfloat *_cubs, dfloat *_cubt, - dfloat *_cubProject); - void CubatureWeakDmatricesTet3D(int _N, int _Np, dfloat *_r, dfloat *_s, dfloat *_t, - int _cubNp, dfloat *_cubr, dfloat *_cubs, dfloat *_cubt, - dfloat *_cubPDrT, dfloat *_cubPDsT, dfloat *_cubPDtT); - void CubatureSurfaceMatricesTet3D(int _N, int _Np, dfloat *_r, dfloat *_s, dfloat *_t, int *_faceNodes, - int _intNfp, dfloat *_intr, dfloat *_ints, dfloat *_intw, - dfloat *_intInterp, dfloat *_intLIFT); - void SEMFEMInterpMatrixTet3D(int _N, int _Np, dfloat *_r, dfloat *_s, dfloat *_t, - int _NpFEM, dfloat *_rFEM, dfloat *_sFEM, dfloat *_tFEM, - dfloat *I); - void WarpShiftFace3D(int _N, int Npoints, dfloat alpha, - dfloat *L1, dfloat *L2, dfloat *L3, - dfloat *w1, dfloat *w2); - void WarpBlendTransformTet3D(int _N, int _Npoints, dfloat *_r, dfloat *_s, dfloat *_t, dfloat alphaIn=-1); + static void NodesTet3D(int _N, dfloat *_r, dfloat *_s, dfloat *_t); + static void FaceNodesTet3D(int _N, dfloat *_r, dfloat *_s, dfloat *_t, int *_faceNodes); + static void VertexNodesTet3D(int _N, dfloat *_r, dfloat *_s, dfloat *_t, int *_vertexNodes); + static void EquispacedNodesTet3D(int _N, dfloat *_r, dfloat *_s, dfloat *_t); + static void EquispacedEToVTet3D(int _N, int *_EToV); + static void SEMFEMEToVTet3D(int _N, int *_EToV); + static void OrthonormalBasisTet3D(dfloat a, dfloat b, dfloat c, int i, int j, int k, dfloat *P); + static void GradOrthonormalBasisTet3D(dfloat a, dfloat b, dfloat c, int i, int j, int k, dfloat *Pr, dfloat *Ps, dfloat *Pt); + static void VandermondeTet3D(int N, int Npoints, dfloat *r, dfloat *s, dfloat *t, dfloat *V); + static void GradVandermondeTet3D(int N, int Npoints, dfloat *r, dfloat *s, dfloat *t, dfloat *Vr, dfloat *Vs, dfloat *Vt); + static void MassMatrixTet3D(int _Np, dfloat *V, dfloat *_MM); + static void invMassMatrixTet3D(int _Np, dfloat *V, dfloat *_invMM); + static void DmatrixTet3D(int _N, int Npoints, dfloat *_r, dfloat *_s, dfloat *_t, + dfloat *_Dr, dfloat *_Ds, dfloat *_Dt); + static void LIFTmatrixTet3D(int _N, int *_faceNodes, + dfloat *_r, dfloat *_s, dfloat *_t, dfloat *_LIFT); + static void SurfaceMassMatrixTet3D(int _N, dfloat *_MM, dfloat *_LIFT, dfloat *_sM); + static void SmatrixTet3D(int _N, dfloat *_Dr, dfloat *_Ds, dfloat *_Dt, dfloat *_MM, + dfloat *_Srr, dfloat *_Srs, dfloat *_Srt, + dfloat *_Sss, dfloat *_Sst, dfloat *_Stt); + static void InterpolationMatrixTet3D(int _N, + int NpointsIn, dfloat *rIn, dfloat *sIn, dfloat *tIn, + int NpointsOut, dfloat *rOut, dfloat *sOut, dfloat *tOut, + dfloat *I); + static void CubatureNodesTet3D(int cubN, int*cubNp, dfloat **cubr, dfloat **cubs, dfloat **cubt, dfloat **cubw); + static void CubaturePmatrixTet3D(int _N, int _Np, dfloat *_r, dfloat *_s, dfloat *_t, + int _cubNp, dfloat *_cubr, dfloat *_cubs, dfloat *_cubt, + dfloat *_cubProject); + static void CubatureWeakDmatricesTet3D(int _N, int _Np, dfloat *_r, dfloat *_s, dfloat *_t, + int _cubNp, dfloat *_cubr, dfloat *_cubs, dfloat *_cubt, + dfloat *_cubPDrT, dfloat *_cubPDsT, dfloat *_cubPDtT); + static void CubatureSurfaceMatricesTet3D(int _N, int _Np, dfloat *_r, dfloat *_s, dfloat *_t, int *_faceNodes, + int _intNfp, dfloat *_intr, dfloat *_ints, dfloat *_intw, + dfloat *_intInterp, dfloat *_intLIFT); + static void SEMFEMInterpMatrixTet3D(int _N, int _Np, dfloat *_r, dfloat *_s, dfloat *_t, + int _NpFEM, dfloat *_rFEM, dfloat *_sFEM, dfloat *_tFEM, + dfloat *I); + static void WarpShiftFace3D(int _N, int Npoints, dfloat alpha, + dfloat *L1, dfloat *L2, dfloat *L3, + dfloat *w1, dfloat *w2); + static void WarpBlendTransformTet3D(int _N, int _Npoints, dfloat *_r, dfloat *_s, dfloat *_t, dfloat alphaIn=-1); //Hexs - void NodesHex3D(int _N, dfloat *_r, dfloat *_s, dfloat *_t); - void FaceNodesHex3D(int _N, dfloat *_r, dfloat *_s, dfloat *_t, int *_faceNodes); - void VertexNodesHex3D(int _N, dfloat *_r, dfloat *_s, dfloat *_t, int *_vertexNodes); - void EquispacedNodesHex3D(int _N, dfloat *_r, dfloat *_s, dfloat *_t); - void EquispacedEToVHex3D(int _N, int *_EToV); - void SEMFEMEToVHex3D(int _N, int *_EToV); - void OrthonormalBasisHex3D(dfloat a, dfloat b, dfloat c, int i, int j, int k, dfloat *P); - void GradOrthonormalBasisHex3D(dfloat a, dfloat b, dfloat c, int i, int j, int k, dfloat *Pr, dfloat *Ps, dfloat *Pt); - void VandermondeHex3D(int N, int Npoints, dfloat *r, dfloat *s, dfloat *t, dfloat *V); - void GradVandermondeHex3D(int N, int Npoints, dfloat *r, dfloat *s, dfloat *t, - dfloat *Vr, dfloat *Vs, dfloat *Vt); - void MassMatrixHex3D(int _Np, dfloat *V, dfloat *_MM); - void LumpedMassMatrixHex3D(int _N, dfloat *_gllw, dfloat *_MM); - void invLumpedMassMatrixHex3D(int _N, dfloat *_gllw, dfloat *_invMM); - void DmatrixHex3D(int _N, int Npoints, dfloat *_r, dfloat *_s, dfloat *_t, - dfloat *_Dr, dfloat *_Ds, dfloat *_Dt); - void InterpolationMatrixHex3D(int _N, - int NpointsIn, dfloat *rIn, dfloat *sIn, dfloat *tIn, - int NpointsOut, dfloat *rOut, dfloat *sOut, dfloat *tOut, - dfloat *I); + static void NodesHex3D(int _N, dfloat *_r, dfloat *_s, dfloat *_t); + static void FaceNodesHex3D(int _N, dfloat *_r, dfloat *_s, dfloat *_t, int *_faceNodes); + static void VertexNodesHex3D(int _N, dfloat *_r, dfloat *_s, dfloat *_t, int *_vertexNodes); + static void EquispacedNodesHex3D(int _N, dfloat *_r, dfloat *_s, dfloat *_t); + static void EquispacedEToVHex3D(int _N, int *_EToV); + static void SEMFEMEToVHex3D(int _N, int *_EToV); + static void OrthonormalBasisHex3D(dfloat a, dfloat b, dfloat c, int i, int j, int k, dfloat *P); + static void GradOrthonormalBasisHex3D(dfloat a, dfloat b, dfloat c, int i, int j, int k, dfloat *Pr, dfloat *Ps, dfloat *Pt); + static void VandermondeHex3D(int N, int Npoints, dfloat *r, dfloat *s, dfloat *t, dfloat *V); + static void GradVandermondeHex3D(int N, int Npoints, dfloat *r, dfloat *s, dfloat *t, + dfloat *Vr, dfloat *Vs, dfloat *Vt); + static void MassMatrixHex3D(int _Np, dfloat *V, dfloat *_MM); + static void LumpedMassMatrixHex3D(int _N, dfloat *_gllw, dfloat *_MM); + static void invLumpedMassMatrixHex3D(int _N, dfloat *_gllw, dfloat *_invMM); + static void DmatrixHex3D(int _N, int Npoints, dfloat *_r, dfloat *_s, dfloat *_t, + dfloat *_Dr, dfloat *_Ds, dfloat *_Dt); + static void InterpolationMatrixHex3D(int _N, + int NpointsIn, dfloat *rIn, dfloat *sIn, dfloat *tIn, + int NpointsOut, dfloat *rOut, dfloat *sOut, dfloat *tOut, + dfloat *I); }; #endif diff --git a/libs/core/matrixRightSolve.cpp b/libs/core/matrixRightSolve.cpp index da821f40a..3a3bb64d4 100644 --- a/libs/core/matrixRightSolve.cpp +++ b/libs/core/matrixRightSolve.cpp @@ -33,12 +33,61 @@ extern "C" { double *B, int *LDB, int *INFO ); + void sgesv_ ( int *N, int *NRHS, float *A, int *LDA, int *IPIV, float *B, int *LDB, int *INFO ); + + void dgels_ ( char *TRANS, + int *M, + int *N, + int *NRHS, + double *A, + int *LDA, + double *B, + int *LDB, + double *WORK, + int *LWORK, + int *INFO); + + void dgeqp3_( int *M, + int *N, + double *A, + int *LDA, + int *JPVT, + double *TAU, + double *WORK, + int *LWORK, + int *INFO); + + void dormqr_( char *SIDE, + char *TRANS, + int *M, + int *N, + int *K, + double *A, + int *LDA, + double *TAU, + double *C, + int *LDC, + double *WORK, + int *LWORK, + int *INFO); + + void dtrsm_ ( char *SIDE, + char *UPLO, + char *TRANSA, + char *DIAG, + int *M, + int *N, + double *ALPHA, + double *A, + int *LDA, + double *B, + int *LDB); } // C = A/B = trans(trans(B)\trans(A)) @@ -133,4 +182,120 @@ void matrixRightSolve(int NrowsA, int NcolsA, float *A, int NrowsB, int NcolsB, free(ipiv); free(tmpX); free(tmpY); -} \ No newline at end of file +} + +// Find minimum-norm solution to xA = b with NrowsA > NcolsA (underdetermined). +// +// NB: A must be stored ROW MAJOR. +void matrixUnderdeterminedRightSolveMinNorm(int NrowsA, int NcolsA, dfloat *A, dfloat *b, dfloat *x) +{ + int LWORK, INFO = 0; + dfloat* WORK; + + dfloat* tmpA = new dfloat[NrowsA*NcolsA](); + for (int i = 0; i < NrowsA*NcolsA; i++) + tmpA[i] = A[i]; + + dfloat* tmpb = new dfloat[NrowsA](); + for (int i = 0; i < NcolsA; i++) + tmpb[i] = b[i]; + + // Solve A^T x^T = b^T. Note TRANS = 'N', since A is row major. + char TRANS = 'N'; + int NRHS = 1; + + LWORK = 2*NrowsA*NcolsA; + WORK = new dfloat[LWORK](); + dgels_(&TRANS, &NcolsA, &NrowsA, &NRHS, tmpA, &NcolsA, tmpb, &NrowsA, WORK, &LWORK, &INFO); + + if (INFO != 0) { + std::stringstream ss; + ss << "dgels_ returned INFO = " << INFO; + LIBP_ABORT(ss.str()); + } + + // Copy to output. + for (int i = 0; i < NrowsA; i++) + x[i] = tmpb[i]; + + delete[] WORK; + delete[] tmpA; + delete[] tmpb; +} + +// Solve xA = b with NrowsA > NcolsA (underdetermined) using column-pivoted QR. +// +// Done by solving A^T x^T = b^T in 4 steps: +// 1. Decompose A^T * P = Q * R. --> Q * R * P^T x^T = b^T +// 2. Multiply by Q^T. --> R * P^T x^T = Q^T b^T +// 3. Backsolve with R1. --> P^T * x^T = R1^{-1} Q^T b^T +// where R1 = leading NcolsA * NcolsA submatrix of R. +// 4. Apply permutation. --> x^T = P R1^{-1} Q^T b^T +// +// NB: A must be stored ROW MAJOR. +void matrixUnderdeterminedRightSolveCPQR(int NrowsA, int NcolsA, dfloat *A, dfloat *b, dfloat *x) +{ + int LWORK, INFO = 0; + dfloat* WORK; + + dfloat* tmpA = new dfloat[NrowsA*NcolsA](); + for (int i = 0; i < NrowsA*NcolsA; i++) + tmpA[i] = A[i]; + + dfloat* tmpb = new dfloat[NrowsA](); + for (int i = 0; i < NcolsA; i++) + tmpb[i] = b[i]; + + // Compute A^T * P = Q * R. + int* JPVT = new int[NrowsA](); + dfloat* TAU = new dfloat[mymin(NrowsA, NcolsA)](); + + LWORK = 3*NrowsA + 1; + WORK = new dfloat[LWORK](); + dgeqp3_(&NcolsA, &NrowsA, tmpA, &NcolsA, JPVT, TAU, WORK, &LWORK, &INFO); + + if (INFO != 0) { + std::stringstream ss; + ss << "dgeqp3_ returned INFO = " << INFO; + LIBP_ABORT(ss.str()); + } + + delete[] WORK; + + // Compute Q^T * b^T. + char SIDE = 'L'; + char TRANS = 'T'; + int NRHS = 1; + int NREFLS = NcolsA; + + LWORK = 1; + WORK = new dfloat[LWORK](); + dormqr_(&SIDE, &TRANS, &NcolsA, &NRHS, &NREFLS, tmpA, &NcolsA, TAU, tmpb, &NcolsA, WORK, &LWORK, &INFO); + + if (INFO != 0) { + std::stringstream ss; + ss << "dormqr_ returned INFO = " << INFO; + LIBP_ABORT(ss.str()); + } + + delete[] WORK; + + // Compute R1^{-1} * Q^T * b^T + SIDE = 'L'; + char UPLO = 'U'; + char TRANSA = 'N'; + char DIAG = 'N'; + NRHS = 1; + dfloat ALPHA = 1.0; + + dtrsm_(&SIDE, &UPLO, &TRANSA, &DIAG, &NcolsA, &NRHS, &ALPHA, tmpA, &NcolsA, tmpb, &NcolsA); + + // Apply the permutation. + for (int i = 0; i < NrowsA; i++) + x[JPVT[i] - 1] = tmpb[i]; + + delete[] JPVT; + delete[] TAU; + delete[] tmpA; + delete[] tmpb; +} diff --git a/libs/linearSolver/initialGuess.cpp b/libs/linearSolver/initialGuess.cpp new file mode 100644 index 000000000..5bca22e33 --- /dev/null +++ b/libs/linearSolver/initialGuess.cpp @@ -0,0 +1,596 @@ +/* + +The MIT License (MIT) + +Copyright (c) 2017 Tim Warburton, Noel Chalmers, Jesse Chan, Ali Karakus + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +*/ + +#include "initialGuess.hpp" +#include "mesh.hpp" + +initialGuessSolver_t* initialGuessSolver_t::Setup(dlong N, dlong Nhalo, platform_t& platform, settings_t& settings, MPI_Comm comm, int weighted, occa::memory& o_weight) +{ + initialGuessSolver_t* initialGuessSolver = new initialGuessSolver_t(N, Nhalo, platform, settings, comm); + initialGuessSolver->linearSolver = linearSolver_t::Setup(N, Nhalo, platform, settings, comm, weighted, o_weight); + initialGuessSolver->igStrategy = nullptr; + + if (settings.compareSetting("INITIAL GUESS STRATEGY", "NONE")) { + initialGuessSolver->igStrategy = new igDefaultStrategy(N, platform, settings, comm, weighted, o_weight); + } else if (settings.compareSetting("INITIAL GUESS STRATEGY", "ZERO")) { + initialGuessSolver->igStrategy = new igZeroStrategy(N, platform, settings, comm, weighted, o_weight); + } else if (settings.compareSetting("INITIAL GUESS STRATEGY", "CLASSIC")) { + initialGuessSolver->igStrategy = new igClassicProjectionStrategy(N, platform, settings, comm, weighted, o_weight); + } else if (settings.compareSetting("INITIAL GUESS STRATEGY", "QR")) { + initialGuessSolver->igStrategy = new igRollingQRProjectionStrategy(N, platform, settings, comm, weighted, o_weight); + } else if (settings.compareSetting("INITIAL GUESS STRATEGY", "EXTRAP")) { + initialGuessSolver->igStrategy = new igExtrapStrategy(N, platform, settings, comm, weighted, o_weight); + } else { + LIBP_ABORT("Requested INITIAL GUESS STRATEGY not found."); + } + + return initialGuessSolver; +} + +initialGuessSolver_t::initialGuessSolver_t(dlong _N, dlong _Nhalo, platform_t& _platform, settings_t& _settings, MPI_Comm _comm): + linearSolver_t(_N, _Nhalo, _platform, _settings, _comm), + igStrategy(nullptr), + linearSolver(nullptr) +{ + return; +} + +initialGuessSolver_t::~initialGuessSolver_t() +{ + delete igStrategy; + delete linearSolver; +} + +int initialGuessSolver_t::Solve(solver_t& solver, precon_t& precon, occa::memory& o_x, occa::memory& o_rhs, const dfloat tol, const int MAXIT, const int verbose) +{ + int iter = 0; + + igStrategy->FormInitialGuess(o_x, o_rhs); + iter = linearSolver->Solve(solver, precon, o_x, o_rhs, tol, MAXIT, verbose); + igStrategy->Update(solver, o_x, o_rhs); + + return iter; +} + +/*****************************************************************************/ + +void initialGuessAddSettings(settings_t& settings, const string prefix) +{ + settings.newSetting(prefix + "INITIAL GUESS STRATEGY", + "NONE", + "Strategy for selecting initial guess for linear solver", + {"NONE", "ZERO", "CLASSIC", "QR", "EXTRAP"}); + + settings.newSetting(prefix + "INITIAL GUESS HISTORY SPACE DIMENSION", + "-1", + "Dimension of the initial guess space"); + + settings.newSetting(prefix + "INITIAL GUESS EXTRAP DEGREE", + "-1", + "Degree used for EXTRAP initial guess schemes."); + + settings.newSetting(prefix + "INITIAL GUESS EXTRAP COEFFS METHOD", + "MINNORM", + "Method for selecting coefficients with EXTRAP initial guess schemes.", + {"MINNORM", "CPQR"}); +} + +/*****************************************************************************/ + +initialGuessStrategy_t::initialGuessStrategy_t(dlong _N, platform_t& _platform, settings_t& _settings, MPI_Comm _comm, int _weighted, occa::memory& _o_weight): + platform(_platform), + settings(_settings), + comm(_comm), + Ntotal(_N) +{ + return; +} + +initialGuessStrategy_t::~initialGuessStrategy_t() +{ + return; +} + +/*****************************************************************************/ + +igDefaultStrategy::igDefaultStrategy(dlong _N, platform_t& _platform, settings_t& _settings, MPI_Comm _comm, int _weighted, occa::memory& _o_weight): + initialGuessStrategy_t(_N, _platform, _settings, _comm, _weighted, _o_weight) +{ + return; +} + +void igDefaultStrategy::FormInitialGuess(occa::memory& o_x, occa::memory& o_rhs) +{ + return; +} + +void igDefaultStrategy::Update(solver_t &solver, occa::memory& o_x, occa::memory& o_rhs) +{ + return; +} + +/*****************************************************************************/ + +igZeroStrategy::igZeroStrategy(dlong _N, platform_t& _platform, settings_t& _settings, MPI_Comm _comm, int _weighted, occa::memory& _o_weight): + initialGuessStrategy_t(_N, _platform, _settings, _comm, _weighted, _o_weight) +{ + platform.linAlg.InitKernels({"set"}); + return; +} + +void igZeroStrategy::FormInitialGuess(occa::memory& o_x, occa::memory& o_rhs) +{ + platform.linAlg.set(Ntotal, 0.0, o_x); + return; +} + +void igZeroStrategy::Update(solver_t &solver, occa::memory& o_x, occa::memory& o_rhs) +{ + return; +} + +/*****************************************************************************/ + +igProjectionStrategy::igProjectionStrategy(dlong _N, platform_t& _platform, settings_t& _settings, MPI_Comm _comm, int _weighted, occa::memory& _o_weight): + initialGuessStrategy_t(_N, _platform, _settings, _comm, _weighted, _o_weight) +{ + curDim = 0; + settings.getSetting("INITIAL GUESS HISTORY SPACE DIMENSION", maxDim); + + o_btilde = platform.malloc(Ntotal*sizeof(dfloat)); + o_xtilde = platform.malloc(Ntotal*sizeof(dfloat)); + o_Btilde = platform.malloc(Ntotal*maxDim*sizeof(dfloat)); + o_Xtilde = platform.malloc(Ntotal*maxDim*sizeof(dfloat)); + + o_w = _o_weight; + + alphas = new dfloat[maxDim](); + alphasThisRank = new dfloat[maxDim](); + o_alphas = platform.malloc(maxDim*sizeof(dfloat)); + + ctmpNblocks = (Ntotal + BLOCKSIZE - 1)/BLOCKSIZE; + ctmp = (dfloat*)calloc(ctmpNblocks*maxDim, sizeof(dfloat)); + o_ctmp = platform.malloc(ctmpNblocks*maxDim*sizeof(dfloat), ctmp); + + // Build kernels. + platform.linAlg.InitKernels({"set"}); + + occa::properties kernelInfo = platform.props; + kernelInfo["defines/" "p_igNhist"] = maxDim; + + igBasisInnerProductsKernel = platform.buildKernel(LINEARSOLVER_DIR "/okl/igBasisInnerProducts.okl", "igBasisInnerProducts", kernelInfo); + igReconstructKernel = platform.buildKernel(LINEARSOLVER_DIR "/okl/igReconstruct.okl", "igReconstruct", kernelInfo); + igScaleKernel = platform.buildKernel(LINEARSOLVER_DIR "/okl/igScale.okl", "igScale", kernelInfo); + igUpdateKernel = platform.buildKernel(LINEARSOLVER_DIR "/okl/igUpdate.okl", "igUpdate", kernelInfo); + + return; +} + +igProjectionStrategy::~igProjectionStrategy() +{ + if (ctmp) + delete[] ctmp; + if (alphas) + delete[] alphas; + if (alphasThisRank) + delete[] alphasThisRank; + + return; +} + +void igProjectionStrategy::FormInitialGuess(occa::memory& o_x, occa::memory& o_rhs) +{ + if (curDim > 0) { + igBasisInnerProducts(o_rhs, o_Btilde, o_alphas, alphas, alphasThisRank); + platform.linAlg.set(Ntotal, 0.0, o_x); + igReconstruct(o_x, 1.0, o_alphas, o_Xtilde, o_x); + } + + return; +} + +void igProjectionStrategy::igBasisInnerProducts(occa::memory& o_x, occa::memory& o_Q, occa::memory& o_c, dfloat *c, dfloat *cThisRank) +{ + igBasisInnerProductsKernel(Ntotal, ctmpNblocks, o_w, curDim, o_x, o_Q, o_ctmp); + + o_ctmp.copyTo(ctmp, ctmpNblocks*curDim*sizeof(dfloat)); + + dlong cnt = 0; + for (int m = 0; m < curDim; ++m) { + cThisRank[m] = 0; + for (int n = 0; n < ctmpNblocks; ++n) { + cThisRank[m] += ctmp[cnt]; + ++cnt; + } + } + + MPI_Allreduce(cThisRank, c, curDim, MPI_DFLOAT, MPI_SUM, comm); + o_c.copyFrom(c, curDim*sizeof(dfloat)); + + return; +} + +void igProjectionStrategy::igReconstruct(occa::memory& o_u, dfloat a, occa::memory& o_c, occa::memory& o_Q, occa::memory& o_unew) +{ + igReconstructKernel(Ntotal, curDim, o_u, a, o_c, o_Q, o_unew); + return; +} + + +/*****************************************************************************/ + +igClassicProjectionStrategy::igClassicProjectionStrategy(dlong _N, platform_t& _platform, settings_t& _settings, MPI_Comm _comm, int _weighted, occa::memory& _o_weight): + igProjectionStrategy(_N, _platform, _settings, _comm, _weighted, _o_weight) +{ + return; +} + +void igClassicProjectionStrategy::Update(solver_t &solver, occa::memory& o_x, occa::memory& o_rhs) +{ + // Compute RHS corresponding to the approximate solution obtained. + solver.Operator(o_x, o_btilde); + + // Insert new solution into the initial guess space. + if ((curDim >= maxDim) || (curDim == 0)) { + dfloat normbtilde = 0.0; + + normbtilde = platform.linAlg.weightedNorm2(Ntotal, o_w, o_btilde, comm); + + if (normbtilde > 0) { + igScaleKernel(Ntotal, 1.0/normbtilde, o_btilde, o_Btilde); + igScaleKernel(Ntotal, 1.0/normbtilde, o_x, o_Xtilde); + + curDim = 1; + } + } else { + dfloat invnormbtilde = 0.0; + const int Nreorth = 2; + + o_x.copyTo(o_xtilde, Ntotal*sizeof(dfloat)); + + // Orthogonalize new RHS against previous ones. + for (int n = 0; n < Nreorth; n++) { + igBasisInnerProducts(o_btilde, o_Btilde, o_alphas, alphas, alphasThisRank); + igReconstruct(o_btilde, (dfloat)(-1.0), o_alphas, o_Btilde, o_btilde); + igReconstruct(o_xtilde, (dfloat)(-1.0), o_alphas, o_Xtilde, o_xtilde); + } + + // Normalize. + invnormbtilde = platform.linAlg.weightedNorm2(Ntotal, o_w, o_btilde, comm); + invnormbtilde = 1.0/invnormbtilde; + +#if 0 + igScaleKernel(Ntotal, invnormbtilde, o_btilde, o_btilde); + igScaleKernel(Ntotal, invnormbtilde, o_xtilde, o_xtilde); + + // Store. + o_btilde.copyTo(o_Btilde + curDim*Ntotal*sizeof(dfloat)); + o_xtilde.copyTo(o_Xtilde + curDim*Ntotal*sizeof(dfloat)); +#else + igUpdateKernel(Ntotal, curDim, invnormbtilde, o_btilde, 1, o_Btilde, o_xtilde, 1, o_Xtilde); +#endif + + curDim++; + } + + return; +} + +/*****************************************************************************/ + +igRollingQRProjectionStrategy::igRollingQRProjectionStrategy(dlong _N, platform_t& _platform, settings_t& _settings, MPI_Comm _comm, int _weighted, occa::memory& _o_weight): + igProjectionStrategy(_N, _platform, _settings, _comm, _weighted, _o_weight) +{ + R = new dfloat[maxDim*maxDim](); + o_R = platform.malloc(maxDim*maxDim*sizeof(dfloat)); + + occa::properties kernelInfo = platform.props; + kernelInfo["defines/" "p_igNhist"] = maxDim; + + igDropQRFirstColumnKernel = platform.buildKernel(LINEARSOLVER_DIR "/okl/igDropQRFirstColumn.okl", "igDropQRFirstColumn", kernelInfo); + + return; +} + +igRollingQRProjectionStrategy::~igRollingQRProjectionStrategy() +{ + if (R) + delete[] R; + + return; +} + +void igRollingQRProjectionStrategy::Update(solver_t &solver, occa::memory& o_x, occa::memory& o_rhs) +{ + // Compute RHS corresponding to the approximate solution obtained. + solver.Operator(o_x, o_btilde); + + // Rotate the history space (QR update). + if (curDim == maxDim) { + // Drop the first column in the QR factorization: R = R(:, 2:end). + for (int j = 0; j < maxDim; j++) { + for (int i = 0; i < maxDim - 1; i++) + R[j*maxDim + i] = R[j*maxDim + (i + 1)]; + R[j*maxDim + (maxDim - 1)] = 0.0; + } + + o_R.copyFrom(R); + + // Update the RHS and solution spaces. + igDropQRFirstColumnKernel(Ntotal, o_Btilde, o_Xtilde, o_R); + + // Restore R to triangular form (overlapped with Q update). + for (int j = 0; j < maxDim - 1 ; j++) { + dfloat c = 0.0, s = 0.0; + dfloat Rjj = R[j*maxDim + j]; + dfloat Rjp1j = R[(j + 1)*maxDim + j]; + + givensRotation(Rjj, Rjp1j, &c, &s); + + for (int i = j; i < maxDim; i++) { + dfloat Rji = R[j*maxDim + i]; + dfloat Rjp1i = R[(j + 1)*maxDim + i]; + + R[j*maxDim + i] = c*Rji + s*Rjp1i; + R[(j + 1)*maxDim + i] = -s*Rji + c*Rjp1i; + } + } + + // Copy the updated R back to the device. + platform.device.finish(); + o_R.copyFrom(R); + + curDim--; + } + + // Orthogonalize and tack on the new column. + if (curDim == 0) { + dfloat normbtilde = 0.0; + + normbtilde = platform.linAlg.weightedNorm2(Ntotal, o_w, o_btilde, comm); + + if (normbtilde > 0) { +#if 0 + igScaleKernel(Ntotal, 1.0/normbtilde, o_btilde, o_Btilde); + igScaleKernel(Ntotal, 1.0/normbtilde, o_x, o_Xtilde); +#else + dfloat invnormbtilde = 1.0/normbtilde; + igUpdateKernel(Ntotal, 0, invnormbtilde, o_btilde, 0, o_Btilde, o_x, 0, o_Xtilde); +#endif + + R[0] = normbtilde; + + curDim = 1; + } + } else { + dfloat normbtilde = 0.0, normbtildeproj = 0.0;; + const int Nreorth = 2; + + o_x.copyTo(o_xtilde, Ntotal*sizeof(dfloat)); + + // Compute the initial norm of the new vector. + normbtilde = platform.linAlg.weightedNorm2(Ntotal, o_w, o_btilde, comm); + + // Zero the entries above/on the diagonal of the column of R into which we want to write. + for (int i = 0; i < curDim; i++) + R[i*maxDim + curDim] = 0.0; + + // Orthogonalize new RHS against previous ones. + for (int n = 0; n < Nreorth; n++) { + igBasisInnerProducts(o_btilde, o_Btilde, o_alphas, alphas, alphasThisRank); + igReconstruct(o_btilde, (dfloat)(-1.0), o_alphas, o_Btilde, o_btilde); + igReconstruct(o_xtilde, (dfloat)(-1.0), o_alphas, o_Xtilde, o_xtilde); + + for (int i = 0; i < curDim; i++) + R[i*maxDim + curDim] += alphas[i]; + } + + // Normalize. + normbtildeproj = platform.linAlg.weightedNorm2(Ntotal, o_w, o_btilde, comm); + + // Only add if the remainder after projection is large enough. + // + // TODO: What is the appropriate criterion here? + if (normbtildeproj/normbtilde > 1.0e-10) { +#if 0 + igScaleKernel(Ntotal, 1.0/normbtildeproj, o_btilde, o_btilde); + igScaleKernel(Ntotal, 1.0/normbtildeproj, o_xtilde, o_xtilde); + + // Store. + o_btilde.copyTo(o_Btilde + curDim*Ntotal*sizeof(dfloat)); + o_xtilde.copyTo(o_Xtilde + curDim*Ntotal*sizeof(dfloat)); +#else + dfloat invnormbtildeproj = 1.0/normbtildeproj; + igUpdateKernel(Ntotal, curDim, invnormbtildeproj, o_btilde, 1, o_Btilde, o_xtilde, 1, o_Xtilde); +#endif + + R[curDim*maxDim + curDim] = normbtildeproj; + + curDim++; + } + } + + o_R.copyFrom(R); +} + +void igRollingQRProjectionStrategy::givensRotation(dfloat a, dfloat b, dfloat *c, dfloat *s) +{ + // Compute a Givens rotation that zeros the bottom component of [a ; b]. + if (b != 0) { + dfloat h = hypot(a, b); + dfloat d = 1.0/h; + *c = fabs(a)*d; + *s = copysign(d, a)*b; + } else { + *c = 1.0; + *s = 0.0; + } + + return; +} + +/*****************************************************************************/ + +igExtrapStrategy::igExtrapStrategy(dlong _N, platform_t& _platform, settings_t& _settings, MPI_Comm _comm, int _weighted, occa::memory& _o_weight): + initialGuessStrategy_t(_N, _platform, _settings, _comm, _weighted, _o_weight) +{ + int M, m; + settings.getSetting("INITIAL GUESS HISTORY SPACE DIMENSION", M); + settings.getSetting("INITIAL GUESS EXTRAP DEGREE", m); + + dfloat *c = new dfloat[M](); + extrapCoeffs(m, M, c); + + Nhistory = M; + + entry = 0; + + o_coeffs = platform.malloc(Nhistory*sizeof(dfloat), c); + + shift = 0; + + o_xh = platform.malloc(Nhistory*Ntotal*sizeof(dfloat)); + + platform.linAlg.InitKernels({"set"}); + + occa::properties kernelInfo = platform.props; + kernelInfo["defines/" "p_igNhist"] = Nhistory; + + igExtrapKernel = platform.buildKernel(LINEARSOLVER_DIR "/okl/igExtrap.okl", "igExtrap", kernelInfo); + igExtrapSparseKernel = platform.buildKernel(LINEARSOLVER_DIR "/okl/igExtrap.okl", "igExtrapSparse", kernelInfo); + + platform.linAlg.set(Nhistory*Ntotal, 0.0, o_xh); + + delete[] c; + + return; +} + +void igExtrapStrategy::FormInitialGuess(occa::memory& o_x, occa::memory& o_rhs) +{ + if (entry < Nhistory) { + int M, m; + if (entry == Nhistory - 1) { + settings.getSetting("INITIAL GUESS HISTORY SPACE DIMENSION", M); + settings.getSetting("INITIAL GUESS EXTRAP DEGREE", m); + } else { + M = mymax(1, entry + 1); + m = sqrt((double)M); + } + + // Construct the extrapolation coefficients. + dfloat *c, *d, *sparseCoeffs; + + c = new dfloat[Nhistory](); + d = new dfloat[Nhistory](); + sparseCoeffs = new dfloat[Nhistory](); + for (int n = 0; n < Nhistory; ++n) { + c[n] = 0; + d[n] = 0; + sparseCoeffs[n] = 0; + } + + if (M == 1) { + d[Nhistory - 1] = 1.0; + } else { + extrapCoeffs(m, M, c); + + // need d[0:M-1] = {0, 0, 0, .., c[0], c[1], .., c[M-1]} + for (int i = 0; i < M; i++) + d[Nhistory - M + i] = c[i]; + } + + int *sparseIds = new int[Nhistory](); + Nsparse = 0; + for (int n = 0; n < Nhistory; ++n) { + if (fabs(d[n]) > 1e-14) { // hmm + sparseIds[Nsparse] = n; + sparseCoeffs[Nsparse] = d[n]; + ++Nsparse; + } + } + + o_coeffs = platform.malloc(Nhistory*sizeof(dfloat), d); + o_sparseIds = platform.malloc(Nhistory*sizeof(int), sparseIds); + o_sparseCoeffs = platform.malloc(Nhistory*sizeof(dfloat), sparseCoeffs); + + ++entry; + + delete[] sparseIds; + } + + if (settings.compareSetting("INITIAL GUESS EXTRAP COEFFS METHOD", "MINNORM")) + igExtrapKernel(Ntotal, Nhistory, shift, o_coeffs, o_xh, o_x); + else { + igExtrapSparseKernel(Ntotal, Nhistory, shift, Nsparse, o_sparseIds, o_sparseCoeffs, o_xh, o_x); + } + + return; +} + +void igExtrapStrategy::Update(solver_t &solver, occa::memory& o_x, occa::memory& o_rhs) +{ + occa::memory o_tmp = o_xh + Ntotal*shift*sizeof(dfloat); + o_x.copyTo(o_tmp, Ntotal*sizeof(dfloat)); + shift = (shift + 1) % Nhistory; + + return; +} + +void igExtrapStrategy::extrapCoeffs(int m, int M, dfloat *c) +{ + dfloat h, ro, *r, *V, *b; + + if (M < m + 1) { + std::stringstream ss; + ss << "Extrapolation space dimension (" << M << ") too low for degree (" << m << ")."; + LIBP_ABORT(ss.str()); + } + + h = 2.0/(M - 1); + r = new dfloat[M](); + for (int i = 0; i < M; i++) + r[i] = -1.0 + i*h; + ro = 1.0 + h; // Evaluation point. + + V = new dfloat[(m + 1)*M](); + mesh_t::Vandermonde1D(m, M, r, V); + + b = new dfloat[m + 1](); + mesh_t::Vandermonde1D(m, 1, &ro, b); + + if (settings.compareSetting("INITIAL GUESS EXTRAP COEFFS METHOD", "MINNORM")) { + matrixUnderdeterminedRightSolveMinNorm(M, m + 1, V, b, c); + } else if (settings.compareSetting("INITIAL GUESS EXTRAP COEFFS METHOD", "CPQR")) { + matrixUnderdeterminedRightSolveCPQR(M, m + 1, V, b, c); + } + + delete[] r; + delete[] V; + delete[] b; + + return; +} diff --git a/libs/linearSolver/linearSolverPCG.cpp b/libs/linearSolver/linearSolverPCG.cpp index cf416677d..2eb8a77e3 100644 --- a/libs/linearSolver/linearSolverPCG.cpp +++ b/libs/linearSolver/linearSolverPCG.cpp @@ -77,6 +77,19 @@ int pcg::Solve(solver_t& solver, precon_t& precon, dfloat rdotz2 = 0.0; dfloat alpha = 0.0, beta = 0.0, pAp = 0.0; dfloat rdotr = 0.0; + dfloat TOL = 0.0; + + // Comput norm of RHS (for stopping tolerance). + if (settings.compareSetting("LINEAR SOLVER STOPPING CRITERION", "ABS/REL-RHS-2NORM")) { + dfloat normb = 0.0; + + if (weighted) + normb = linAlg.weightedNorm2(N, o_w, o_r, comm); + else + normb = linAlg.norm2(N, o_r, comm); + + TOL = mymax(tol*tol*normb*normb, tol*tol); + } // compute A*x solver.Operator(o_x, o_Ax); @@ -90,7 +103,9 @@ int pcg::Solve(solver_t& solver, precon_t& precon, rdotr = linAlg.norm2(N, o_r, comm); rdotr = rdotr*rdotr; - dfloat TOL = mymax(tol*tol*rdotr,tol*tol); + if (settings.compareSetting("LINEAR SOLVER STOPPING CRITERION", "ABS/REL-INITRESID")) { + TOL = mymax(tol*tol*rdotr,tol*tol); + } if (verbose&&(rank==0)) printf("PCG: initial res norm %12.12f \n", sqrt(rdotr)); @@ -98,8 +113,11 @@ int pcg::Solve(solver_t& solver, precon_t& precon, int iter; for(iter=0;iter 0) && (rdotr <= TOL))) { + break; + } // z = Precon^{-1} r precon.Operator(o_r, o_z); @@ -176,4 +194,4 @@ dfloat pcg::UpdatePCG(const dfloat alpha, occa::memory &o_x, occa::memory &o_r){ pcg::~pcg() { updatePCGKernel.free(); -} \ No newline at end of file +} diff --git a/libs/linearSolver/okl/igBasisInnerProducts.okl b/libs/linearSolver/okl/igBasisInnerProducts.okl new file mode 100644 index 000000000..028e55663 --- /dev/null +++ b/libs/linearSolver/okl/igBasisInnerProducts.okl @@ -0,0 +1,144 @@ +/* + +The MIT License (MIT) + +Copyright (c) 2017 Tim Warburton, Noel Chalmers, Jesse Chan, Ali Karakus + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +*/ + +#define p_blockSize 256 + +@kernel void igBasisInnerProducts(const dlong Ntotal, + const dlong Nblocks, + @restrict const dfloat *w, + const dlong dim, // how many inner products + const dfloat *x, // one x + const dfloat *y, // multiple y + dfloat *wxy) +{ + for (dlong b = 0; b < Nblocks; ++b; @outer(0)) { + + @shared volatile dfloat s_wxy[p_blockSize]; + + // load x to register + // block over igNhist + + @exclusive dfloat r_wx; + + for (int t = 0; t < p_blockSize; ++t; @inner(0)) { + const dlong id = t + p_blockSize*b; + + if (id < Ntotal) { + r_wx = w[id]*x[id]; + } + } + + for (int fld = 0; fld < dim; ++fld) { + + @barrier("local"); + + for (int t = 0; t < p_blockSize; ++t; @inner(0)) { + dlong id = t + p_blockSize*b; + + dfloat res = 0; + + if (id < Ntotal) { + dlong vid = id + fld*Ntotal; + + res += r_wx*y[vid]; // fmad + } + + s_wxy[t] = res; + } + + @barrier("local"); + +#if p_blockSize>512 + for(int t=0;t256 + for(int t=0;tGatherScatter(o_Mr, ogs_dfloat, ogs_add, ogs_sym); + elliptic.linAlg.amx(Ntotal, (dfloat)1.0, elliptic.o_weight, o_Mr); + } } // zero mean of RHS diff --git a/solvers/elliptic/src/ellipticSettings.cpp b/solvers/elliptic/src/ellipticSettings.cpp index 916f72f02..2f4540eb2 100644 --- a/solvers/elliptic/src/ellipticSettings.cpp +++ b/solvers/elliptic/src/ellipticSettings.cpp @@ -66,6 +66,11 @@ void ellipticAddSettings(settings_t& settings, "Iterative Linear Solver to use for solve", {"PCG", "FPCG", "NBPCG", "NBFPCG", "PGMRES"}); + settings.newSetting(prefix+"LINEAR SOLVER STOPPING CRITERION", + "ABS/REL-INITRESID", + "Stopping criterion for the linear solver", + {"ABS/REL-INITRESID", "ABS/REL-RHS-2NORM"}); + settings.newSetting(prefix+"PRECONDITIONER", "NONE", "Preconditioning Strategy", @@ -145,4 +150,4 @@ void ellipticSettings_t::parseFromFile(platformSettings_t& platformSettings, LIBP_ABORT(ss.str()); } } -} \ No newline at end of file +} diff --git a/solvers/ins/ins.hpp b/solvers/ins/ins.hpp index e0218661a..e972f9ccd 100644 --- a/solvers/ins/ins.hpp +++ b/solvers/ins/ins.hpp @@ -34,6 +34,7 @@ #include "timeStepper.hpp" #include "linAlg.hpp" #include "elliptic.hpp" +#include "initialGuess.hpp" #define DINS LIBP_DIR"/solvers/ins/" @@ -91,7 +92,9 @@ class ins_t: public solver_t { elliptic_t *uSolver, *vSolver, *wSolver; elliptic_t *pSolver; + linearSolver_t *uLinearSolver; linearSolver_t *vLinearSolver; + linearSolver_t *wLinearSolver; linearSolver_t *pLinearSolver; int NVfields, NTfields; diff --git a/solvers/ins/setups/setupHex3D.rc b/solvers/ins/setups/setupHex3D.rc index 657eddfbe..5b2bed4e5 100644 --- a/solvers/ins/setups/setupHex3D.rc +++ b/solvers/ins/setups/setupHex3D.rc @@ -89,6 +89,9 @@ ins [VELOCITY LINEAR SOLVER] PCG +[VELOCITY LINEAR SOLVER STOPPING CRITERION] +ABS/REL-INITRESID + # can be IPDG, or CONTINUOUS [VELOCITY DISCRETIZATION] CONTINUOUS @@ -97,6 +100,12 @@ CONTINUOUS [VELOCITY PRECONDITIONER] JACOBI +[VELOCITY INITIAL GUESS STRATEGY] +NONE + +[VELOCITY INITIAL GUESS HISTORY SPACE DIMENSION] +-1 + [VELOCITY VERBOSE] TRUE @@ -141,6 +150,9 @@ CHEBYSHEV [PRESSURE LINEAR SOLVER] FPCG +[PRESSURE LINEAR SOLVER STOPPING CRITERION] +ABS/REL-INITRESID + # can be IPDG, or CONTINUOUS [PRESSURE DISCRETIZATION] CONTINUOUS @@ -149,6 +161,18 @@ CONTINUOUS [PRESSURE PRECONDITIONER] MULTIGRID +[PRESSURE INITIAL GUESS STRATEGY] +NONE + +[PRESSURE INITIAL GUESS HISTORY SPACE DIMENSION] +8 + +[PRESSURE INITIAL GUESS EXTRAP DEGREE] +3 + +[PRESSURE INITIAL GUESS EXTRAP COEFFS METHOD] +CPQR + [PRESSURE VERBOSE] TRUE diff --git a/solvers/ins/setups/setupQuad2D.rc b/solvers/ins/setups/setupQuad2D.rc index 9eee25044..480d39e36 100644 --- a/solvers/ins/setups/setupQuad2D.rc +++ b/solvers/ins/setups/setupQuad2D.rc @@ -94,10 +94,19 @@ PCG [VELOCITY DISCRETIZATION] CONTINUOUS +[VELOCITY LINEAR SOLVER STOPPING CRITERION] +ABS/REL-INITRESID + # can be NONE, JACOBI, MASSMATRIX, PARALMOND, SEMFEM, MULTIGRID, or OAS [VELOCITY PRECONDITIONER] JACOBI +[VELOCITY INITIAL GUESS STRATEGY] +NONE + +[VELOCITY INITIAL GUESS HISTORY SPACE DIMENSION] +-1 + [VELOCITY VERBOSE] TRUE @@ -142,6 +151,9 @@ CHEBYSHEV [PRESSURE LINEAR SOLVER] FPCG +[PRESSURE LINEAR SOLVER STOPPING CRITERION] +ABS/REL-INITRESID + # can be IPDG, or CONTINUOUS [PRESSURE DISCRETIZATION] CONTINUOUS @@ -150,6 +162,12 @@ CONTINUOUS [PRESSURE PRECONDITIONER] MULTIGRID +[PRESSURE INITIAL GUESS STRATEGY] +NONE + +[PRESSURE INITIAL GUESS HISTORY SPACE DIMENSION] +-1 + [PRESSURE VERBOSE] TRUE diff --git a/solvers/ins/setups/setupQuad3D.rc b/solvers/ins/setups/setupQuad3D.rc index 0cfb2d1e2..788b97932 100644 --- a/solvers/ins/setups/setupQuad3D.rc +++ b/solvers/ins/setups/setupQuad3D.rc @@ -95,6 +95,9 @@ VTU [VELOCITY KRYLOV SOLVER] PCG +[VELOCITY LINEAR SOLVER STOPPING CRITERION] +ABS/REL-INITRESID + # can be IPDG, or CONTINUOUS [VELOCITY DISCRETIZATION] IPDG @@ -109,6 +112,12 @@ NODAL #MULTIGRID JACOBI +[VELOCITY INITIAL GUESS STRATEGY] +NONE + +[VELOCITY INITIAL GUESS HISTORY SPACE DIMENSION] +-1 + ########## MULTIGRID Options ############## # can be ALLDEGREES, HALFDEGREES, HALFDOFS @@ -161,6 +170,9 @@ MAX [PRESSURE KRYLOV SOLVER] PCG,FLEXIBLE +[PRESSURE LINEAR SOLVER STOPPING CRITERION] +ABS/REL-INITRESID + # can be IPDG, or CONTINUOUS [PRESSURE DISCRETIZATION] #IPDG @@ -175,6 +187,12 @@ NODAL MULTIGRID #JACOBI +[PRESSURE INITIAL GUESS STRATEGY] +NONE + +[PRESSURE INITIAL GUESS HISTORY SPACE DIMENSION] +-1 + ########## MULTIGRID Options ############## # can be ALLDEGREES, HALFDEGREES, HALFDOFS diff --git a/solvers/ins/setups/setupTet3D.rc b/solvers/ins/setups/setupTet3D.rc index 25a9991e8..1173fa309 100644 --- a/solvers/ins/setups/setupTet3D.rc +++ b/solvers/ins/setups/setupTet3D.rc @@ -89,6 +89,9 @@ ins [VELOCITY LINEAR SOLVER] PCG +[VELOCITY LINEAR SOLVER STOPPING CRITERION] +ABS/REL-INITRESID + # can be IPDG, or CONTINUOUS [VELOCITY DISCRETIZATION] CONTINUOUS @@ -97,6 +100,12 @@ CONTINUOUS [VELOCITY PRECONDITIONER] MASSMATRIX +[VELOCITY INITIAL GUESS STRATEGY] +NONE + +[VELOCITY INITIAL GUESS HISTORY SPACE DIMENSION] +-1 + [VELOCITY VERBOSE] TRUE @@ -141,6 +150,9 @@ CHEBYSHEV [PRESSURE LINEAR SOLVER] FPCG +[PRESSURE LINEAR SOLVER STOPPING CRITERION] +ABS/REL-INITRESID + # can be IPDG, or CONTINUOUS [PRESSURE DISCRETIZATION] CONTINUOUS @@ -149,6 +161,12 @@ CONTINUOUS [PRESSURE PRECONDITIONER] MULTIGRID +[PRESSURE INITIAL GUESS STRATEGY] +NONE + +[PRESSURE INITIAL GUESS HISTORY SPACE DIMENSION] +-1 + [PRESSURE VERBOSE] TRUE diff --git a/solvers/ins/setups/setupTri2D.rc b/solvers/ins/setups/setupTri2D.rc index edd48071c..f33a49ece 100644 --- a/solvers/ins/setups/setupTri2D.rc +++ b/solvers/ins/setups/setupTri2D.rc @@ -90,6 +90,9 @@ ins [VELOCITY LINEAR SOLVER] PCG +[VELOCITY LINEAR SOLVER STOPPING CRITERION] +ABS/REL-INITRESID + # can be IPDG, or CONTINUOUS [VELOCITY DISCRETIZATION] CONTINUOUS @@ -98,6 +101,12 @@ CONTINUOUS [VELOCITY PRECONDITIONER] MASSMATRIX +[VELOCITY INITIAL GUESS STRATEGY] +NONE + +[VELOCITY INITIAL GUESS HISTORY SPACE DIMENSION] +-1 + [VELOCITY VERBOSE] TRUE @@ -142,6 +151,9 @@ CHEBYSHEV [PRESSURE LINEAR SOLVER] FPCG +[PRESSURE LINEAR SOLVER STOPPING CRITERION] +ABS/REL-INITRESID + # can be IPDG, or CONTINUOUS [PRESSURE DISCRETIZATION] CONTINUOUS @@ -150,6 +162,12 @@ CONTINUOUS [PRESSURE PRECONDITIONER] MULTIGRID +[PRESSURE INITIAL GUESS STRATEGY] +NONE + +[PRESSURE INITIAL GUESS HISTORY SPACE DIMENSION] +-1 + [PRESSURE VERBOSE] TRUE diff --git a/solvers/ins/src/insSettings.cpp b/solvers/ins/src/insSettings.cpp index d1ac1297b..f64a57a75 100644 --- a/solvers/ins/src/insSettings.cpp +++ b/solvers/ins/src/insSettings.cpp @@ -84,9 +84,11 @@ insSettings_t::insSettings_t(MPI_Comm& _comm): ellipticAddSettings(*this, "VELOCITY "); parAlmond::AddSettings(*this, "VELOCITY "); + initialGuessAddSettings(*this, "VELOCITY "); ellipticAddSettings(*this, "PRESSURE "); parAlmond::AddSettings(*this, "PRESSURE "); + initialGuessAddSettings(*this, "PRESSURE "); } void insSettings_t::report() { @@ -117,6 +119,8 @@ void insSettings_t::report() { reportSetting("VELOCITY DISCRETIZATION"); reportSetting("VELOCITY LINEAR SOLVER"); + reportSetting("VELOCITY INITIAL GUESS STRATEGY"); + reportSetting("VELOCITY INITIAL GUESS HISTORY SPACE DIMENSION"); reportSetting("VELOCITY PRECONDITIONER"); if (compareSetting("VELOCITY PRECONDITIONER","MULTIGRID")) { @@ -137,6 +141,8 @@ void insSettings_t::report() { reportSetting("PRESSURE DISCRETIZATION"); reportSetting("PRESSURE LINEAR SOLVER"); + reportSetting("PRESSURE INITIAL GUESS STRATEGY"); + reportSetting("PRESSURE INITIAL GUESS HISTORY SPACE DIMENSION"); reportSetting("PRESSURE PRECONDITIONER"); if (compareSetting("PRESSURE PRECONDITIONER","MULTIGRID")) { @@ -184,6 +190,8 @@ ellipticSettings_t* insSettings_t::extractVelocitySettings() { ellipticSettings_t* velocitySettings = new ellipticSettings_t(comm); + initialGuessAddSettings(*velocitySettings); + for(auto it = velocitySettings->settings.begin(); it != velocitySettings->settings.end(); ++it) { setting_t* set = it->second; const string name = set->getName(); @@ -201,6 +209,8 @@ ellipticSettings_t* insSettings_t::extractPressureSettings() { ellipticSettings_t* pressureSettings = new ellipticSettings_t(comm); + initialGuessAddSettings(*pressureSettings); + for(auto it = pressureSettings->settings.begin(); it != pressureSettings->settings.end(); ++it) { setting_t* set = it->second; const string name = set->getName(); @@ -212,4 +222,4 @@ ellipticSettings_t* insSettings_t::extractPressureSettings() { } return pressureSettings; -} \ No newline at end of file +} diff --git a/solvers/ins/src/insSetup.cpp b/solvers/ins/src/insSetup.cpp index ea56919d9..349f95aeb 100644 --- a/solvers/ins/src/insSetup.cpp +++ b/solvers/ins/src/insSetup.cpp @@ -81,7 +81,9 @@ ins_t& ins_t::Setup(platform_t& platform, mesh_t& mesh, ins->uSolver=NULL; ins->vSolver=NULL; ins->wSolver=NULL; + ins->uLinearSolver=NULL; ins->vLinearSolver=NULL; + ins->wLinearSolver=NULL; if (settings.compareSetting("TIME INTEGRATOR","EXTBDF3") ||settings.compareSetting("TIME INTEGRATOR","SSBDF3")){ @@ -108,9 +110,24 @@ ins_t& ins_t::Setup(platform_t& platform, mesh_t& mesh, ins->vTau = ins->uSolver->tau; ins->vDisc_c0 = settings.compareSetting("VELOCITY DISCRETIZATION", "CONTINUOUS") ? 1 : 0; - ins->vLinearSolver = linearSolver_t::Setup(Nlocal, Nhalo, - platform, *(ins->vSettings), mesh.comm, - ins->vDisc_c0, ins->uSolver->o_weight); + + //ins->vLinearSolver = linearSolver_t::Setup(Nlocal, Nhalo, + // platform, *(ins->vSettings), mesh.comm, + // ins->vDisc_c0, ins->vSolver->o_weight); + + ins->uLinearSolver = initialGuessSolver_t::Setup(Nlocal, Nhalo, + platform, *(ins->vSettings), mesh.comm, + ins->vDisc_c0, ins->uSolver->o_weight); + + ins->vLinearSolver = initialGuessSolver_t::Setup(Nlocal, Nhalo, + platform, *(ins->vSettings), mesh.comm, + ins->vDisc_c0, ins->vSolver->o_weight); + if (mesh.dim == 3) { + ins->wLinearSolver = initialGuessSolver_t::Setup(Nlocal, Nhalo, + platform, *(ins->vSettings), mesh.comm, + ins->vDisc_c0, ins->wSolver->o_weight); + } + } else { ins->vDisc_c0 = 0; @@ -135,9 +152,14 @@ ins_t& ins_t::Setup(platform_t& platform, mesh_t& mesh, ins->pTau = ins->pSolver->tau; ins->pDisc_c0 = settings.compareSetting("PRESSURE DISCRETIZATION", "CONTINUOUS") ? 1 : 0; - ins->pLinearSolver = linearSolver_t::Setup(Nlocal, Nhalo, - platform, *(ins->pSettings), mesh.comm, - ins->pDisc_c0, ins->pSolver->o_weight); + + //ins->pLinearSolver = linearSolver_t::Setup(Nlocal, Nhalo, + // platform, *(ins->pSettings), mesh.comm, + // ins->pDisc_c0, ins->pSolver->o_weight); + + ins->pLinearSolver = initialGuessSolver_t::Setup(Nlocal, Nhalo, + platform, *(ins->pSettings), mesh.comm, + ins->pDisc_c0, ins->pSolver->o_weight); } //Solver tolerances @@ -434,7 +456,9 @@ ins_t::~ins_t() { if (wSolver) delete wSolver; if (timeStepper) delete timeStepper; if (pLinearSolver) delete pLinearSolver; + if (uLinearSolver) delete uLinearSolver; if (vLinearSolver) delete vLinearSolver; + if (wLinearSolver) delete wLinearSolver; if (subStepper) delete subStepper; if (subcycler) { subcycler->subCycleAdvectionKernel.free(); @@ -443,4 +467,4 @@ ins_t::~ins_t() { if (vTraceHalo) vTraceHalo->Free(); if (pTraceHalo) pTraceHalo->Free(); -} \ No newline at end of file +} diff --git a/solvers/ins/src/insVelocitySolve.cpp b/solvers/ins/src/insVelocitySolve.cpp index dd1916389..e850c37f5 100644 --- a/solvers/ins/src/insVelocitySolve.cpp +++ b/solvers/ins/src/insVelocitySolve.cpp @@ -98,10 +98,10 @@ void ins_t::VelocitySolve(occa::memory& o_U, occa::memory& o_RHS, wSolver->lambda = gamma/nu; // Solve lambda*U - Laplacian*U = rhs - NiterU = uSolver->Solve(*vLinearSolver, o_UH, o_rhsU, velTOL, maxIter, verbose); + NiterU = uSolver->Solve(*uLinearSolver, o_UH, o_rhsU, velTOL, maxIter, verbose); NiterV = vSolver->Solve(*vLinearSolver, o_VH, o_rhsV, velTOL, maxIter, verbose); if (mesh.dim==3) - NiterW = wSolver->Solve(*vLinearSolver, o_WH, o_rhsW, velTOL, maxIter, verbose); + NiterW = wSolver->Solve(*wLinearSolver, o_WH, o_rhsW, velTOL, maxIter, verbose); // merge arrays back, and enter BCs if C0 velocityBCKernel(mesh.Nelements, diff --git a/test/makefile b/test/makefile index c1a353672..9d33e7af0 100644 --- a/test/makefile +++ b/test/makefile @@ -46,7 +46,7 @@ Can use "make verbose=true" for verbose output. endef ifeq (,$(filter info help test test-mesh test-gradient test-advection test-acoustics \ - test-elliptic test-fpe test-cns test-bns test-ins test-core,$(MAKECMDGOALS))) + test-elliptic test-fpe test-cns test-bns test-ins test-initial-guess test-core,$(MAKECMDGOALS))) ifneq (,$(MAKECMDGOALS)) $(error ${TEST_HELP_MSG}) endif @@ -69,7 +69,7 @@ CORE_DIR =${LIBP_DIR}/core TEST_DIR =${LIBP_DIR}/test .PHONY: all help info test test-mesh test-gradient test-advection test-acoustics \ - test-elliptic test-fpe test-cns test-bns test-ins test-core + test-elliptic test-fpe test-cns test-bns test-ins test-initial-guess test-core all: test-all @@ -119,5 +119,8 @@ test-bns: test-ins: @./testIns.py +test-initial-guess: + @./testInitialGuess.py + test-all: @./test.py diff --git a/test/test.py b/test/test.py index c7ad51dfa..1ec270aaf 100755 --- a/test/test.py +++ b/test/test.py @@ -150,8 +150,10 @@ def test(name, cmd, settings, referenceNorm, ranks=1): import testTimeStepper import testLinearSolver import testParAlmond + import testInitialGuess failCount=0; + failCount+=testInitialGuess.main() failCount+=testMesh.main() failCount+=testGradient.main() failCount+=testAdvection.main() @@ -165,4 +167,4 @@ def test(name, cmd, settings, referenceNorm, ranks=1): failCount+=testLinearSolver.main() failCount+=testParAlmond.main() - sys.exit(failCount) \ No newline at end of file + sys.exit(failCount) diff --git a/test/testInitialGuess.py b/test/testInitialGuess.py new file mode 100755 index 000000000..892648641 --- /dev/null +++ b/test/testInitialGuess.py @@ -0,0 +1,176 @@ +#!/usr/bin/env python3 + +##################################################################################### +# +#The MIT License (MIT) +# +#Copyright (c) 2017 Tim Warburton, Noel Chalmers, Jesse Chan, Ali Karakus +# +#Permission is hereby granted, free of charge, to any person obtaining a copy +#of this software and associated documentation files (the "Software"), to deal +#in the Software without restriction, including without limitation the rights +#to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +#copies of the Software, and to permit persons to whom the Software is +#furnished to do so, subject to the following conditions: +# +#The above copyright notice and this permission notice shall be included in all +#copies or substantial portions of the Software. +# +#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +#IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +#FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +#AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +#LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +#OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +#SOFTWARE. +# +##################################################################################### + +from test import * + +insData2D = insDir + "/data/insVortex2D.h" +insData3D = insDir + "/data/insBeltrami3D.h" + +def insSettings(rcformat="2.0", data_file=insData2D, + mesh="BOX", dim=2, element=4, nx=10, ny=10, nz=10, boundary_flag=2, + degree=4, thread_model=device, platform_number=0, device_number=0, + viscosity=0.05, + advection_type="COLLOCATION", + time_integrator="EXTBDF3", start_time=0.0, final_time=0.1, + num_subcycles=4, subcycle_integrator="DOPRI5", + velocity_discretization="CONTINUOUS", + velocity_linear_solver="PCG", + velocity_precon="JACOBI", + velocity_multigrid_smoother="CHEBYSHEV", + velocity_paralmond_cycle="VCYCLE", + velocity_paralmond_smoother="CHEBYSHEV", + velocity_initial_guess_strategy="EXTRAP", + velocity_initial_guess_history_space_dimension=9, + velocity_initial_guess_extrap_degree=3, + velocity_initial_guess_extrap_coeffs_method="MINNORM", + pressure_discretization="CONTINUOUS", + pressure_linear_solver="FPCG", + pressure_precon="MULTIGRID", + pressure_multigrid_smoother="CHEBYSHEV", + pressure_paralmond_cycle="KCYCLE", + pressure_paralmond_smoother="CHEBYSHEV", + pressure_initial_guess_strategy="EXTRAP", + pressure_initial_guess_history_space_dimension=9, + pressure_initial_guess_extrap_degree=3, + pressure_initial_guess_extrap_coeffs_method="MINNORM", + output_to_file="FALSE"): + return [setting_t("FORMAT", rcformat), + setting_t("DATA FILE", data_file), + setting_t("MESH FILE", mesh), + setting_t("MESH DIMENSION", dim), + setting_t("ELEMENT TYPE", element), + setting_t("BOX NX", nx), + setting_t("BOX NY", ny), + setting_t("BOX NZ", nz), + setting_t("BOX DIMX", 1), + setting_t("BOX DIMY", 1), + setting_t("BOX DIMZ", 1), + setting_t("BOX BOUNDARY FLAG", boundary_flag), + setting_t("POLYNOMIAL DEGREE", degree), + setting_t("THREAD MODEL", thread_model), + setting_t("PLATFORM NUMBER", platform_number), + setting_t("DEVICE NUMBER", device_number), + setting_t("VISCOSITY", viscosity), + setting_t("ADVECTION TYPE", advection_type), + setting_t("TIME INTEGRATOR", time_integrator), + setting_t("NUMBER OF SUBCYCLES", num_subcycles), + setting_t("SUBCYCLING TIME INTEGRATOR", subcycle_integrator), + setting_t("START TIME", start_time), + setting_t("FINAL TIME", final_time), + setting_t("VELOCITY DISCRETIZATION", velocity_discretization), + setting_t("VELOCITY LINEAR SOLVER", velocity_linear_solver), + setting_t("VELOCITY PRECONDITIONER", velocity_precon), + setting_t("VELOCITY MULTIGRID SMOOTHER", velocity_multigrid_smoother), + setting_t("VELOCITY PARALMOND CYCLE", velocity_paralmond_cycle), + setting_t("VELOCITY PARALMOND SMOOTHER", velocity_paralmond_smoother), + setting_t("VELOCITY VERBOSE", "TRUE"), + setting_t("VELOCITY INITIAL GUESS STRATEGY", velocity_initial_guess_strategy), + setting_t("VELOCITY INITIAL GUESS HISTORY SPACE DIMENSION", velocity_initial_guess_history_space_dimension), + setting_t("VELOCITY INITIAL GUESS EXTRAP DEGREE", velocity_initial_guess_extrap_degree), + setting_t("VELOCITY INITIAL GUESS EXTRAP COEFFS METHOD", velocity_initial_guess_extrap_coeffs_method), + setting_t("PRESSURE DISCRETIZATION", pressure_discretization), + setting_t("PRESSURE LINEAR SOLVER", pressure_linear_solver), + setting_t("PRESSURE PRECONDITIONER", pressure_precon), + setting_t("PRESSURE MULTIGRID SMOOTHER", pressure_multigrid_smoother), + setting_t("PRESSURE PARALMOND CYCLE", pressure_paralmond_cycle), + setting_t("PRESSURE PARALMOND SMOOTHER", pressure_paralmond_smoother), + setting_t("PRESSURE VERBOSE", "TRUE"), + setting_t("PRESSURE INITIAL GUESS STRATEGY", pressure_initial_guess_strategy), + setting_t("PRESSURE INITIAL GUESS HISTORY SPACE DIMENSION", pressure_initial_guess_history_space_dimension), + setting_t("PRESSURE INITIAL GUESS EXTRAP DEGREE", pressure_initial_guess_extrap_degree), + setting_t("PRESSURE INITIAL GUESS EXTRAP COEFFS METHOD", pressure_initial_guess_extrap_coeffs_method), + setting_t("OUTPUT TO FILE", output_to_file)] + +def main(): + failCount=0; + + + # IG + #test classic + failCount += test(name="testInitialGuess_VextrapPqr", + cmd=insBin, + settings=insSettings(element=3,data_file=insData2D,dim=2, + velocity_initial_guess_strategy="QR", + pressure_initial_guess_strategy="QR"), + referenceNorm=0.820792600394893) + + failCount += test(name="testInitialGuess_VextrapPqr", + cmd=insBin, + settings=insSettings(element=4,data_file=insData2D,dim=2, + velocity_initial_guess_strategy="EXTRAP", + pressure_initial_guess_strategy="QR"), + referenceNorm=0.820483555285512) + + failCount += test(name="testInitialGuess_VqrPextrap", + cmd=insBin, + settings=insSettings(element=6,data_file=insData3D,dim=3, + velocity_initial_guess_strategy="QR", + pressure_initial_guess_strategy="EXTRAP", + nx=6, ny=6, nz=6, degree=2), + referenceNorm=1.20768114201421) + + failCount += test(name="testInitialGuess_VspextrapPspextrap", + cmd=insBin, + settings=insSettings(element=12,data_file=insData3D,dim=3, + velocity_initial_guess_strategy="EXTRAP", + velocity_initial_guess_extrap_coeffs_method="CPQR", + pressure_initial_guess_strategy="EXTRAP", + pressure_initial_guess_extrap_coeffs_method="CPQR", + nx=6, ny=6, nz=6, degree=2), + referenceNorm=1.20751891719514) + + failCount += test(name="testInitialGuess_VclassicPclassic", + cmd=insBin, + settings=insSettings(element=12,data_file=insData3D,dim=3, + nx=6, ny=6, nz=6, degree=2, + advection_type="CUBATURE", + time_integrator="SSBDF3", + velocity_initial_guess_strategy="CLASSIC", + pressure_initial_guess_strategy="CLASSIC"), + referenceNorm=1.20463427860078) + + + #test wth MPI + failCount += test(name="testInitialGuess_MPI", ranks=4, + cmd=insBin, + settings=insSettings(element=3,data_file=insData2D,dim=2,output_to_file="TRUE", + velocity_initial_guess_strategy="QR", + pressure_initial_guess_strategy="QR"), + referenceNorm=0.820785579220035) + + #clean up + for file_name in os.listdir(testDir): + if file_name.endswith('.vtu'): + os.remove(testDir + "/" + file_name) + + return failCount + +if __name__ == "__main__": + failCount=0; + failCount+=main() + sys.exit(failCount)