diff --git a/src/core/cuda/FEMSolver.cu b/src/core/cuda/FEMSolver.cu index 0b8776b..0491133 100644 --- a/src/core/cuda/FEMSolver.cu +++ b/src/core/cuda/FEMSolver.cu @@ -5,13 +5,10 @@ FEMSolver::FEMSolver( verbose_(verbose), filename_(fname), maxLevels_(100), - minRows_(1), maxIters_(100), preInnerIters_(5), postInnerIters_(5), postRelaxes_(1), - preSweeps_(1), - postSweeps_(1), cycleIters_(1), dsType_(0), topSize_(256), @@ -21,9 +18,7 @@ FEMSolver::FEMSolver( convergeType_(ABSOLUTE_CONVERGENCE), tolerance_(1e-6), cycleType_(V_CYCLE), - smootherType_(GAUSSSEIDEL), solverType_(AMG_SOLVER), - algoType_(CLASSICAL), smootherWeight_(1.0), proOmega_(0.67), device_(0), @@ -50,7 +45,7 @@ void FEMSolver::solveFEM(Matrix_ell_h* A_d, //register configuration parameters AMG amg(this); //setup device - amg.setup(*A_d, this->triMesh_, this->tetMesh_, this->verbose_); + amg.setup(*A_d); //print info if (this->verbose_) amg.printGridStatistics(); @@ -58,7 +53,7 @@ void FEMSolver::solveFEM(Matrix_ell_h* A_d, Vector_d_CG x_d(*x_h); Vector_d_CG b_d(*b_h); //run solver - amg.solve(b_d, x_d, this->verbose_); + amg.solve(b_d, x_d); //copy back to host *x_h = Vector_h_CG(x_d); *b_h = Vector_h_CG(b_d); diff --git a/src/core/cuda/aggregator.cu b/src/core/cuda/aggregator.cu index 77871d1..f968cf3 100644 --- a/src/core/cuda/aggregator.cu +++ b/src/core/cuda/aggregator.cu @@ -6,8 +6,7 @@ template Aggregator* Aggregator::allocate(FEMSolver *cfg) { - int misType = cfg->aggregatorType_; - if (misType == 0) + if (cfg->aggregatorType_ == 0) return new MIS_Aggregator < Matrix, Vector > ; else return new RandMIS_Aggregator < Matrix, Vector > ; diff --git a/src/core/cuda/amg.cu b/src/core/cuda/amg.cu index 9011f16..fe59047 100644 --- a/src/core/cuda/amg.cu +++ b/src/core/cuda/amg.cu @@ -12,15 +12,6 @@ template AMG::AMG(FEMSolver * cfg) : fine(0), cfg(cfg) { - cycle_iters = cfg->cycleIters_; - max_iters = cfg->maxIters_; - presweeps = cfg->preSweeps_; - postsweeps = cfg->postSweeps_; - tolerance = cfg->tolerance_; - cycle = cfg->cycleType_; - convergence = cfg->convergeType_; - solver = cfg->solverType_; - DS_type = cfg->dsType_; } template @@ -35,9 +26,9 @@ bool AMG::converged(const Vector &r, ValueType &nrm) // nrm = get_norm(r, norm); nrm = cusp::blas::nrm2(r); - if (convergence == ABSOLUTE_CONVERGENCE) + if (this->cfg->convergeType_ == ABSOLUTE_CONVERGENCE) { - return nrm <= tolerance; + return nrm <= this->cfg->tolerance_; } else //if (convergence==RELATIVE) { if (initial_nrm == -1) @@ -46,7 +37,7 @@ bool AMG::converged(const Vector &r, ValueType &nrm) return false; } //if the norm has been reduced by the tolerance then return true - if (nrm / initial_nrm <= tolerance) + if (nrm / initial_nrm <= this->cfg->tolerance_) return true; else return false; @@ -57,10 +48,7 @@ bool AMG::converged(const Vector &r, ValueType &nrm) * Creates the AMG hierarchy *********************************************************/ template -void AMG::setup(const Matrix_d &Acsr_d, TriMesh* meshPtr, - TetMesh* tetmeshPtr, bool verbose) { - int max_levels = this->cfg->maxLevels_; - int mesh_type = meshPtr == NULL ? 1 : 0; +void AMG::setup(const Matrix_d &Acsr_d) { num_levels = 1; @@ -73,18 +61,14 @@ void AMG::setup(const Matrix_d &Acsr_d, TriMesh* meshPtr, level->level_id = 0; level->nn = Acsr_d.num_rows; - if (mesh_type == 0) - level->m_meshPtr = meshPtr; - else - level->m_tetmeshPtr = tetmeshPtr; + level->m_meshPtr = this->cfg->triMesh_; + level->m_tetmeshPtr = this->cfg->tetMesh_; - int topsize = this->cfg->topSize_; - - if (verbose) std::cout << "Entering AMG setup loop." << std::endl; + if (this->cfg->verbose_) std::cout << "Entering AMG setup loop." << std::endl; while (true) { int N = level->A_d.num_rows; - if (N < topsize || num_levels >= max_levels) + if (N < this->cfg->topSize_ || num_levels >= this->cfg->maxLevels_) { coarsestlevel = num_levels - 1; Matrix_h Atmp = level->A_d; @@ -92,40 +76,40 @@ void AMG::setup(const Matrix_d &Acsr_d, TriMesh* meshPtr, LU = cusp::detail::lu_solver(coarse_dense); break; } - if (verbose) std::cout << "Finished with lu_solver." << std::endl; + if (this->cfg->verbose_) std::cout << "Finished with lu_solver." << std::endl; level->next = AMG_Level::allocate(this); - if (verbose) std::cout << "Finished with AMG_Level_allocate." << std::endl; - level->createNextLevel(verbose); - if (verbose) std::cout << "Finished with createNextLevel call." << std::endl; + if (this->cfg->verbose_) std::cout << "Finished with AMG_Level_allocate." << std::endl; + level->createNextLevel(this->cfg->verbose_); + if (this->cfg->verbose_) std::cout << "Finished with createNextLevel call." << std::endl; if (level->level_id == 0) { Ahyb_d_CG = level->A_d; } - if (verbose) std::cout << "Copied A_d." << std::endl; + if (this->cfg->verbose_) std::cout << "Copied A_d." << std::endl; level->setup(); //allocate smoother !! must be after createNextLevel since A_d is used - if (verbose) std::cout << "level->setup." << std::endl; + if (this->cfg->verbose_) std::cout << "level->setup." << std::endl; level->next->level_id = num_levels; level->next->nn = level->nnout; level->next->m_xadj_d = level->m_xadjout_d; level->next->m_adjncy_d = level->m_adjncyout_d; int nextN = level->next->A_d.num_rows; - if (verbose) std::cout << "level->next finished" << std::endl; + if (this->cfg->verbose_) std::cout << "level->next finished" << std::endl; //resize vectors level->xc_d = Vector_d(nextN, -1); level->bc_d = Vector_d(nextN, -1); - if (verbose) std::cout << "resize vectors finished" << std::endl; + if (this->cfg->verbose_) std::cout << "resize vectors finished" << std::endl; //advance to the next level level = level->next; //increment the level counter num_levels++; - if (verbose) + if (this->cfg->verbose_) std::cout << "Looping with num_levels=" << num_levels << std::endl; } @@ -135,20 +119,21 @@ void AMG::setup(const Matrix_d &Acsr_d, TriMesh* meshPtr, * Launches a single iteration of the outer solver ***************************************************/ template -void AMG::solve_iteration(const Vector_d_CG &b, Vector_d_CG &x, bool verbose) +void AMG::solve_iteration(const Vector_d_CG &b, Vector_d_CG &x) { Vector_d b_d(b); Vector_d x_d(x); - switch (solver) + switch (this->cfg->solverType_) { case AMG_SOLVER: //perform a single cycle on the amg hierarchy - fine->cycle(cycle, b_d, x_d, verbose); + fine->cycle(this->cfg->cycleType_, b_d, x_d, this->cfg->verbose_); x = Vector_d_CG(x_d); break; case PCG_SOLVER: //create a single CG cycle (this will run CG immediatly) - CG_Flex_Cycle(cycle, cycle_iters, fine, Ahyb_d_CG, b, x, tolerance, max_iters, verbose); //DHL + CG_Flex_Cycle(this->cfg->cycleType_, this->cfg->cycleIters_, + fine, Ahyb_d_CG, b, x, this->cfg->tolerance_, this->cfg->maxIters_, this->cfg->verbose_); //DHL break; } } @@ -158,15 +143,16 @@ void AMG::solve_iteration(const Vector_d_CG &b, Vector_d_CG &x, * Solves the AMG system *********************************************************/ template -void AMG::solve(const Vector_d_CG &b_d, Vector_d_CG &x_d, bool verbose) +void AMG::solve(const Vector_d_CG &b_d, Vector_d_CG &x_d) { - if (verbose) + if (this->cfg->verbose_) printf("AMG Solve:\n"); iterations = 0; initial_nrm = -1; - if (verbose) { - std::cout << std::setw(15) << "iter" << std::setw(15) << "time(s)" << std::setw(15) << "residual" << std::setw(15) << "rate" << std::setw(15) << std::endl; + if (this->cfg->verbose_) { + std::cout << std::setw(15) << "iter" << std::setw(15) << "time(s)" << std::setw(15) + << "residual" << std::setw(15) << "rate" << std::setw(15) << std::endl; std::cout << " ----------------------------------------------------\n"; } solve_start = CLOCK(); @@ -174,11 +160,11 @@ void AMG::solve(const Vector_d_CG &b_d, Vector_d_CG &x_d, bool v do { //launch a single solve iteration - solve_iteration(b_d, x_d, verbose); + solve_iteration(b_d, x_d); done = true; //converged(r_d, nrm); - } while (++iterations < max_iters && !done); - if (verbose) + } while (++iterations < this->cfg->maxIters_ && !done); + if (this->cfg->verbose_) std::cout << " ----------------------------------------------------\n"; solve_stop = CLOCK(); @@ -192,7 +178,8 @@ void AMG::printGridStatistics() int total_nnz = 0; AMG_Level *level = fine; // DHL std::cout << "AMG Grid:\n"; - std::cout << std::setw(15) << "LVL" << std::setw(10) << "ROWS" << std::setw(18) << "NNZ" << std::setw(10) << "SPRSTY" << std::endl; + std::cout << std::setw(15) << "LVL" << std::setw(10) << "ROWS" << + std::setw(18) << "NNZ" << std::setw(10) << "SPRSTY" << std::endl; std::cout << " ---------------------------------------------\n"; level = fine; @@ -200,7 +187,11 @@ void AMG::printGridStatistics() { total_rows += level->A_d.num_rows; total_nnz += level->A_d.num_entries; - std::cout << std::setw(15) << level->level_id << std::setw(10) << level->A_d.num_rows << std::setw(18) << level->A_d.num_entries << std::setw(10) << std::setprecision(3) << level->A_d.num_entries / (double)(level->A_d.num_rows * level->A_d.num_cols) << std::setprecision(6) << std::endl; + std::cout << std::setw(15) << level->level_id << std::setw(10) << + level->A_d.num_rows << std::setw(18) << level->A_d.num_entries << + std::setw(10) << std::setprecision(3) << + level->A_d.num_entries / (double)(level->A_d.num_rows * level->A_d.num_cols) + << std::setprecision(6) << std::endl; level = level->next; } diff --git a/src/core/cuda/amg_level.cu b/src/core/cuda/amg_level.cu index dcb8140..6791113 100644 --- a/src/core/cuda/amg_level.cu +++ b/src/core/cuda/amg_level.cu @@ -5,21 +5,15 @@ template AMG_Level::~AMG_Level() { - if(smoother != 0) delete smoother; - if(next != 0) delete next; + if (smoother != 0) delete smoother; + if (next != 0) delete next; } #include template AMG_Level* AMG_Level::allocate(AMG*amg) { - AlgorithmType alg = amg->cfg->algoType_; - switch(alg) - { - case CLASSICAL: - default: - return new SmoothedMG_AMG_Level (amg); - } + return new SmoothedMG_AMG_Level(amg); } /****************************************************** @@ -28,111 +22,109 @@ AMG_Level* AMG_Level::allocate(AMG void AMG_Level::cycle(CycleType cycle, Vector_d& b_d, Vector_d& x_d, bool verbose) { - if(isCoarsest()) //solve directly - { - cusp::array1d temp_b(b_d); - cusp::array1d temp_x(x_d.size()); - amg->LU(temp_b, temp_x); - x_d = temp_x; - return; - } - else - { - switch (DS_type) - { - case 0: - smoother->preRRRFullSymmetric(AinSysCoo_d, AoutSys_d, AinBlockIdx_d, AoutBlockIdx_d, aggregateIdx_d, partitionIdx_d, restrictorFull_d, ipermutation_d, b_d, x_d, bc_d, - level_id, largestblocksize, largest_num_entries, verbose); - break; - case 1: - smoother->preRRRFull(AinEll_d, Aout_d, aggregateIdx_d, partitionIdx_d, restrictorFull_d, ipermutation_d, b_d, x_d, bc_d, level_id, largestblocksize); - break; - case 2: - smoother->preRRRFullCsr(AinCSR_d, Aout_d, aggregateIdx_d, partitionIdx_d, restrictorFull_d, ipermutation_d, b_d, x_d, bc_d, level_id, largestblocksize, largest_num_entries, largest_num_per_row); - break; - default: - cout << "Wrong DStype 1!" << endl; - exit(0); - - } - next->cycle(V_CYCLE, bc_d, xc_d, verbose); - switch (DS_type) - { - case 0: - smoother->postPCRFullSymmetric(AinSysCoo_d, AinBlockIdx_d, AoutSys_d, AoutBlockIdx_d, aggregateIdx_d, partitionIdx_d, prolongatorFull_d, ipermutation_d, b_d, x_d, xc_d, - level_id, largestblocksize, largest_num_entries); - break; - case 1: - smoother->postPCRFull(AinEll_d, Aout_d, AoutBlockIdx_d, aggregateIdx_d, partitionIdx_d, prolongatorFull_d, ipermutation_d, b_d, x_d, xc_d, level_id, largestblocksize); - break; - case 2: - smoother->postPCRFullCsr(AinCSR_d, Aout_d, AoutBlockIdx_d, aggregateIdx_d, partitionIdx_d, prolongatorFull_d, ipermutation_d, b_d, x_d, xc_d, level_id, largestblocksize, largest_num_entries, largest_num_per_row); - break; - default: - cout << "Wrong DStype 0!" << endl; - exit(0); - - } - - } + if (isCoarsest()) //solve directly + { + cusp::array1d temp_b(b_d); + cusp::array1d temp_x(x_d.size()); + amg->LU(temp_b, temp_x); + x_d = temp_x; + return; + } else + { + switch (amg->cfg->dsType_) + { + case 0: + smoother->preRRRFullSymmetric(AinSysCoo_d, AoutSys_d, AinBlockIdx_d, AoutBlockIdx_d, aggregateIdx_d, partitionIdx_d, restrictorFull_d, ipermutation_d, b_d, x_d, bc_d, + level_id, largestblocksize, largest_num_entries, verbose); + break; + case 1: + smoother->preRRRFull(AinEll_d, Aout_d, aggregateIdx_d, partitionIdx_d, restrictorFull_d, ipermutation_d, b_d, x_d, bc_d, level_id, largestblocksize); + break; + case 2: + smoother->preRRRFullCsr(AinCSR_d, Aout_d, aggregateIdx_d, partitionIdx_d, restrictorFull_d, ipermutation_d, b_d, x_d, bc_d, level_id, largestblocksize, largest_num_entries, largest_num_per_row); + break; + default: + cout << "Wrong DStype 1!" << endl; + exit(0); + + } + next->cycle(V_CYCLE, bc_d, xc_d, verbose); + switch (amg->cfg->dsType_) + { + case 0: + smoother->postPCRFullSymmetric(AinSysCoo_d, AinBlockIdx_d, AoutSys_d, AoutBlockIdx_d, aggregateIdx_d, partitionIdx_d, prolongatorFull_d, ipermutation_d, b_d, x_d, xc_d, + level_id, largestblocksize, largest_num_entries); + break; + case 1: + smoother->postPCRFull(AinEll_d, Aout_d, AoutBlockIdx_d, aggregateIdx_d, partitionIdx_d, prolongatorFull_d, ipermutation_d, b_d, x_d, xc_d, level_id, largestblocksize); + break; + case 2: + smoother->postPCRFullCsr(AinCSR_d, Aout_d, AoutBlockIdx_d, aggregateIdx_d, partitionIdx_d, prolongatorFull_d, ipermutation_d, b_d, x_d, xc_d, level_id, largestblocksize, largest_num_entries, largest_num_per_row); + break; + default: + cout << "Wrong DStype 0!" << endl; + exit(0); + + } + + } } template void AMG_Level::cycle_level0(CycleType cycle, Vector_d_CG &b_d_CG, Vector_d_CG &x_d_CG, bool verbose) { - if(isCoarsest()) //solve directly - { - cusp::array1d temp_b = b_d_CG; - cusp::array1d temp_x(x_d_CG.size()); - amg->LU(temp_b, temp_x); - x_d_CG = temp_x; - - return; - } - else - { - Vector_d b_d = b_d_CG; - Vector_d x_d(x_d_CG.size(), 0.0); - switch (DS_type) - { - case 0: - smoother->preRRRFullSymmetric(AinSysCoo_d, AoutSys_d, AinBlockIdx_d, AoutBlockIdx_d, aggregateIdx_d, partitionIdx_d, restrictorFull_d, ipermutation_d, b_d, x_d, bc_d, - level_id, largestblocksize, largest_num_entries, verbose); - break; - case 1: - smoother->preRRRFull(AinEll_d, Aout_d, aggregateIdx_d, partitionIdx_d, restrictorFull_d, ipermutation_d, b_d, x_d, bc_d, level_id, largestblocksize); - break; - case 2: - smoother->preRRRFullCsr(AinCSR_d, Aout_d, aggregateIdx_d, partitionIdx_d, restrictorFull_d, ipermutation_d, b_d, x_d, bc_d, level_id, largestblocksize, largest_num_entries, largest_num_per_row); - break; - default: - cout << "Wrong DStype 1!" << endl; - exit(0); - - } - next->cycle(V_CYCLE, bc_d, xc_d,verbose); - switch (DS_type) - { - case 0: - smoother->postPCRFullSymmetric(AinSysCoo_d, AinBlockIdx_d, AoutSys_d, AoutBlockIdx_d, aggregateIdx_d, partitionIdx_d, prolongatorFull_d, ipermutation_d, b_d, x_d, xc_d, - level_id, largestblocksize, largest_num_entries); - break; - case 1: - smoother->postPCRFull(AinEll_d, Aout_d, AoutBlockIdx_d, aggregateIdx_d, partitionIdx_d, prolongatorFull_d, ipermutation_d, b_d, x_d, xc_d, level_id, largestblocksize); - break; - case 2: - smoother->postPCRFullCsr(AinCSR_d, Aout_d, AoutBlockIdx_d, aggregateIdx_d, partitionIdx_d, prolongatorFull_d, ipermutation_d, b_d, x_d, xc_d, level_id, largestblocksize, largest_num_entries, largest_num_per_row); - break; - default: - cout << "Wrong DStype 0!" << endl; - exit(0); - - } - - x_d_CG = x_d; - b_d_CG = b_d; - } + if (isCoarsest()) //solve directly + { + cusp::array1d temp_b = b_d_CG; + cusp::array1d temp_x(x_d_CG.size()); + amg->LU(temp_b, temp_x); + x_d_CG = temp_x; + + return; + } else + { + Vector_d b_d = b_d_CG; + Vector_d x_d(x_d_CG.size(), 0.0); + switch (amg->cfg->dsType_) + { + case 0: + smoother->preRRRFullSymmetric(AinSysCoo_d, AoutSys_d, AinBlockIdx_d, AoutBlockIdx_d, aggregateIdx_d, partitionIdx_d, restrictorFull_d, ipermutation_d, b_d, x_d, bc_d, + level_id, largestblocksize, largest_num_entries, verbose); + break; + case 1: + smoother->preRRRFull(AinEll_d, Aout_d, aggregateIdx_d, partitionIdx_d, restrictorFull_d, ipermutation_d, b_d, x_d, bc_d, level_id, largestblocksize); + break; + case 2: + smoother->preRRRFullCsr(AinCSR_d, Aout_d, aggregateIdx_d, partitionIdx_d, restrictorFull_d, ipermutation_d, b_d, x_d, bc_d, level_id, largestblocksize, largest_num_entries, largest_num_per_row); + break; + default: + cout << "Wrong DStype 1!" << endl; + exit(0); + + } + next->cycle(V_CYCLE, bc_d, xc_d, verbose); + switch (amg->cfg->dsType_) + { + case 0: + smoother->postPCRFullSymmetric(AinSysCoo_d, AinBlockIdx_d, AoutSys_d, AoutBlockIdx_d, aggregateIdx_d, partitionIdx_d, prolongatorFull_d, ipermutation_d, b_d, x_d, xc_d, + level_id, largestblocksize, largest_num_entries); + break; + case 1: + smoother->postPCRFull(AinEll_d, Aout_d, AoutBlockIdx_d, aggregateIdx_d, partitionIdx_d, prolongatorFull_d, ipermutation_d, b_d, x_d, xc_d, level_id, largestblocksize); + break; + case 2: + smoother->postPCRFullCsr(AinCSR_d, Aout_d, AoutBlockIdx_d, aggregateIdx_d, partitionIdx_d, prolongatorFull_d, ipermutation_d, b_d, x_d, xc_d, level_id, largestblocksize, largest_num_entries, largest_num_per_row); + break; + default: + cout << "Wrong DStype 0!" << endl; + exit(0); + + } + + x_d_CG = x_d; + b_d_CG = b_d; + } } #include @@ -140,17 +132,17 @@ void AMG_Level::cycle_level0(CycleType cycle, Vector_d_CG &b_d_C template void AMG_Level::setup() { - smoother = Smoother::allocate(amg->cfg, A_d); + smoother = Smoother::allocate(amg->cfg, A_d); } template std::vector AMG_Level::getOriginalRows() { - return originalRow; + return originalRow; } /**************************************** * Explict instantiations ***************************************/ -template class AMG_Level; -template class AMG_Level; +template class AMG_Level < Matrix_h, Vector_h > ; +template class AMG_Level < Matrix_d, Vector_d > ; diff --git a/src/core/cuda/gauss_seidel.cu b/src/core/cuda/gauss_seidel.cu index ecfb1f4..f1fddc6 100644 --- a/src/core/cuda/gauss_seidel.cu +++ b/src/core/cuda/gauss_seidel.cu @@ -63,7 +63,7 @@ gauss_seidel::gauss_seidel(FEMSolver *cfg, const Matrix_d& Ainit { cusp::detail::extract_diagonal(Ainit, this->diag); post_relaxes = cfg->postRelaxes_; - weight = cfg->smootherWeight_; + smootherWeight = cfg->smootherWeight_; nPreInnerIter = cfg->preInnerIters_; nPostInnerIter = cfg->postInnerIters_; } @@ -128,7 +128,7 @@ void gauss_seidel::smooth(const Matrix_d &A, const Vector_d thrust::raw_pointer_cast(&A.values[0]), thrust::raw_pointer_cast(&diag[0]), thrust::raw_pointer_cast(&b[0]), - weight, + smootherWeight, thrust::raw_pointer_cast(&x[0])); } @@ -416,7 +416,7 @@ void gauss_seidel::preRRRFullCsr(const cusp::csr_matrix::preRRRFullCsr(const cusp::csr_matrix::preRRRFullCsr(const cusp::csr_matrix::preRRRFullCsr(const cusp::csr_matrix::preRRRFullCsr(const cusp::csr_matrix::preRRRFullCsr(const cusp::csr_matrix::preRRRFullCsr(const cusp::csr_matrix::preRRRFullCsr(const cusp::csr_matrix::preRRRFullCsr(const cusp::csr_matrix::preRRRFullCsr(const cusp::csr_matrix::preRRRFullCsr(const cusp::csr_matrix::preRRRFullCsr(const cusp::csr_matrix::preRRRFullCsr(const cusp::csr_matrix::preRRRFullCsr(const cusp::csr_matrix::preRRRFullCsr(const cusp::csr_matrix::preRRRFullCsr(const cusp::csr_matrix::preRRRFullCsr(const cusp::csr_matrix::preRRRFullCsr(const cusp::csr_matrix::preRRRFullCsr(const cusp::csr_matrix::preRRRFullCsr(const cusp::csr_matrix::preRRRFullCsr(const cusp::csr_matrix::preRRRFullCsr(const cusp::csr_matrix::preRRRFullCsr(const cusp::csr_matrix::preRRRFullCsr(const cusp::csr_matrix::preRRRFullCsr(const cusp::csr_matrix::preRRRFullCsr(const cusp::csr_matrix::preRRRFullCsr(const cusp::csr_matrix::preRRRFullCsr(const cusp::csr_matrix::preRRRFullCsr(const cusp::csr_matrix::preRRRFullCsr(const cusp::csr_matrix::preRRRFullCsr(const cusp::csr_matrix::preRRRFullCsr(const cusp::csr_matrix::preRRRFullCsr(const cusp::csr_matrix::preRRRFullCsr(const cusp::csr_matrix::preRRRFullSymmetric(const cusp::coo_matri thrust::raw_pointer_cast(&permutation[0]), thrust::raw_pointer_cast(&AinBlockIdx[0]), thrust::raw_pointer_cast(&b[0]), - weight, + smootherWeight, thrust::raw_pointer_cast(&x[0]), thrust::raw_pointer_cast(&residual[0]), nPreInnerIter); @@ -2094,7 +2094,7 @@ void gauss_seidel::preRRRFullSymmetric(const cusp::coo_matri thrust::raw_pointer_cast(&permutation[0]), thrust::raw_pointer_cast(&AinBlockIdx[0]), thrust::raw_pointer_cast(&b[0]), - weight, + smootherWeight, thrust::raw_pointer_cast(&x[0]), thrust::raw_pointer_cast(&residual[0]), nPreInnerIter); @@ -2114,7 +2114,7 @@ void gauss_seidel::preRRRFullSymmetric(const cusp::coo_matri thrust::raw_pointer_cast(&permutation[0]), thrust::raw_pointer_cast(&AinBlockIdx[0]), thrust::raw_pointer_cast(&b[0]), - weight, + smootherWeight, thrust::raw_pointer_cast(&x[0]), thrust::raw_pointer_cast(&residual[0]), nPreInnerIter); @@ -2134,7 +2134,7 @@ void gauss_seidel::preRRRFullSymmetric(const cusp::coo_matri thrust::raw_pointer_cast(&permutation[0]), thrust::raw_pointer_cast(&AinBlockIdx[0]), thrust::raw_pointer_cast(&b[0]), - weight, + smootherWeight, thrust::raw_pointer_cast(&x[0]), thrust::raw_pointer_cast(&residual[0]), nPreInnerIter); @@ -2154,7 +2154,7 @@ void gauss_seidel::preRRRFullSymmetric(const cusp::coo_matri thrust::raw_pointer_cast(&permutation[0]), thrust::raw_pointer_cast(&AinBlockIdx[0]), thrust::raw_pointer_cast(&b[0]), - weight, + smootherWeight, thrust::raw_pointer_cast(&x[0]), thrust::raw_pointer_cast(&residual[0]), nPreInnerIter); @@ -2174,7 +2174,7 @@ void gauss_seidel::preRRRFullSymmetric(const cusp::coo_matri thrust::raw_pointer_cast(&permutation[0]), thrust::raw_pointer_cast(&AinBlockIdx[0]), thrust::raw_pointer_cast(&b[0]), - weight, + smootherWeight, thrust::raw_pointer_cast(&x[0]), thrust::raw_pointer_cast(&residual[0]), nPreInnerIter); @@ -2194,7 +2194,7 @@ void gauss_seidel::preRRRFullSymmetric(const cusp::coo_matri thrust::raw_pointer_cast(&permutation[0]), thrust::raw_pointer_cast(&AinBlockIdx[0]), thrust::raw_pointer_cast(&b[0]), - weight, + smootherWeight, thrust::raw_pointer_cast(&x[0]), thrust::raw_pointer_cast(&residual[0]), nPreInnerIter); @@ -2213,7 +2213,7 @@ void gauss_seidel::preRRRFullSymmetric(const cusp::coo_matri thrust::raw_pointer_cast(&permutation[0]), thrust::raw_pointer_cast(&AinBlockIdx[0]), thrust::raw_pointer_cast(&b[0]), - weight, + smootherWeight, thrust::raw_pointer_cast(&x[0]), thrust::raw_pointer_cast(&residual[0]), nPreInnerIter); @@ -2232,7 +2232,7 @@ void gauss_seidel::preRRRFullSymmetric(const cusp::coo_matri thrust::raw_pointer_cast(&permutation[0]), thrust::raw_pointer_cast(&AinBlockIdx[0]), thrust::raw_pointer_cast(&b[0]), - weight, + smootherWeight, thrust::raw_pointer_cast(&x[0]), thrust::raw_pointer_cast(&residual[0]), nPreInnerIter); @@ -2479,7 +2479,7 @@ void gauss_seidel::preRRRFullSymmetricSync(const cusp::coo_m thrust::raw_pointer_cast(&b[0]), thrust::raw_pointer_cast(&segSyncIdx[0]), thrust::raw_pointer_cast(&partSyncIdx[0]), - weight, + smootherWeight, thrust::raw_pointer_cast(&x[0]), thrust::raw_pointer_cast(&residual[0]), nPreInnerIter); @@ -2499,7 +2499,7 @@ void gauss_seidel::preRRRFullSymmetricSync(const cusp::coo_m thrust::raw_pointer_cast(&b[0]), thrust::raw_pointer_cast(&segSyncIdx[0]), thrust::raw_pointer_cast(&partSyncIdx[0]), - weight, + smootherWeight, thrust::raw_pointer_cast(&x[0]), thrust::raw_pointer_cast(&residual[0]), nPreInnerIter); @@ -2520,7 +2520,7 @@ void gauss_seidel::preRRRFullSymmetricSync(const cusp::coo_m thrust::raw_pointer_cast(&b[0]), thrust::raw_pointer_cast(&segSyncIdx[0]), thrust::raw_pointer_cast(&partSyncIdx[0]), - weight, + smootherWeight, thrust::raw_pointer_cast(&x[0]), thrust::raw_pointer_cast(&residual[0]), nPreInnerIter); @@ -2541,7 +2541,7 @@ void gauss_seidel::preRRRFullSymmetricSync(const cusp::coo_m thrust::raw_pointer_cast(&b[0]), thrust::raw_pointer_cast(&segSyncIdx[0]), thrust::raw_pointer_cast(&partSyncIdx[0]), - weight, + smootherWeight, thrust::raw_pointer_cast(&x[0]), thrust::raw_pointer_cast(&residual[0]), nPreInnerIter); @@ -2562,7 +2562,7 @@ void gauss_seidel::preRRRFullSymmetricSync(const cusp::coo_m thrust::raw_pointer_cast(&b[0]), thrust::raw_pointer_cast(&segSyncIdx[0]), thrust::raw_pointer_cast(&partSyncIdx[0]), - weight, + smootherWeight, thrust::raw_pointer_cast(&x[0]), thrust::raw_pointer_cast(&residual[0]), nPreInnerIter); @@ -2583,7 +2583,7 @@ void gauss_seidel::preRRRFullSymmetricSync(const cusp::coo_m thrust::raw_pointer_cast(&b[0]), thrust::raw_pointer_cast(&segSyncIdx[0]), thrust::raw_pointer_cast(&partSyncIdx[0]), - weight, + smootherWeight, thrust::raw_pointer_cast(&x[0]), thrust::raw_pointer_cast(&residual[0]), nPreInnerIter); @@ -2604,7 +2604,7 @@ void gauss_seidel::preRRRFullSymmetricSync(const cusp::coo_m thrust::raw_pointer_cast(&b[0]), thrust::raw_pointer_cast(&segSyncIdx[0]), thrust::raw_pointer_cast(&partSyncIdx[0]), - weight, + smootherWeight, thrust::raw_pointer_cast(&x[0]), thrust::raw_pointer_cast(&residual[0]), nPreInnerIter); @@ -2688,7 +2688,7 @@ void gauss_seidel::preRRRFull(const cusp::ell_matrix::preRRRFull(const cusp::ell_matrix::preRRRFull(const cusp::ell_matrix::preRRRFull(const cusp::ell_matrix::preRRRFull(const cusp::ell_matrix::preRRRFull(const cusp::ell_matrix::preRRRFull(const cusp::ell_matrix::preRRRFull(const cusp::ell_matrix::preRRRFull(const cusp::ell_matrix::preRRRFull(const cusp::ell_matrix::preRRRFull(const cusp::ell_matrix::preRRRFull(const cusp::ell_matrix::preRRRFull(const cusp::ell_matrix::preRRRFull(const cusp::ell_matrix::preRRRFull(const cusp::ell_matrix::preRRRFull(const cusp::ell_matrix::preRRRFull(const cusp::ell_matrix::preRRRFull(const cusp::ell_matrix::preRRRFull(const cusp::ell_matrix::preRRRFull(const cusp::ell_matrix::preRRRFull(const cusp::ell_matrix::preRRRFull(const cusp::ell_matrix::preRRRFull(const cusp::ell_matrix::preRRRFull(const cusp::ell_matrix::preRRRFull(const cusp::ell_matrix::preRRRFull(const cusp::ell_matrix::preRRRFull(const cusp::ell_matrix::preRRRFull(const cusp::ell_matrix::preRRRFull(const cusp::ell_matrix::preRRRFull(const cusp::ell_matrix::preRRRFull(const cusp::ell_matrix::preRRRFull(const cusp::ell_matrix::preRRRFull(const cusp::ell_matrix::preRRRFull(const cusp::ell_matrix::postPCRFullSymmetric(const cusp::coo_matr thrust::raw_pointer_cast(&AinBlockIdx[0]), thrust::raw_pointer_cast(&AoutBlockIdx[0]), thrust::raw_pointer_cast(&b[0]), - weight, + smootherWeight, thrust::raw_pointer_cast(&x[0]), thrust::raw_pointer_cast(&xout[0]), nPostInnerIter); @@ -4476,7 +4476,7 @@ void gauss_seidel::postPCRFullSymmetric(const cusp::coo_matr thrust::raw_pointer_cast(&AinBlockIdx[0]), thrust::raw_pointer_cast(&AoutBlockIdx[0]), thrust::raw_pointer_cast(&b[0]), - weight, + smootherWeight, thrust::raw_pointer_cast(&x[0]), thrust::raw_pointer_cast(&xout[0]), nPostInnerIter); @@ -4499,7 +4499,7 @@ void gauss_seidel::postPCRFullSymmetric(const cusp::coo_matr thrust::raw_pointer_cast(&AinBlockIdx[0]), thrust::raw_pointer_cast(&AoutBlockIdx[0]), thrust::raw_pointer_cast(&b[0]), - weight, + smootherWeight, thrust::raw_pointer_cast(&x[0]), thrust::raw_pointer_cast(&xout[0]), nPostInnerIter); @@ -4522,7 +4522,7 @@ void gauss_seidel::postPCRFullSymmetric(const cusp::coo_matr thrust::raw_pointer_cast(&AinBlockIdx[0]), thrust::raw_pointer_cast(&AoutBlockIdx[0]), thrust::raw_pointer_cast(&b[0]), - weight, + smootherWeight, thrust::raw_pointer_cast(&x[0]), thrust::raw_pointer_cast(&xout[0]), nPostInnerIter); @@ -4544,7 +4544,7 @@ void gauss_seidel::postPCRFullSymmetric(const cusp::coo_matr thrust::raw_pointer_cast(&AinBlockIdx[0]), thrust::raw_pointer_cast(&AoutBlockIdx[0]), thrust::raw_pointer_cast(&b[0]), - weight, + smootherWeight, thrust::raw_pointer_cast(&x[0]), thrust::raw_pointer_cast(&xout[0]), nPostInnerIter); @@ -4566,7 +4566,7 @@ void gauss_seidel::postPCRFullSymmetric(const cusp::coo_matr thrust::raw_pointer_cast(&AinBlockIdx[0]), thrust::raw_pointer_cast(&AoutBlockIdx[0]), thrust::raw_pointer_cast(&b[0]), - weight, + smootherWeight, thrust::raw_pointer_cast(&x[0]), thrust::raw_pointer_cast(&xout[0]), nPostInnerIter); @@ -4589,7 +4589,7 @@ void gauss_seidel::postPCRFullSymmetric(const cusp::coo_matr thrust::raw_pointer_cast(&AinBlockIdx[0]), thrust::raw_pointer_cast(&AoutBlockIdx[0]), thrust::raw_pointer_cast(&b[0]), - weight, + smootherWeight, thrust::raw_pointer_cast(&x[0]), thrust::raw_pointer_cast(&xout[0]), nPostInnerIter); @@ -4612,7 +4612,7 @@ void gauss_seidel::postPCRFullSymmetric(const cusp::coo_matr thrust::raw_pointer_cast(&AinBlockIdx[0]), thrust::raw_pointer_cast(&AoutBlockIdx[0]), thrust::raw_pointer_cast(&b[0]), - weight, + smootherWeight, thrust::raw_pointer_cast(&x[0]), thrust::raw_pointer_cast(&xout[0]), nPostInnerIter); @@ -4635,7 +4635,7 @@ void gauss_seidel::postPCRFullSymmetric(const cusp::coo_matr thrust::raw_pointer_cast(&AinBlockIdx[0]), thrust::raw_pointer_cast(&AoutBlockIdx[0]), thrust::raw_pointer_cast(&b[0]), - weight, + smootherWeight, thrust::raw_pointer_cast(&x[0]), thrust::raw_pointer_cast(&xout[0]), nPostInnerIter); @@ -4849,7 +4849,7 @@ void gauss_seidel::postPCRFullSymmetricSync(const cusp::coo_ thrust::raw_pointer_cast(&b[0]), thrust::raw_pointer_cast(&segSyncIdx[0]), thrust::raw_pointer_cast(&partSyncIdx[0]), - weight, + smootherWeight, thrust::raw_pointer_cast(&x[0]), thrust::raw_pointer_cast(&xout[0]), nPostInnerIter); @@ -4873,7 +4873,7 @@ void gauss_seidel::postPCRFullSymmetricSync(const cusp::coo_ thrust::raw_pointer_cast(&b[0]), thrust::raw_pointer_cast(&segSyncIdx[0]), thrust::raw_pointer_cast(&partSyncIdx[0]), - weight, + smootherWeight, thrust::raw_pointer_cast(&x[0]), thrust::raw_pointer_cast(&xout[0]), nPostInnerIter); @@ -4898,7 +4898,7 @@ void gauss_seidel::postPCRFullSymmetricSync(const cusp::coo_ thrust::raw_pointer_cast(&b[0]), thrust::raw_pointer_cast(&segSyncIdx[0]), thrust::raw_pointer_cast(&partSyncIdx[0]), - weight, + smootherWeight, thrust::raw_pointer_cast(&x[0]), thrust::raw_pointer_cast(&xout[0]), nPostInnerIter); @@ -4923,7 +4923,7 @@ void gauss_seidel::postPCRFullSymmetricSync(const cusp::coo_ thrust::raw_pointer_cast(&b[0]), thrust::raw_pointer_cast(&segSyncIdx[0]), thrust::raw_pointer_cast(&partSyncIdx[0]), - weight, + smootherWeight, thrust::raw_pointer_cast(&x[0]), thrust::raw_pointer_cast(&xout[0]), nPostInnerIter); @@ -4948,7 +4948,7 @@ void gauss_seidel::postPCRFullSymmetricSync(const cusp::coo_ thrust::raw_pointer_cast(&b[0]), thrust::raw_pointer_cast(&segSyncIdx[0]), thrust::raw_pointer_cast(&partSyncIdx[0]), - weight, + smootherWeight, thrust::raw_pointer_cast(&x[0]), thrust::raw_pointer_cast(&xout[0]), nPostInnerIter); @@ -4973,7 +4973,7 @@ void gauss_seidel::postPCRFullSymmetricSync(const cusp::coo_ thrust::raw_pointer_cast(&b[0]), thrust::raw_pointer_cast(&segSyncIdx[0]), thrust::raw_pointer_cast(&partSyncIdx[0]), - weight, + smootherWeight, thrust::raw_pointer_cast(&x[0]), thrust::raw_pointer_cast(&xout[0]), nPostInnerIter); @@ -4998,7 +4998,7 @@ void gauss_seidel::postPCRFullSymmetricSync(const cusp::coo_ thrust::raw_pointer_cast(&b[0]), thrust::raw_pointer_cast(&segSyncIdx[0]), thrust::raw_pointer_cast(&partSyncIdx[0]), - weight, + smootherWeight, thrust::raw_pointer_cast(&x[0]), thrust::raw_pointer_cast(&xout[0]), nPostInnerIter); @@ -5090,7 +5090,7 @@ void gauss_seidel::postPCRFull(const cusp::ell_matrix::postPCRFull(const cusp::ell_matrix::postPCRFull(const cusp::ell_matrix::postPCRFull(const cusp::ell_matrix::postPCRFull(const cusp::ell_matrix::postPCRFull(const cusp::ell_matrix::postPCRFull(const cusp::ell_matrix::postPCRFull(const cusp::ell_matrix::postPCRFull(const cusp::ell_matrix::postPCRFull(const cusp::ell_matrix::postPCRFull(const cusp::ell_matrix::postPCRFull(const cusp::ell_matrix::postPCRFull(const cusp::ell_matrix::postPCRFull(const cusp::ell_matrix::postPCRFull(const cusp::ell_matrix::postPCRFull(const cusp::ell_matrix::postPCRFull(const cusp::ell_matrix::postPCRFull(const cusp::ell_matrix::postPCRFull(const cusp::ell_matrix::postPCRFull(const cusp::ell_matrix::postPCRFull(const cusp::ell_matrix::postPCRFull(const cusp::ell_matrix::postPCRFull(const cusp::ell_matrix::postPCRFull(const cusp::ell_matrix::postPCRFull(const cusp::ell_matrix::postPCRFull(const cusp::ell_matrix::postPCRFull(const cusp::ell_matrix::postPCRFull(const cusp::ell_matrix::postPCRFull(const cusp::ell_matrix::postPCRFull(const cusp::ell_matrix::postPCRFull(const cusp::ell_matrix::postPCRFull(const cusp::ell_matrix::postPCRFull(const cusp::ell_matrix::postPCRFull(const cusp::ell_matrix::postPCRFullCsr(const cusp::csr_matrix::postPCRFullCsr(const cusp::csr_matrix::postPCRFullCsr(const cusp::csr_matrix::postPCRFullCsr(const cusp::csr_matrix::postPCRFullCsr(const cusp::csr_matrix::postPCRFullCsr(const cusp::csr_matrix::postPCRFullCsr(const cusp::csr_matrix::postPCRFullCsr(const cusp::csr_matrix::postPCRFullCsr(const cusp::csr_matrix::postPCRFullCsr(const cusp::csr_matrix::postPCRFullCsr(const cusp::csr_matrix::postPCRFullCsr(const cusp::csr_matrix::postPCRFullCsr(const cusp::csr_matrix::postPCRFullCsr(const cusp::csr_matrix::postPCRFullCsr(const cusp::csr_matrix::postPCRFullCsr(const cusp::csr_matrix::postPCRFullCsr(const cusp::csr_matrix::postPCRFullCsr(const cusp::csr_matrix::postPCRFullCsr(const cusp::csr_matrix::postPCRFullCsr(const cusp::csr_matrix::postPCRFullCsr(const cusp::csr_matrix::postPCRFullCsr(const cusp::csr_matrix::postPCRFullCsr(const cusp::csr_matrix::postPCRFullCsr(const cusp::csr_matrix::postPCRFullCsr(const cusp::csr_matrix::postPCRFullCsr(const cusp::csr_matrix::postPCRFullCsr(const cusp::csr_matrix::postPCRFullCsr(const cusp::csr_matrix::postPCRFullCsr(const cusp::csr_matrix::postPCRFullCsr(const cusp::csr_matrix::postPCRFullCsr(const cusp::csr_matrix::postPCRFullCsr(const cusp::csr_matrix::postPCRFullCsr(const cusp::csr_matrix::postPCRFullCsr(const cusp::csr_matrix -__global__ void element_loop_kernel(size_t nv, ValueType *d_nx, ValueType *d_ny, size_t ne, IndexType *d_tri0, IndexType *d_tri1, IndexType *d_tri2, - ValueType *d_ellvalues, IndexType *d_ellcolidx, size_t nrow, size_t num_col_per_row, size_t pitch, - ValueType *d_b) +__global__ void element_loop_kernel(size_t nv, ValueType *d_nx, + ValueType *d_ny, size_t ne, IndexType *d_tri0, IndexType *d_tri1, + IndexType *d_tri2,ValueType *d_ellvalues, IndexType *d_ellcolidx, + size_t nrow, size_t num_col_per_row, size_t pitch, ValueType *d_b) { ValueType coeffs[9]; ValueType stiffMat[6]; @@ -298,7 +299,8 @@ __global__ void element_loop_kernel(size_t nv, ValueType *d_nx, ValueType *d_ny, ValueType x[3]; ValueType y[3]; - for (int eleidx = blockIdx.x * blockDim.x + threadIdx.x; eleidx < ne; eleidx += blockDim.x * gridDim.x) + for (int eleidx = blockIdx.x * blockDim.x + threadIdx.x; + eleidx < ne; eleidx += blockDim.x * gridDim.x) { ValueType Ax, Ay, Az, Bx, By, Bz, Cx, Cy, Cz; ValueType AB0, AB1, AB2, AC0, AC1, AC2, r0, r1, r2, a, b, c; @@ -315,20 +317,21 @@ __global__ void element_loop_kernel(size_t nv, ValueType *d_nx, ValueType *d_ny, y[1] = d_ny[ids[1]]; y[2] = d_ny[ids[2]]; - ValueType TArea = fabs(x[0] * y[2] - x[0] * y[1] + x[1] * y[0] - x[1] * y[2] + x[2] * y[1] - x[2] * y[0]) / 2.0; + ValueType TArea = fabs(x[0] * y[2] - x[0] * y[1] + + x[1] * y[0] - x[1] * y[2] + x[2] * y[1] - x[2] * y[0]) / 2.0; #pragma unroll for (int i = 0; i < 3; i++) { Ax = x[i % 3]; Ay = y[i % 3]; - Az = 1.0; + Az = 0.0; Bx = x[(i + 1) % 3]; By = y[(i + 1) % 3]; Bz = 0.0; Cx = x[(i + 2) % 3]; Cy = y[(i + 2) % 3]; - Cz = 0.0; + Cz = 1.0; //compute AB cross AC AB0 = Bx - Ax; diff --git a/src/core/cuda/smoothedMG_amg_level.cu b/src/core/cuda/smoothedMG_amg_level.cu index c1a5f72..f1cf6e1 100644 --- a/src/core/cuda/smoothedMG_amg_level.cu +++ b/src/core/cuda/smoothedMG_amg_level.cu @@ -16,12 +16,7 @@ template SmoothedMG_AMG_Level::SmoothedMG_AMG_Level(AMG *amg) : AMG_Level(amg) { - prosmoothomega = amg->cfg->proOmega_; aggregator = Aggregator::allocate(amg->cfg); // DHL - DS_type = amg->cfg->dsType_; - metis_size = amg->cfg->metisSize_; - mesh_type = amg->cfg->tetMesh_ == NULL ? 0 : 1; - part_max_size = amg->cfg->partitionMaxSize_; } template @@ -119,7 +114,8 @@ __global__ void matrixpermute_csr_kernel(int np, int num_entries, } template <> -void SmoothedMG_AMG_Level::generateMatrixCsr(IdxVector_d &permutation, IdxVector_d &aggregateIdx, IdxVector_d &partitionIdx, IdxVector_d &partitionlabel) +void SmoothedMG_AMG_Level::generateMatrixCsr(IdxVector_d &permutation, + IdxVector_d &aggregateIdx, IdxVector_d &partitionIdx, IdxVector_d &partitionlabel) { int numpart = partitionIdx.size() - 1; @@ -312,7 +308,7 @@ void SmoothedMG_AMG_Level::generateProlongatorFull_d(IdxVect thrust::sequence(T.row_indices.begin(), T.row_indices.end()); cusp::detail::offsets_to_indices(aggregateIdx, T.column_indices); thrust::fill(T.values.begin(), T.values.end(), 1); - const AMGType lambda = prosmoothomega; + const AMGType lambda = this->amg->cfg->proOmega_; // temp <- -lambda * S(i,j) * T(j,k) Matrix_coo_d temp(S.num_rows, T.num_cols, S.num_entries + T.num_entries); @@ -401,11 +397,13 @@ void SmoothedMG_AMG_Level::createNextLevel(bool verbose) //compute permutation if(this->level_id == 0) { - if(mesh_type == 0) + if(this->amg->cfg->triMesh_ != NULL) { if (verbose) std::cout << "calling computePermutation_d with tri mesh." << std::endl; - aggregator->computePermutation_d(this->m_meshPtr, permutation_d, ipermutation_d, aggregateIdx_d, partitionIdx_d, partitionlabel_d, m_xadjout_d, m_adjncyout_d, metis_size, part_max_size, verbose);// DHL + aggregator->computePermutation_d(this->m_meshPtr, permutation_d, ipermutation_d, + aggregateIdx_d, partitionIdx_d, partitionlabel_d, m_xadjout_d, m_adjncyout_d, + this->amg->cfg->metisSize_, this->amg->cfg->partitionMaxSize_, verbose);// DHL if (verbose) std::cout << "computePermutation_d called with tri mesh." << std::endl; } @@ -413,7 +411,10 @@ void SmoothedMG_AMG_Level::createNextLevel(bool verbose) { if (verbose) std::cout << "calling computePermutation_d with tet mesh." << std::endl; - aggregator->computePermutation_d(this->m_tetmeshPtr, permutation_d, ipermutation_d, aggregateIdx_d, partitionIdx_d, partitionlabel_d, m_xadjout_d, m_adjncyout_d, metis_size, part_max_size, verbose); // DHL + aggregator->computePermutation_d(this->m_tetmeshPtr, permutation_d, + ipermutation_d, aggregateIdx_d, partitionIdx_d, partitionlabel_d, + m_xadjout_d, m_adjncyout_d, this->amg->cfg->metisSize_, + this->amg->cfg->partitionMaxSize_, verbose); // DHL if (verbose) std::cout << "computePermutation_d called with tet mesh." << std::endl; } @@ -422,7 +423,10 @@ void SmoothedMG_AMG_Level::createNextLevel(bool verbose) { if (verbose) std::cout << "calling computePermutation_d with level_id != 0." << std::endl; - aggregator->computePermutation_d(m_xadj_d, m_adjncy_d, permutation_d, ipermutation_d, aggregateIdx_d, partitionIdx_d, partitionlabel_d, m_xadjout_d, m_adjncyout_d, metis_size, part_max_size, verbose); // DHL + aggregator->computePermutation_d(m_xadj_d, m_adjncy_d, permutation_d, + ipermutation_d, aggregateIdx_d, partitionIdx_d, partitionlabel_d, + m_xadjout_d, m_adjncyout_d, this->amg->cfg->metisSize_, + this->amg->cfg->partitionMaxSize_, verbose); // DHL if (verbose) std::cout << "computePermutation_d called with level_id != 0." << std::endl; } @@ -442,7 +446,7 @@ void SmoothedMG_AMG_Level::createNextLevel(bool verbose) //generate matrix int num_per_thread; - switch(DS_type) + switch (amg->cfg->dsType_) { case 0: diff --git a/src/core/include/FEMSolver.h b/src/core/include/FEMSolver.h index 5146d4b..68f252a 100644 --- a/src/core/include/FEMSolver.h +++ b/src/core/include/FEMSolver.h @@ -19,7 +19,6 @@ #include #include "smoothers/smoother.h" #include "cycles/cycle.h" -#include "convergence.h" #include "amg_level.h" #ifdef WIN32 @@ -59,13 +58,10 @@ class FEMSolver { bool verbose_; // output verbosity std::string filename_; // mesh file name int maxLevels_; // the maximum number of levels - int minRows_; // the minimum number of rows in a level int maxIters_; // the maximum solve iterations int preInnerIters_; // the pre inner iterations for GSINNER int postInnerIters_; // the post inner iterations for GSINNER int postRelaxes_; // the number of post relax iterations - int preSweeps_; // the number of presmooth iterations - int postSweeps_; // the number of postsmooth iterations int cycleIters_; // the number of CG iterations per outer iteration int dsType_; // data structure type int topSize_; // max size of coarsest level @@ -75,9 +71,7 @@ class FEMSolver { ConvergenceType convergeType_; // the convergence tolerance algorithm double tolerance_; // the convergence tolerance CycleType cycleType_; // the cycle algorithm - SmootherType smootherType_; // the smoothing algorithm SolverType solverType_; // the solving algorithm - AlgorithmType algoType_; // the AMG algorithm double smootherWeight_; // the weight parameter used in a smoother double proOmega_; // the weight parameter used in prolongator smoother int device_; // the GPU device number to specify diff --git a/src/core/include/amg.h b/src/core/include/amg.h index bbd2e31..f66513b 100644 --- a/src/core/include/amg.h +++ b/src/core/include/amg.h @@ -4,42 +4,17 @@ template class AMG; enum SolverType {AMG_SOLVER,PCG_SOLVER}; -#include +enum ConvergenceType { ABSOLUTE_CONVERGENCE, RELATIVE_CONVERGENCE }; #include #include #include #include -#include #include #include "TriMesh.h" #include "tetmesh.h" -inline const char* getString(SolverType p) { - switch(p) - { - case PCG_SOLVER: - return "Preconditioned CG"; - case AMG_SOLVER: - return "AMG"; - default: - return "UNKNOWN"; - } -} - -template <> -inline SolverType getValue(const char* name) { - if(strncmp(name,"AMG",100)==0) - return AMG_SOLVER; - else if(strncmp(name,"PCG",100)==0) - return PCG_SOLVER; - - char error[100]; - sprintf(error,"Solver type '%s' is not defined",name); - FatalError(error); -} - /********************************************************* * AMG Class * This class provides the user interface to the AMG @@ -56,10 +31,10 @@ class AMG AMG(FEMSolver * cfg); ~AMG(); - void solve(const Vector_d_CG &b, Vector_d_CG &x, bool verbose = false); - void solve_iteration(const Vector_d_CG &b, Vector_d_CG &x, bool verbose = false); + void solve(const Vector_d_CG &b, Vector_d_CG &x); + void solve_iteration(const Vector_d_CG &b, Vector_d_CG &x); - void setup(const Matrix_d &Acsr_d, TriMesh* meshPtr, TetMesh* tetmeshPtr, bool verbose = false); + void setup(const Matrix_d &Acsr_d); void printGridStatistics(); @@ -76,19 +51,10 @@ class AMG AMG_Level* fine; ValueType initial_nrm; - CGType tolerance; int iterations; - int max_iters; - int cycle_iters; int num_levels; int coarsestlevel; - int presweeps,postsweeps; - CycleType cycle; -// NormType norm; - ConvergenceType convergence; - int DS_type; - SolverType solver; Matrix_hyb_d_CG Ahyb_d_CG; double solve_start, solve_stop; diff --git a/src/core/include/amg_level.h b/src/core/include/amg_level.h index bf9d96e..0f9d908 100644 --- a/src/core/include/amg_level.h +++ b/src/core/include/amg_level.h @@ -2,22 +2,6 @@ #define __AMG_LEVEL_H__ template class AMG_Level; -enum AlgorithmType -{ - CLASSICAL, SAMG, SAMGFULL -}; - -inline const char* getString(AlgorithmType p) -{ - switch (p) - { - case CLASSICAL: - return "CLASSICAL"; - default: - return "UNKNOWN"; - } -} - #include #include #include @@ -29,20 +13,6 @@ inline const char* getString(AlgorithmType p) #include "tetmesh.h" #include -template <> -inline AlgorithmType getValue(const char* name) { - if(strncmp(name,"CLASSICAL",100)==0) - return CLASSICAL; - else if(strncmp(name,"SAMG",100)==0) - return SAMG; - else if(strncmp(name,"SAMGFULL",100)==0) - return SAMGFULL; - - char error[100]; - sprintf(error,"Algorithm '%s' is not defined",name); - FatalError(error); -} - /******************************************************** * AMG Level class: * This class is a base class for AMG levels. This @@ -56,9 +26,7 @@ template public: AMG_Level(AMG *amg) : smoother(0), amg(amg), next(0), init(false) - { - DS_type = amg->cfg->dsType_; - }; + {}; virtual ~AMG_Level(); virtual void restrictResidual(const Vector &r, Vector &rr) = 0; @@ -169,7 +137,6 @@ template AMG* amg; AMG_Level* next; - int DS_type; int largest_num_entries; int largest_num_per_row; int largest_num_segment; diff --git a/src/core/include/convergence.h b/src/core/include/convergence.h deleted file mode 100644 index dffa0b4..0000000 --- a/src/core/include/convergence.h +++ /dev/null @@ -1,29 +0,0 @@ -#ifndef __CONVERGENCE_H__ -#define __CONVERGENCE_H__ -#include - -enum ConvergenceType {ABSOLUTE_CONVERGENCE,RELATIVE_CONVERGENCE}; -inline const char* getString(ConvergenceType p) { - switch(p) - { - case ABSOLUTE_CONVERGENCE: - return "ABSOLUTE"; - case RELATIVE_CONVERGENCE: - return "RELATIVE"; - default: - return "UNKNOWN"; - } -} - -template <> -inline ConvergenceType getValue(const char* name) { - if(strncmp(name,"ABSOLUTE",100)==0) - return ABSOLUTE_CONVERGENCE; - else if(strncmp(name,"RELATIVE",100)==0) - return RELATIVE_CONVERGENCE; - - char error[100]; - sprintf(error,"ConvergenceType '%s' is not defined",name); - FatalError(error); -} -#endif diff --git a/src/core/include/cycles/cgcycle.h b/src/core/include/cycles/cgcycle.h index 4c193ad..4e61b7f 100644 --- a/src/core/include/cycles/cgcycle.h +++ b/src/core/include/cycles/cgcycle.h @@ -12,6 +12,8 @@ template class CG_Flex_Cycle { public: typedef typename Matrix::value_type ValueType; - CG_Flex_Cycle(CycleType next_cycle, int num_iters, AMG_Level *next, const Matrix_hyb_d_CG &Aell, const Vector_d_CG &b, Vector_d_CG &x, CGType tol, int maxiters, bool verbose = false); + CG_Flex_Cycle(CycleType next_cycle, int num_iters, + AMG_Level *next, const Matrix_hyb_d_CG &Aell, + const Vector_d_CG &b, Vector_d_CG &x, CGType tol, int maxiters, bool verbose = false); }; #endif diff --git a/src/core/include/cycles/cycle.h b/src/core/include/cycles/cycle.h index a1c1e90..7ad9333 100644 --- a/src/core/include/cycles/cycle.h +++ b/src/core/include/cycles/cycle.h @@ -3,43 +3,11 @@ enum CycleType {V_CYCLE,W_CYCLE,F_CYCLE,K_CYCLE}; -#include #include #include -inline const char* getString(CycleType p) { - switch(p) - { - case V_CYCLE: - return "V Cycle"; - case W_CYCLE: - return "W Cycle"; - case F_CYCLE: - return "F Cycle"; - case K_CYCLE: - return "K Cycle"; - default: - return "UNKNOWN"; - } -} - -template <> -inline CycleType getValue(const char* name) { - if(strncmp(name,"V",100)==0) - return V_CYCLE; - else if(strncmp(name,"W",100)==0) - return W_CYCLE; - else if(strncmp(name,"F",100)==0) - return F_CYCLE; - else if(strncmp(name,"K",100)==0) - return K_CYCLE; - - char error[100]; - sprintf(error,"Cycle '%s' is not defined",name); - FatalError(error); -} - -template void dispatch_cycle(int num_iters, CycleType cycle, AMG_Level *level, const Vector& b, Vector &x); +template void dispatch_cycle(int num_iters, + CycleType cycle, AMG_Level *level, const Vector& b, Vector &x); #include @@ -51,7 +19,8 @@ template void dispatch_cycle(int num_iters, CycleTy * Dispatches the cycle that is passed in *******************************************************/ template -void dispatch_cycle(int num_iters, CycleType cycle, AMG_Level *level, const Vector& b, Vector &x) { +void dispatch_cycle(int num_iters, CycleType cycle, AMG_Level + *level, const Vector& b, Vector &x) { switch(cycle) { case V_CYCLE: V_Cycle(level,b,x); diff --git a/src/core/include/getvalue.h b/src/core/include/getvalue.h deleted file mode 100644 index aafd6e4..0000000 --- a/src/core/include/getvalue.h +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef __GET_VALUE_H__ -#define __GET_VALUE_H__ - -template -inline T getValue(const char* name); - -template <> -inline int getValue(const char* name) { - return atoi(name); -} - -template <> -inline float getValue(const char* name) { - return static_cast(atof(name)); -} - -template <> -inline double getValue(const char* name) { - return atof(name); -} -#endif diff --git a/src/core/include/smoothedMG/aggregators/aggregator.h b/src/core/include/smoothedMG/aggregators/aggregator.h index fcfd360..407bddb 100644 --- a/src/core/include/smoothedMG/aggregators/aggregator.h +++ b/src/core/include/smoothedMG/aggregators/aggregator.h @@ -2,7 +2,6 @@ #define __AGGREGATOR_H__ template class Aggregator; -#include #include #include #include diff --git a/src/core/include/smoothedMG/smoothedMG_amg_level.h b/src/core/include/smoothedMG/smoothedMG_amg_level.h index b3481b6..71e2ae7 100644 --- a/src/core/include/smoothedMG/smoothedMG_amg_level.h +++ b/src/core/include/smoothedMG/smoothedMG_amg_level.h @@ -35,7 +35,6 @@ class SmoothedMG_AMG_Level : public AMG_Level protected: - void generateMatrixCsr(IdxVector_d &permutation, IdxVector_d &aggregateIdx, IdxVector_d &partitionIdx, IdxVector_d &partitionlabel); void generateMatrixSymmetric_d(IdxVector_d &permutation, IdxVector_d &aggregateIdx, IdxVector_d &partitionIdx, IdxVector_d &partitionlabel, bool verbose = false); void generateProlongatorFull_d(IdxVector_d &aggregateIdx, IdxVector_d &partitionIdx); @@ -44,7 +43,6 @@ class SmoothedMG_AMG_Level : public AMG_Level void generateNextLevelMatrixFull_d(bool verbose = false); void computeAOperator(); - Matrix P, R; Matrix_coo_d P_d, R_d; Matrix_coo_h Acoo; @@ -52,19 +50,10 @@ class SmoothedMG_AMG_Level : public AMG_Level Matrix_coo_h AinCoo; - - Aggregator* aggregator; IdxVector_h aggregateIdx; IdxVector_h partitionIdx; IdxVector_h permutation_h; IdxVector_h ipermutation_h; - - - double prosmoothomega; - int DS_type; - int metis_size; - int mesh_type; - int part_max_size; }; #endif diff --git a/src/core/include/smoothers/gauss_seidel.h b/src/core/include/smoothers/gauss_seidel.h index f2003fd..a32d113 100644 --- a/src/core/include/smoothers/gauss_seidel.h +++ b/src/core/include/smoothers/gauss_seidel.h @@ -150,7 +150,7 @@ class gauss_seidel : public Smoother < Matrix, Vector > int largestnumentries); public: - double weight; + double smootherWeight; int nPreInnerIter; int nPostInnerIter; int post_relaxes; diff --git a/src/core/include/smoothers/smoother.h b/src/core/include/smoothers/smoother.h index ae69ad5..8695479 100644 --- a/src/core/include/smoothers/smoother.h +++ b/src/core/include/smoothers/smoother.h @@ -7,54 +7,11 @@ enum SmootherType JACOBI, JACOBI_NO_CUSP, GAUSSSEIDEL, POLYNOMIAL, GSINNER }; -#include #include #include class FEMSolver; -inline const char* getString(SmootherType p) -{ - switch (p) - { - case JACOBI: - return "JACOBI"; - case JACOBI_NO_CUSP: //TODO remove cusp jacobi and rename - return "JACOBI_NO_CUSP"; - case GAUSSSEIDEL: - return "GAUSSSEIDEL"; - case POLYNOMIAL: - return "POLYNOMIAL"; - case GSINNER: - return "GSINNER"; - default: - return "UNKNOWN"; - } -} - -template <> -inline SmootherType getValue(const char* name) -{ - if (strncmp(name, "JACOBI", 100) == 0) - return JACOBI; - - if (strncmp(name, "JACOBI_NO_CUSP", 100) == 0) - return JACOBI_NO_CUSP; - - if (strncmp(name, "GAUSSSEIDEL", 100) == 0) - return GAUSSSEIDEL; - - if (strncmp(name, "POLYNOMIAL", 100) == 0) - return POLYNOMIAL; - - if (strncmp(name, "GSINNER", 100) == 0) - return GSINNER; - - char error[100]; - sprintf(error, "Smoother '%s' is not defined", name); - FatalError(error); -} - /************************************* * Smoother base class *************************************/ diff --git a/src/examples/example1.cu b/src/examples/example1.cu index 369ea24..b9d541f 100644 --- a/src/examples/example1.cu +++ b/src/examples/example1.cu @@ -39,8 +39,6 @@ int main(int argc, char** argv) // device. We can set our max allocation based on that number int max_registers_used = 64; cfg.partitionMaxSize_ = getMaxThreads(max_registers_used, cfg.device_); - //set the desired algorithm - cfg.algoType_ = /*(AlgorithmType::)*/CLASSICAL; //set the convergence tolerance cfg.tolerance_= 1e-8; //set the weight parameter used in a smoother @@ -63,8 +61,6 @@ int main(int argc, char** argv) cfg.cycleType_ = /*(CycleType::)*/V_CYCLE; //set the convergence tolerance algorithm cfg.convergeType_ = /*(ConvergenceType::)*/ABSOLUTE_CONVERGENCE; - //set the smoothing algorithm - cfg.smootherType_ = GAUSSSEIDEL; //Now we read in the mesh of choice //TriMesh* meshPtr = TriMesh::read("mesh.ply"); //-----if we were reading a Triangle mesh //read in the Tetmesh diff --git a/src/examples/example3.cu b/src/examples/example3.cu index 77c29b8..b17e30d 100644 --- a/src/examples/example3.cu +++ b/src/examples/example3.cu @@ -112,8 +112,6 @@ int main(int argc, char** argv) // device. We can set our max allocation based on that number int max_registers_used = 64; cfg.partitionMaxSize_ = getMaxThreads(max_registers_used, cfg.device_); - //set the desired algorithm - cfg.algoType_ = /*(AlgorithmType::)*/CLASSICAL; //set the convergence tolerance cfg.tolerance_ = 1e-8; //set the weight parameter used in a smoother @@ -136,8 +134,6 @@ int main(int argc, char** argv) cfg.cycleType_ = /*(CycleType::)*/V_CYCLE; //set the convergence tolerance algorithm cfg.convergeType_ = /*(ConvergenceType::)*/ABSOLUTE_CONVERGENCE; - //set the smoothing algorithm - cfg.smootherType_ = GAUSSSEIDEL; //Now we read in the mesh of choice //TriMesh* meshPtr = TriMesh::read("mesh.ply"); //-----if we were reading a Triangle mesh