Skip to content

Commit

Permalink
Merge branch 'develop' of https://github.com/toxa81/SIRIUS into develop
Browse files Browse the repository at this point in the history
  • Loading branch information
toxa81 committed Apr 21, 2020
2 parents 67a3ab3 + f964e87 commit 8417561
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 36 deletions.
6 changes: 0 additions & 6 deletions src/density/augmentation_operator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,6 @@ void Augmentation_operator::generate_pw_coeffs(Radial_integrals_aug<false> const
/* number of beta- radial functions */
int nbrf = atom_type_.mt_radial_basis_size();

PROFILE_START("sirius::Augmentation_operator::generate_pw_coeffs|1");
sddk::mdarray<double, 3> ri_values(nbrf * (nbrf + 1) / 2, 2 * lmax_beta + 1, gvec_.num_gvec_shells_local(), mp__);
#pragma omp parallel for
for (int j = 0; j < gvec_.num_gvec_shells_local(); j++) {
Expand All @@ -107,7 +106,6 @@ void Augmentation_operator::generate_pw_coeffs(Radial_integrals_aug<false> const
}
}
}
PROFILE_STOP("sirius::Augmentation_operator::generate_pw_coeffs|1");

/* number of beta-projectors */
int nbf = atom_type_.mt_basis_size();
Expand Down Expand Up @@ -138,7 +136,6 @@ void Augmentation_operator::generate_pw_coeffs(Radial_integrals_aug<false> const
/* array of plane-wave coefficients */
q_pw_ = mdarray<double, 2>(nbf * (nbf + 1) / 2, 2 * gvec_count, mp__, "q_pw_");

PROFILE_START("sirius::Augmentation_operator::generate_pw_coeffs|2");
switch (atom_type_.parameters().processing_unit()) {
case device_t::CPU: {
#pragma omp parallel for schedule(static)
Expand Down Expand Up @@ -180,7 +177,6 @@ void Augmentation_operator::generate_pw_coeffs(Radial_integrals_aug<false> const

q_pw_.allocate(*mpd__);

PROFILE_START("sirius::Augmentation_operator::generate_pw_coeffs|gpu");
#if defined(__GPU)
int ld0 = static_cast<int>(gc.size(0));
int ld1 = static_cast<int>(gc.size(1));
Expand All @@ -191,12 +187,10 @@ void Augmentation_operator::generate_pw_coeffs(Radial_integrals_aug<false> const
q_pw_.at(memory_t::device), static_cast<int>(q_pw_.size(0)), fourpi_omega);
#endif
q_pw_.copy_to(memory_t::host);
PROFILE_STOP("sirius::Augmentation_operator::generate_pw_coeffs|gpu");

q_pw_.deallocate(memory_t::device);
}
}
PROFILE_STOP("sirius::Augmentation_operator::generate_pw_coeffs|2");

sym_weight_ = mdarray<double, 1>(nbf * (nbf + 1) / 2, mp__, "sym_weight_");
for (int xi2 = 0; xi2 < nbf; xi2++) {
Expand Down
61 changes: 37 additions & 24 deletions src/hamiltonian/hamiltonian_k.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,15 +57,15 @@ Hamiltonian_k::~Hamiltonian_k()
}

template <typename T, int what>
std::pair<mdarray<double, 2>, mdarray<double, 2>>
std::pair<sddk::mdarray<double, 2>, sddk::mdarray<double, 2>>
Hamiltonian_k::get_h_o_diag_pw() const
{
PROFILE("sirius::Hamiltonian_k::get_h_o_diag");

auto const& uc = H0_.ctx().unit_cell();

mdarray<double, 2> h_diag(kp_.num_gkvec_loc(), H0_.ctx().num_spins());
mdarray<double, 2> o_diag(kp_.num_gkvec_loc(), H0_.ctx().num_spins());
sddk::mdarray<double, 2> h_diag(kp_.num_gkvec_loc(), H0_.ctx().num_spins());
sddk::mdarray<double, 2> o_diag(kp_.num_gkvec_loc(), H0_.ctx().num_spins());

h_diag.zero();
o_diag.zero();
Expand All @@ -84,23 +84,24 @@ Hamiltonian_k::get_h_o_diag_pw() const
}
}

PROFILE_START("sirius::Hamiltonian_k::get_h_o_diag|1");
/* non-local H contribution */
auto beta_gk_t = kp_.beta_projectors().pw_coeffs_t(0);
matrix<double_complex> beta_gk_tmp(uc.max_mt_basis_size(), kp_.num_gkvec_loc());
matrix<double_complex> beta_gk_tmp(kp_.num_gkvec_loc(), uc.max_mt_basis_size());

for (int iat = 0; iat < uc.num_atom_types(); iat++) {
auto& atom_type = uc.atom_type(iat);
int nbf = atom_type.mt_basis_size();

matrix<T> d_sum;
matrix<double_complex> d_sum;
if (what & 1) {
d_sum = matrix<T>(nbf, nbf);
d_sum = matrix<double_complex>(nbf, nbf);
d_sum.zero();
}

matrix<T> q_sum;
matrix<double_complex> q_sum;
if (what & 2) {
q_sum = matrix<T>(nbf, nbf);
q_sum = matrix<double_complex>(nbf, nbf);
q_sum.zero();
}

Expand All @@ -118,31 +119,43 @@ Hamiltonian_k::get_h_o_diag_pw() const
}
}
}
PROFILE_STOP("sirius::Hamiltonian_k::get_h_o_diag|1");

int offs = uc.atom_type(iat).offset_lo();
for (int ig_loc = 0; ig_loc < kp_.num_gkvec_loc(); ig_loc++) {

PROFILE_START("sirius::Hamiltonian_k::get_h_o_diag|3");
if (what & 1) {
sddk::linalg(linalg_t::blas).gemm('N', 'N', kp_.num_gkvec_loc(), nbf, nbf,
&sddk::linalg_const<double_complex>::one(), &beta_gk_t(0, offs), beta_gk_t.ld(),
&d_sum(0, 0), d_sum.ld(), &sddk::linalg_const<double_complex>::zero(),
&beta_gk_tmp(0, 0), beta_gk_tmp.ld());
#pragma omp parallel
for (int xi = 0; xi < nbf; xi++) {
beta_gk_tmp(xi, ig_loc) = beta_gk_t(ig_loc, offs + xi);
#pragma omp for schedule(static) nowait
for (int ig_loc = 0; ig_loc < kp_.num_gkvec_loc(); ig_loc++) {
/* compute <G+k|beta_xi1> D_{xi1, xi2} <beta_xi2|G+k> contribution from all atoms */
h_diag(ig_loc, ispn) +=
std::real(beta_gk_tmp(ig_loc, xi) * std::conj(beta_gk_t(ig_loc, offs + xi)));
}
}
}

#pragma omp parallel for schedule(static)
for (int ig_loc = 0; ig_loc < kp_.num_gkvec_loc(); ig_loc++) {
for (int xi2 = 0; xi2 < nbf; xi2++) {
for (int xi1 = 0; xi1 < nbf; xi1++) {
if (what & 1) {
/* compute <G+k|beta_xi1> D_{xi1, xi2} <beta_xi2|G+k> contribution from all atoms */
auto z = beta_gk_tmp(xi1, ig_loc) * d_sum(xi1, xi2) * std::conj(beta_gk_tmp(xi2, ig_loc));
h_diag(ig_loc, ispn) += z.real();
}
if (what & 2) {
/* compute <G+k|beta_xi1> Q_{xi1, xi2} <beta_xi2|G+k> contribution from all atoms */
auto z = beta_gk_tmp(xi1, ig_loc) * q_sum(xi1, xi2) * std::conj(beta_gk_tmp(xi2, ig_loc));
o_diag(ig_loc, ispn) += z.real();
}
/* overlap (Q) contribution of the current atom type to o_diag; taken only when bit 2 of `what` is set */
if (what & 2) {
    /* beta_gk_tmp = beta_gk_t(:, offs : offs + nbf) * q_sum
     *
     * q_sum has to be passed together with its own leading dimension: d_sum is allocated only
     * under (what & 1), so d_sum.ld() is not a valid leading dimension for an overlap-only call */
    sddk::linalg(linalg_t::blas).gemm('N', 'N', kp_.num_gkvec_loc(), nbf, nbf,
        &sddk::linalg_const<double_complex>::one(), &beta_gk_t(0, offs), beta_gk_t.ld(),
        &q_sum(0, 0), q_sum.ld(), &sddk::linalg_const<double_complex>::zero(),
        &beta_gk_tmp(0, 0), beta_gk_tmp.ld());
    /* every thread walks the full xi loop; the inner worksharing loop splits ig_loc between threads */
    #pragma omp parallel
    for (int xi = 0; xi < nbf; xi++) {
        #pragma omp for schedule(static) nowait
        for (int ig_loc = 0; ig_loc < kp_.num_gkvec_loc(); ig_loc++) {
            /* compute <G+k|beta_xi1> Q_{xi1, xi2} <beta_xi2|G+k> contribution from all atoms */
            o_diag(ig_loc, ispn) +=
                std::real(beta_gk_tmp(ig_loc, xi) * std::conj(beta_gk_t(ig_loc, offs + xi)));
        }
    }
}
PROFILE_STOP("sirius::Hamiltonian_k::get_h_o_diag|3");
}
}
if (H0_.ctx().processing_unit() == device_t::GPU) {
Expand Down
6 changes: 0 additions & 6 deletions src/k_point/k_point.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -977,7 +977,6 @@ K_point::generate_atomic_wave_functions(std::vector<int> atoms__,
}
}

PROFILE_START("sirius::K_point::generate_atomic_wave_functions|wft");
#pragma omp parallel for schedule(static)
for (int igk_loc = 0; igk_loc < this->num_gkvec_loc(); igk_loc++) {
/* vs = {r, theta, phi} */
Expand Down Expand Up @@ -1013,14 +1012,12 @@ K_point::generate_atomic_wave_functions(std::vector<int> atoms__,
}
}
}
PROFILE_STOP("sirius::K_point::generate_atomic_wave_functions|wft");

for (int ia: atoms__) {

double phase = twopi * dot(gkvec().vk(), unit_cell_.atom(ia).position());
double_complex phase_k = std::exp(double_complex(0.0, phase));

PROFILE_START("sirius::K_point::generate_atomic_wave_functions|1");
/* quickly compute phase factors without calling exp() function */
std::vector<double_complex> phase_gk(num_gkvec_loc());
#pragma omp parallel for schedule(static)
Expand All @@ -1031,9 +1028,7 @@ K_point::generate_atomic_wave_functions(std::vector<int> atoms__,
/* total phase e^{-i(G+k)r_{\alpha}} */
phase_gk[igk_loc] = std::conj(ctx_.gvec_phase_factor(G, ia) * phase_k);
}
PROFILE_STOP("sirius::K_point::generate_atomic_wave_functions|1");

PROFILE_START("sirius::K_point::generate_atomic_wave_functions|2");
int iat = unit_cell_.atom(ia).type_id();
#pragma omp parallel
for (int xi = 0; xi < indexb__(iat)->size(); xi++) {
Expand All @@ -1042,7 +1037,6 @@ K_point::generate_atomic_wave_functions(std::vector<int> atoms__,
wf__.pw_coeffs(0).prime(igk_loc, offset[ia] + xi) = wf_t[iat](igk_loc, xi) * phase_gk[igk_loc];
}
}
PROFILE_STOP("sirius::K_point::generate_atomic_wave_functions|2");
}
}

Expand Down

0 comments on commit 8417561

Please sign in to comment.