Skip to content

Commit

Permalink
Update master branch (electronic-structure#815)
Browse files Browse the repository at this point in the history
* start version 7.4.3
* [bug] Low symmetry lattices (electronic-structure#812)
* [fix] Use MPI grid for setting fft communicators (electronic-structure#813)
* [enhancement] Augmentation operator on GPUs (electronic-structure#803)
* [fix] change spglib tolerance from 1e-4 to 1e-6

Co-authored-by: Simon Pintarelli <1237199+simonpintarelli@users.noreply.github.com>
Co-authored-by: Taillefumier Mathieu <29380261+mtaillefumier@users.noreply.github.com>
  • Loading branch information
3 people authored Jan 31, 2023
1 parent f333226 commit 989c0da
Show file tree
Hide file tree
Showing 44 changed files with 1,051 additions and 1,014 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.18)

project(SIRIUS VERSION 7.4.2)
project(SIRIUS VERSION 7.4.3)

# user variables
set(CREATE_PYTHON_MODULE OFF CACHE BOOL "create sirius Python module")
Expand Down
11 changes: 6 additions & 5 deletions apps/tests/test_wf_fft.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ using namespace sirius;
void test_wf_fft()
{
//sddk::MPI_grid mpi_grid({2, 3}, sddk::Communicator::world());
mpi::Grid mpi_grid({1, 1}, mpi::Communicator::world());
mpi::Grid mpi_grid({2, 2}, mpi::Communicator::world());

/* creation of simple G+k vector set */
auto gkvec = fft::gkvec_factory(8.0, mpi_grid.communicator());
Expand All @@ -18,7 +18,8 @@ void test_wf_fft()
/* get the FFT box boundaries */
auto fft_grid = fft::get_min_grid(8.0, gkvec->lattice_vectors());

std::vector<int> num_mt_coeffs({10, 20, 30, 10, 20});
//std::vector<int> num_mt_coeffs({10, 20, 30, 10, 20});
std::vector<int> num_mt_coeffs({1});

wf::Wave_functions<double> wf(gkvec, num_mt_coeffs, wf::num_mag_dims(1), wf::num_bands(10), sddk::memory_t::host);
wf::Wave_functions<double> wf_ref(gkvec, num_mt_coeffs, wf::num_mag_dims(1), wf::num_bands(10), sddk::memory_t::host);
Expand All @@ -32,7 +33,7 @@ void test_wf_fft()
}
}
}
auto mg = wf.memory_guard(sddk::memory_t::device, wf::copy_to::device);
//auto mg = wf.memory_guard(sddk::memory_t::device, wf::copy_to::device);

auto pu = sddk::device_t::CPU;

Expand Down Expand Up @@ -61,10 +62,10 @@ void test_wf_fft()
for (int ispn = 0; ispn < 2; ispn++) {

wf::Wave_functions_fft<double> wf_fft(gkvec_fft, wf, wf::spin_index(ispn), wf::band_range(0,10),
wf::shuffle_to::fft_layout | wf::shuffle_to::wf_layout);
wf::shuffle_to::wf_layout);

for (int i = 0; i < wf_fft.num_wf_local(); i++) {
spfft_transform->backward(wf_fft.pw_coeffs_spfft(sddk::memory_t::host, wf::band_index(i)), spfft_pu);
spfft_transform->backward(wf1[ispn].pw_coeffs_spfft(sddk::memory_t::host, wf::band_index(i)), spfft_pu);
spfft_transform->forward(spfft_pu, wf_fft.pw_coeffs_spfft(sddk::memory_t::host, wf::band_index(i)), SPFFT_FULL_SCALING);
}
}
Expand Down
6 changes: 3 additions & 3 deletions apps/unit_tests/test_rlm_deriv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -482,7 +482,7 @@ int test3()
1 + 10 * utils::random<double>(),
1 + 10 * utils::random<double>());

auto rtp = SHT::spherical_coordinates(v);
auto rtp = r3::spherical_coordinates(v);

double dr = 1e-5 * rtp[0];

Expand All @@ -495,8 +495,8 @@ int test3()
r3::vector<double> v2 = v;
v2[x] -= dr;

auto vs1 = SHT::spherical_coordinates(v1);
auto vs2 = SHT::spherical_coordinates(v2);
auto vs1 = r3::spherical_coordinates(v1);
auto vs2 = r3::spherical_coordinates(v2);
std::vector<double> rlm1(lmmax);
std::vector<double> rlm2(lmmax);

Expand Down
4 changes: 2 additions & 2 deletions apps/unit_tests/test_rot_ylm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,10 @@ int run_test_impl(cmd_args& args)

/* random Cartesian vector */
r3::vector<double> coord(double(rand()) / RAND_MAX, double(rand()) / RAND_MAX, double(rand()) / RAND_MAX);
auto scoord = SHT::spherical_coordinates(coord);
auto scoord = r3::spherical_coordinates(coord);
/* rotated coordinates */
auto coord2 = dot(Rc, coord);
auto scoord2 = SHT::spherical_coordinates(coord2);
auto scoord2 = r3::spherical_coordinates(coord2);

int lmax{10};
sddk::mdarray<T, 1> ylm(utils::lmmax(lmax));
Expand Down
2 changes: 1 addition & 1 deletion doc/doxygen.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ PROJECT_NAME = "SIRIUS"
# control system is used.


PROJECT_NUMBER = "7.4.2"
PROJECT_NUMBER = "7.4.3"

# Using the PROJECT_BRIEF tag one can provide an optional one line description
# for a project that appears at the top of each page and should give viewer a
Expand Down
19 changes: 17 additions & 2 deletions src/SDDK/memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -678,7 +678,7 @@ sddk::memory_pool& get_memory_pool(sddk::memory_t M__);
{ \
if (!(condition__)) { \
std::stringstream _s; \
_s << "Assertion (" << #condition__ << ") failed " \
_s << "Assertion (" << #condition__ << ") failed " \
<< "at line " << __LINE__ << " of file " << __FILE__ << std::endl \
<< "array label: " << label_ << std::endl; \
for (int i = 0; i < N; i++) { \
Expand Down Expand Up @@ -755,6 +755,20 @@ class mdarray_index_descriptor
{
return size_;
}

inline bool check_range(index_type i__) const
{
#ifdef NDEBUG
return true;
#else
if (i__ < begin_ || i__ > end_) {
std::cout << "index " << i__ << " out of range [" << begin_ << ", " << end_ << "]" << std::endl;
return false;
} else {
return true;
}
#endif
}
};

/// Multidimensional array with the column-major (Fortran) order.
Expand Down Expand Up @@ -833,7 +847,8 @@ class mdarray
std::array<index_type, N> i = {args...};

for (int j = 0; j < N; j++) {
mdarray_assert(i[j] >= dims_[j].begin() && i[j] <= dims_[j].end());
//mdarray_assert(dims_[j].check_range(i[j]) && i[j] >= dims_[j].begin() && i[j] <= dims_[j].end());
mdarray_assert(dims_[j].check_range(i[j]));
}

size_t idx = offsets_[0] + i[0];
Expand Down
85 changes: 65 additions & 20 deletions src/SDDK/wave_functions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -924,15 +924,31 @@ class Wave_functions_fft : public Wave_functions_base<T>
*/
auto& comm_col = gkvec_fft_->comm_ortho_fft();

auto ncol = sddk::splindex_base<int>::block_size(b__.size(), comm_col.size());
size_t sz = gkvec_fft_->count() * ncol;
sddk::mdarray<std::complex<T>, 1> send_recv_buf(sz, sddk::get_memory_pool(sddk::memory_t::host), "send_recv_buf");
/* in full-potential case leading dimenstion is larger than the number of plane-wave
* coefficients, so we have to copy data into temporary storage with necessary leading
* dimension */
sddk::mdarray<std::complex<T>, 2> wf_tmp;
if (wf_->ld() == wf_->num_pw_) { /* pure plane-wave coeffs */
auto ptr = (wf_->num_pw_ == 0) ? nullptr : wf_->data_[sp.get()].at(sddk::memory_t::host, 0, b__.begin());
wf_tmp = sddk::mdarray<std::complex<T>, 2>(ptr, wf_->num_pw_, b__.size());
} else {
wf_tmp = sddk::mdarray<std::complex<T>, 2>(wf_->num_pw_, b__.size(), sddk::get_memory_pool(sddk::memory_t::host));
for (int i = 0; i < b__.size(); i++) {
auto in_ptr = wf_->data_[sp.get()].at(sddk::memory_t::host, 0, b__.begin() + i);
std::copy(in_ptr, in_ptr + wf_->num_pw_, wf_tmp.at(sddk::memory_t::host, 0, i));
}
}

auto& row_distr = gkvec_fft_->gvec_slab();
auto* send_buf = (wf_tmp.ld() == 0) ? nullptr : wf_tmp.at(sddk::memory_t::host);

/* local number of columns */
int n_loc = spl_num_wf_.local_size();

sddk::mdarray<std::complex<T>, 1> recv_buf(gkvec_fft_->count() * n_loc,
sddk::get_memory_pool(sddk::memory_t::host), "recv_buf");

auto& row_distr = gkvec_fft_->gvec_slab();

/* send and receive dimensions */
mpi::block_data_descriptor sd(comm_col.size()), rd(comm_col.size());
for (int j = 0; j < comm_col.size(); j++) {
Expand All @@ -942,12 +958,8 @@ class Wave_functions_fft : public Wave_functions_base<T>
sd.calc_offsets();
rd.calc_offsets();

auto send_buf = (this->num_pw_ == 0) ? nullptr : wf_->data_[sp.get()].at(sddk::memory_t::host, 0, b__.begin());

{
comm_col.alltoall(send_buf, sd.counts.data(), sd.offsets.data(), send_recv_buf.at(sddk::memory_t::host),
rd.counts.data(), rd.offsets.data());
}
comm_col.alltoall(send_buf, sd.counts.data(), sd.offsets.data(), recv_buf.at(sddk::memory_t::host),
rd.counts.data(), rd.offsets.data());

/* reorder received blocks */
#pragma omp parallel for
Expand All @@ -956,7 +968,7 @@ class Wave_functions_fft : public Wave_functions_base<T>
int offset = row_distr.offsets[j];
int count = row_distr.counts[j];
if (count) {
auto from = &send_recv_buf[offset * n_loc + count * i];
auto from = &recv_buf[offset * n_loc + count * i];
std::copy(from, from + count, this->data_[0].at(sddk::memory_t::host, offset, i));
}
}
Expand Down Expand Up @@ -989,15 +1001,15 @@ class Wave_functions_fft : public Wave_functions_base<T>

auto& comm_col = gkvec_fft_->comm_ortho_fft();

auto ncol = sddk::splindex_base<int>::block_size(b__.size(), comm_col.size());
size_t sz = gkvec_fft_->count() * ncol;
sddk::mdarray<std::complex<T>, 1> send_recv_buf(sz, sddk::get_memory_pool(sddk::memory_t::host), "send_recv_buf");

auto& row_distr = gkvec_fft_->gvec_slab();

/* local number of columns */
int n_loc = spl_num_wf_.local_size();

/* send buffer */
sddk::mdarray<std::complex<T>, 1> send_buf(gkvec_fft_->count() * n_loc,
sddk::get_memory_pool(sddk::memory_t::host), "send_buf");

auto& row_distr = gkvec_fft_->gvec_slab();

/* reorder sending blocks */
#pragma omp parallel for
for (int i = 0; i < n_loc; i++) {
Expand All @@ -1006,7 +1018,7 @@ class Wave_functions_fft : public Wave_functions_base<T>
int count = row_distr.counts[j];
if (count) {
auto from = this->data_[0].at(sddk::memory_t::host, offset, i);
std::copy(from, from + count, &send_recv_buf[offset * n_loc + count * i]);
std::copy(from, from + count, &send_buf[offset * n_loc + count * i]);
}
}
}
Expand All @@ -1018,10 +1030,43 @@ class Wave_functions_fft : public Wave_functions_base<T>
}
sd.calc_offsets();
rd.calc_offsets();
auto* recv_buf = (this->num_pw_ == 0) ? nullptr : wf_->data_[sp.get()].at(sddk::memory_t::host, 0, b__.begin());

comm_col.alltoall(send_recv_buf.at(sddk::memory_t::host), sd.counts.data(), sd.offsets.data(), recv_buf,
#if !defined(NDEBUG)
for (int i = 0; i < n_loc; i++) {
for (int j = 0; j < comm_col.size(); j++) {
int offset = row_distr.offsets[j];
int count = row_distr.counts[j];
for (int igg = 0; igg < count; igg++) {
if (send_buf[offset * n_loc + count * i + igg] != this->data_[0](offset + igg, i)) {
RTE_THROW("wrong packing of send buffer");
}
}
}
}
#endif
/* full potential wave-functions have extra muffin-tin part;
* that makes the PW block of data not consecutive and thus we need to copy to a consecutive buffer
* for alltoall */
sddk::mdarray<std::complex<T>, 2> wf_tmp;
if (wf_->ld() == wf_->num_pw_) { /* pure plane-wave coeffs */
auto ptr = (wf_->num_pw_ == 0) ? nullptr : wf_->data_[sp.get()].at(sddk::memory_t::host, 0, b__.begin());
wf_tmp = sddk::mdarray<std::complex<T>, 2>(ptr, wf_->num_pw_, b__.size());
} else {
wf_tmp = sddk::mdarray<std::complex<T>, 2>(wf_->num_pw_, b__.size(), sddk::get_memory_pool(sddk::memory_t::host));
}

auto* recv_buf = (wf_tmp.ld() == 0) ? nullptr : wf_tmp.at(sddk::memory_t::host);

comm_col.alltoall(send_buf.at(sddk::memory_t::host), sd.counts.data(), sd.offsets.data(), recv_buf,
rd.counts.data(), rd.offsets.data());

if (wf_->ld() != wf_->num_pw_) {
for (int i = 0; i < b__.size(); i++) {
auto out_ptr = wf_->data_[sp.get()].at(sddk::memory_t::host, 0, b__.begin() + i);
std::copy(wf_tmp.at(sddk::memory_t::host, 0, i),
wf_tmp.at(sddk::memory_t::host, 0, i) + wf_->num_pw_, out_ptr);
}
}
}
if (pp && wf_->gkvec().comm().rank() == 0) {
auto t = utils::time_interval(t0);
Expand Down
4 changes: 2 additions & 2 deletions src/api/sirius_api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2768,7 +2768,7 @@ sirius_find_eigen_states(void* const* gs_handler__, void* const* ks_handler__, b
gs.potential().update_atomic_potential();
}
if (precompute_rf__ && *precompute_rf__) {
const_cast<sirius::Unit_cell&>(gs.ctx().unit_cell()).generate_radial_functions();
const_cast<sirius::Unit_cell&>(gs.ctx().unit_cell()).generate_radial_functions(gs.ctx().out());
}
if (precompute_ri__ && *precompute_ri__) {
const_cast<sirius::Unit_cell&>(gs.ctx().unit_cell()).generate_radial_integrals();
Expand Down Expand Up @@ -4033,7 +4033,7 @@ sirius_get_gkvec_arrays(void* const* ks_handler__, int* ik__, int* num_gkvec__,
gkvec(x, igk) = kp->gkvec().template gkvec<sddk::index_domain_t::global>(igk)[x];
gkvec_cart(x, igk) = gkc[x];
}
auto rtp = sirius::SHT::spherical_coordinates(gkc);
auto rtp = r3::spherical_coordinates(gkc);
gkvec_len[igk] = rtp[0];
gkvec_tp(0, igk) = rtp[1];
gkvec_tp(1, igk) = rtp[2];
Expand Down
16 changes: 11 additions & 5 deletions src/band/diag_full_potential.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ Band::diag_full_potential_first_variation_exact(Hamiltonian_k<double>& Hk__) con
/* block size of scalapack 2d block-cyclic distribution */
int bs = ctx_.cyclic_block_size();

auto pcs = env::print_checksum();

la::dmatrix<std::complex<double>> h(ngklo, ngklo, ctx_.blacs_grid(), bs, bs, get_memory_pool(solver.host_memory_t()));
la::dmatrix<std::complex<double>> o(ngklo, ngklo, ctx_.blacs_grid(), bs, bs, get_memory_pool(solver.host_memory_t()));

Expand Down Expand Up @@ -76,7 +78,7 @@ Band::diag_full_potential_first_variation_exact(Hamiltonian_k<double>& Hk__) con
}
}

if (ctx_.print_checksum()) {
if (pcs) {
auto z1 = h.checksum(ngklo, ngklo);
auto z2 = o.checksum(ngklo, ngklo);
utils::print_checksum("h_lapw", z1, ctx_.out());
Expand Down Expand Up @@ -107,11 +109,9 @@ Band::diag_full_potential_first_variation_exact(Hamiltonian_k<double>& Hk__) con
}
}

if (ctx_.print_checksum()) {
if (pcs) {
auto z1 = kp.fv_eigen_vectors().checksum(kp.gklo_basis_size(), ctx_.num_fv_states());
if (kp.comm().rank() == 0) {
utils::print_checksum("fv_eigen_vectors", z1, kp.out(1));
}
utils::print_checksum("fv_eigen_vectors", z1, kp.out(1));
}

/* remap to slab */
Expand All @@ -129,6 +129,12 @@ Band::diag_full_potential_first_variation_exact(Hamiltonian_k<double>& Hk__) con
la::constant<std::complex<double>>::zero(), kp.comm().native());
}

if (pcs) {
auto z1 = kp.fv_eigen_vectors_slab().checksum_pw(sddk::memory_t::host, wf::spin_index(0), wf::band_range(0, ctx_.num_fv_states()));
auto z2 = kp.fv_eigen_vectors_slab().checksum_mt(sddk::memory_t::host, wf::spin_index(0), wf::band_range(0, ctx_.num_fv_states()));
utils::print_checksum("fv_eigen_vectors_slab", z1 + z2, kp.out(1));
}

/* renormalize wave-functions */
if (ctx_.valence_relativity() == relativity_t::iora) {

Expand Down
2 changes: 1 addition & 1 deletion src/beta_projectors/beta_projectors.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ class Beta_projectors : public Beta_projectors_base<T>
#pragma omp parallel for
for (int igkloc = 0; igkloc < this->num_gkvec_loc(); igkloc++) {
/* vs = {r, theta, phi} */
auto vs = SHT::spherical_coordinates(this->gkvec_.template gkvec_cart<sddk::index_domain_t::local>(igkloc));
auto vs = r3::spherical_coordinates(this->gkvec_.template gkvec_cart<sddk::index_domain_t::local>(igkloc));
/* compute real spherical harmonics for G+k vector */
std::vector<double> gkvec_rlm(utils::lmmax(this->ctx_.unit_cell().lmax()));
sf::spherical_harmonics(this->ctx_.unit_cell().lmax(), vs[1], vs[2], &gkvec_rlm[0]);
Expand Down
4 changes: 2 additions & 2 deletions src/beta_projectors/beta_projectors_strain_deriv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ class Beta_projectors_strain_deriv : public Beta_projectors_base<T>
#pragma omp parallel for schedule(static)
for (int igkloc = 0; igkloc < this->num_gkvec_loc(); igkloc++) {
auto gvc = this->gkvec_.template gkvec_cart<sddk::index_domain_t::local>(igkloc);
auto rtp = SHT::spherical_coordinates(gvc);
auto rtp = r3::spherical_coordinates(gvc);

double theta = rtp[1];
double phi = rtp[2];
Expand All @@ -70,7 +70,7 @@ class Beta_projectors_strain_deriv : public Beta_projectors_base<T>
for (int igkloc = 0; igkloc < this->num_gkvec_loc(); igkloc++) {
auto gvc = this->gkvec_.template gkvec_cart<sddk::index_domain_t::local>(igkloc);
/* vs = {r, theta, phi} */
auto gvs = SHT::spherical_coordinates(gvc);
auto gvs = r3::spherical_coordinates(gvc);

/* |G+k|=0 case */
if (gvs[0] < 1e-10) {
Expand Down
12 changes: 12 additions & 0 deletions src/context/config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -943,6 +943,18 @@ class config_t
}
dict_["/control/output"_json_pointer] = output__;
}
/// Split local G-vectors in chunks to reduce the GPU memory consumption of augmentation operator.
inline auto gvec_chunk_size() const
{
return dict_.at("/control/gvec_chunk_size"_json_pointer).get<int>();
}
inline void gvec_chunk_size(int gvec_chunk_size__)
{
if (dict_.contains("locked")) {
throw std::runtime_error(locked_msg);
}
dict_["/control/gvec_chunk_size"_json_pointer] = gvec_chunk_size__;
}
private:
nlohmann::json& dict_;
};
Expand Down
Loading

0 comments on commit 989c0da

Please sign in to comment.