diff --git a/src/Band/band.hpp b/src/Band/band.hpp index c39b6ecc0..01f0814fd 100644 --- a/src/Band/band.hpp +++ b/src/Band/band.hpp @@ -26,7 +26,7 @@ #define __BAND_HPP__ #include "periodic_function.h" -#include "k_point_set.h" +#include "K_point/k_point_set.hpp" #include "Hamiltonian/hamiltonian.hpp" namespace sirius { diff --git a/src/augmentation_operator.h b/src/Density/augmentation_operator.hpp similarity index 98% rename from src/augmentation_operator.h rename to src/Density/augmentation_operator.hpp index 0fedba041..ce2dc03e3 100644 --- a/src/augmentation_operator.h +++ b/src/Density/augmentation_operator.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2013-2017 Anton Kozhevnikov, Thomas Schulthess +// Copyright (c) 2013-2018 Anton Kozhevnikov, Thomas Schulthess // All rights reserved. // // Redistribution and use in source and binary forms, with or without modification, are permitted provided that @@ -17,13 +17,13 @@ // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -/** \file augmentation_operator.h +/** \file augmentation_operator.hpp * * \brief Contains implementation of sirius::Augmentation_operator class. */ -#ifndef __AUGMENTATION_OPERATOR_H__ -#define __AUGMENTATION_OPERATOR_H__ +#ifndef __AUGMENTATION_OPERATOR_HPP__ +#define __AUGMENTATION_OPERATOR_HPP__ #include "radial_integrals.h" @@ -220,6 +220,7 @@ class Augmentation_operator } }; +/// Derivative of augmentation operator PW coefficients with respect to the Cartesian component of G-vector. class Augmentation_operator_gvec_deriv { private: diff --git a/src/Density/density.hpp b/src/Density/density.hpp index e58453ebb..5ffb0b2ad 100644 --- a/src/Density/density.hpp +++ b/src/Density/density.hpp @@ -26,7 +26,7 @@ #define __DENSITY_HPP__ #include "periodic_function.h" -#include "k_point_set.h" +#include "K_point/k_point_set.hpp" #include "simulation_context.h" #include "mixer.h" #include "field4d.hpp" diff --git a/src/Geometry/force.hpp b/src/Geometry/force.hpp index f7e019a4d..92b60195a 100644 --- a/src/Geometry/force.hpp +++ b/src/Geometry/force.hpp @@ -26,12 +26,11 @@ #ifndef __FORCE_HPP__ #define __FORCE_HPP__ -#include "../simulation_context.h" -#include "../periodic_function.h" -#include "../augmentation_operator.h" -#include "../Beta_projectors/beta_projectors.h" -#include "../Beta_projectors/beta_projectors_gradient.h" -//#include "../potential.h" +#include "simulation_context.h" +#include "periodic_function.h" +#include "Density/augmentation_operator.hpp" +#include "Beta_projectors/beta_projectors.h" +#include "Beta_projectors/beta_projectors_gradient.h" #include "non_local_functor.hpp" namespace sirius { diff --git a/src/Geometry/non_local_functor.hpp b/src/Geometry/non_local_functor.hpp index 847c53176..d29544800 100644 --- a/src/Geometry/non_local_functor.hpp +++ b/src/Geometry/non_local_functor.hpp @@ -25,11 +25,10 @@ #ifndef __NON_LOCAL_FUNCTOR_HPP__ #define __NON_LOCAL_FUNCTOR_HPP__ -#include "../simulation_context.h" -#include "../periodic_function.h" -#include "../augmentation_operator.h" -#include "../Beta_projectors/beta_projectors.h" -//#include "../potential.h" +#include "simulation_context.h" +#include "periodic_function.h" +#include "Density/augmentation_operator.hpp" +#include "Beta_projectors/beta_projectors.h" namespace sirius { diff --git a/src/Hamiltonian/hamiltonian.hpp b/src/Hamiltonian/hamiltonian.hpp index 22d06ca77..bc7fdcc21 100644 --- a/src/Hamiltonian/hamiltonian.hpp +++ b/src/Hamiltonian/hamiltonian.hpp @@ -29,7 +29,7 @@ #include "simulation_context.h" #include "Hubbard/hubbard.hpp" #include "Potential/potential.hpp" -#include "k_point.h" +#include "K_point/k_point.hpp" #include "local_operator.hpp" #include "non_local_operator.hpp" #include "memory_pool.hpp" diff --git a/src/Hubbard/hubbard.hpp b/src/Hubbard/hubbard.hpp index 6b0832e8c..dcd9cbde9 100644 --- a/src/Hubbard/hubbard.hpp +++ b/src/Hubbard/hubbard.hpp @@ -4,12 +4,12 @@ #include #include #include "simulation_context.h" -#include "k_point.h" +#include "K_point/k_point.hpp" #include "wave_functions.hpp" #include "Hamiltonian/non_local_operator.hpp" -#include "../Beta_projectors/beta_projectors.h" -#include "../Beta_projectors/beta_projectors_gradient.h" -#include "../Beta_projectors/beta_projectors_strain_deriv.h" +#include "Beta_projectors/beta_projectors.h" +#include "Beta_projectors/beta_projectors_gradient.h" +#include "Beta_projectors/beta_projectors_strain_deriv.h" #include "radial_integrals.h" #include "mixer.h" diff --git a/src/K_point/k_point.hpp b/src/K_point/k_point.hpp index cc5ae7ca0..9efe7ad00 100644 --- a/src/K_point/k_point.hpp +++ b/src/K_point/k_point.hpp @@ -1,3 +1,775 @@ +// Copyright (c) 2013-2016 Anton Kozhevnikov, Thomas Schulthess +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without modification, are permitted provided that +// the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the +// following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions +// and the following disclaimer in the documentation and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +/** \file k_point.hpp + * + * \brief Contains definition and partial implementation of sirius::K_point class. + */ + +#ifndef __K_POINT_HPP__ +#define __K_POINT_HPP__ + +#include "periodic_function.h" +#include "matching_coefficients.h" +#include "Beta_projectors/beta_projectors.h" +#include "wave_functions.hpp" + +namespace sirius { + +/// K-point related variables and methods. +/** \image html wf_storage.png "Wave-function storage" + * \image html fv_eigen_vectors.png "First-variational eigen vectors" */ +class K_point +{ + private: + + /// Simulation context. + Simulation_context& ctx_; + + /// Unit cell object. + Unit_cell const& unit_cell_; + + /// Weight of k-point. + double weight_; + + /// Fractional k-point coordinates. + vector3d vk_; + + /// List of G-vectors with |G+k| < cutoff. + std::unique_ptr gkvec_; + + /// G-vector distribution for the FFT transformation. + std::unique_ptr gkvec_partition_; + + /// First-variational eigen values + std::vector fv_eigen_values_; + + /// First-variational eigen vectors, distributed over 2D BLACS grid. + dmatrix fv_eigen_vectors_; + + /// First-variational eigen vectors, distributed in slabs. + std::unique_ptr fv_eigen_vectors_slab_; + + /// Lowest eigen-vectors of the LAPW overlap matrix with small aigen-values. + std::unique_ptr singular_components_; + + /// Second-variational eigen vectors. + /** Second-variational eigen-vectors are stored as one or two \f$ N_{fv} \times N_{fv} \f$ matrices in + * case of non-magnetic or collinear magnetic case or as a single \f$ 2 N_{fv} \times 2 N_{fv} \f$ + * matrix in case of general non-collinear magnetism. */ + dmatrix sv_eigen_vectors_[2]; + + /// Full-diagonalization eigen vectors. + mdarray fd_eigen_vectors_; + + /// First-variational states. + std::unique_ptr fv_states_{nullptr}; + + /// Two-component (spinor) wave functions describing the bands. + std::unique_ptr spinor_wave_functions_{nullptr}; + + /// Two-component (spinor) hubbard wave functions where the S matrix is applied (if ppus). + std::unique_ptr hubbard_wave_functions_{nullptr}; + + /// Band occupation numbers. + mdarray band_occupancies_; + + /// Band energies. + mdarray band_energies_; + + /// LAPW matching coefficients for the row G+k vectors. + /** Used to setup the distributed LAPW Hamiltonian and overlap matrices. */ + std::unique_ptr alm_coeffs_row_{nullptr}; + + /// LAPW matching coefficients for the column G+k vectors. + /** Used to setup the distributed LAPW Hamiltonian and overlap matrices. */ + std::unique_ptr alm_coeffs_col_{nullptr}; + + /// LAPW matching coefficients for the local set G+k vectors. + std::unique_ptr alm_coeffs_loc_{nullptr}; + + /// Mapping between local row and global G+k vecotor index. + /** Used by matching_coefficients class. */ + std::vector igk_row_; + + /// Mapping between local column and global G+k vecotor index. + /** Used by matching_coefficients class. */ + std::vector igk_col_; + + /// Mapping between local and global G+k vecotor index. + /** Used by matching_coefficients class. */ + std::vector igk_loc_; + + /// Number of G+k vectors distributed along rows of MPI grid + int num_gkvec_row_{0}; + + /// Number of G+k vectors distributed along columns of MPI grid + int num_gkvec_col_{0}; + + /// Offset of the local fraction of G+k vectors in the global index. + int gkvec_offset_{0}; + + /// Basis descriptors distributed between rows of the 2D MPI grid. + /** This is a local array. Only MPI ranks belonging to the same column have identical copies of this array. */ + std::vector lo_basis_descriptors_row_; + + /// Basis descriptors distributed between columns of the 2D MPI grid. + /** This is a local array. Only MPI ranks belonging to the same row have identical copies of this array. */ + std::vector lo_basis_descriptors_col_; + + /// List of columns of the Hamiltonian and overlap matrix lo block (local index) for a given atom. + std::vector> atom_lo_cols_; + + /// list of rows of the Hamiltonian and overlap matrix lo block (local index) for a given atom + std::vector> atom_lo_rows_; + + /// Imaginary unit to the power of l. + std::vector zil_; + + /// Mapping between lm and l. + std::vector l_by_lm_; + + /// Column rank of the processors of ScaLAPACK/ELPA diagonalization grid. + int rank_col_; + + /// Number of processors along the columns of the diagonalization grid. + int num_ranks_col_; + + /// Row rank of the processors of ScaLAPACK/ELPA diagonalization grid. + int rank_row_; + + /// Number of processors along the rows of the diagonalization grid. + int num_ranks_row_; + + /// Beta projectors for a local set of G+k vectors. + std::unique_ptr beta_projectors_{nullptr}; + + /// Beta projectors for row G+k vectors. + /** Used to setup the full Hamiltonian in PP-PW case (for verification purpose only) */ + std::unique_ptr beta_projectors_row_{nullptr}; + + /// Beta projectors for column G+k vectors. + /** Used to setup the full Hamiltonian in PP-PW case (for verification purpose only) */ + std::unique_ptr beta_projectors_col_{nullptr}; + + /// Preconditioner matrix for Chebyshev solver. + mdarray p_mtrx_; + + /// Communicator for parallelization inside k-point. + /** This communicator is used to split G+k vectors and wave-functions. */ + Communicator const& comm_; + + /// Communicator between(!!) rows. + Communicator const& comm_row_; + + /// Communicator between(!!) columns. + Communicator const& comm_col_; + + /// Generate G+k and local orbital basis sets. + inline void generate_gklo_basis(); + + /// Test orthonormalization of first-variational states. + inline void test_fv_states(); + + inline double& band_energy_aux(int j__, int ispn__) + { + if (ctx_.num_mag_dims() == 3) { + return band_energies_(j__, 0); + } else { + if (!(ispn__ == 0 || ispn__ == 1)) { + TERMINATE("wrong spin index"); + } + return band_energies_(j__, ispn__); + } + } + + inline double& band_occupancy_aux(int j__, int ispn__) + { + if (ctx_.num_mag_dims() == 3) { + return band_occupancies_(j__, 0); + } else { + if (!(ispn__ == 0 || ispn__ == 1)) { + TERMINATE("wrong spin index"); + } + return band_occupancies_(j__, ispn__); + } + } + + /// Find G+k vectors within the cutoff. + inline void generate_gkvec(double gk_cutoff__) + { + PROFILE("sirius::K_point::generate_gkvec"); + + if (ctx_.full_potential() && (gk_cutoff__ * unit_cell_.max_mt_radius() > ctx_.lmax_apw()) && + comm_.rank() == 0 && ctx_.control().verbosity_ >= 0) { + std::stringstream s; + s << "G+k cutoff (" << gk_cutoff__ << ") is too large for a given lmax (" + << ctx_.lmax_apw() << ") and a maximum MT radius (" << unit_cell_.max_mt_radius() << ")" << std::endl + << "suggested minimum value for lmax : " << int(gk_cutoff__ * unit_cell_.max_mt_radius()) + 1; + WARNING(s); + } + + if (gk_cutoff__ * 2 > ctx_.pw_cutoff()) { + std::stringstream s; + s << "G+k cutoff is too large for a given plane-wave cutoff" << std::endl + << " pw cutoff : " << ctx_.pw_cutoff() << std::endl + << " doubled G+k cutoff : " << gk_cutoff__ * 2; + TERMINATE(s); + } + + /* create G+k vectors; communicator of the coarse FFT grid is used because wave-functions will be transformed + * only on the coarse grid; G+k-vectors will be distributed between MPI ranks assigned to the k-point */ + gkvec_ = std::unique_ptr(new Gvec(vk_, ctx_.unit_cell().reciprocal_lattice_vectors(), gk_cutoff__, comm(), + ctx_.gamma_point())); + + gkvec_partition_ = std::unique_ptr(new Gvec_partition(*gkvec_, ctx_.comm_fft_coarse(), + ctx_.comm_band_ortho_fft_coarse())); + + gkvec_offset_ = gkvec().gvec_offset(comm().rank()); + } + + public: + + /// Constructor + K_point(Simulation_context& ctx__, + double const* vk__, + double weight__) + : ctx_(ctx__) + , unit_cell_(ctx_.unit_cell()) + , weight_(weight__) + , comm_(ctx_.comm_band()) + , comm_row_(ctx_.blacs_grid().comm_row()) + , comm_col_(ctx_.blacs_grid().comm_col()) + { + PROFILE("sirius::K_point::K_point"); + + for (int x = 0; x < 3; x++) { + vk_[x] = vk__[x]; + } + + band_occupancies_ = mdarray(ctx_.num_bands(), ctx_.num_spin_dims()); + band_occupancies_.zero(); + band_energies_ = mdarray(ctx_.num_bands(), ctx_.num_spin_dims()); + band_energies_.zero(); + + num_ranks_row_ = comm_row_.size(); + num_ranks_col_ = comm_col_.size(); + + rank_row_ = comm_row_.rank(); + rank_col_ = comm_col_.rank(); + } + + /// Initialize the k-point related arrays and data. + inline void initialize(); + + inline void update() + { + PROFILE("sirius::K_point::update"); + + gkvec_->lattice_vectors(ctx_.unit_cell().reciprocal_lattice_vectors()); + + if (ctx_.full_potential()) { + if (ctx_.iterative_solver_input().type_ == "exact") { + alm_coeffs_row_ = std::unique_ptr( + new Matching_coefficients(unit_cell_, ctx_.lmax_apw(), num_gkvec_row(), igk_row_, gkvec())); + alm_coeffs_col_ = std::unique_ptr( + new Matching_coefficients(unit_cell_, ctx_.lmax_apw(), num_gkvec_col(), igk_col_, gkvec())); + } + alm_coeffs_loc_ = std::unique_ptr( + new Matching_coefficients(unit_cell_, ctx_.lmax_apw(), num_gkvec_loc(), igk_loc_, gkvec())); + } + + if (!ctx_.full_potential()) { + /* compute |beta> projectors for atom types */ + beta_projectors_ = std::unique_ptr(new Beta_projectors(ctx_, gkvec(), igk_loc_)); + + if (ctx_.iterative_solver_input().type_ == "exact") { + beta_projectors_row_ = std::unique_ptr(new Beta_projectors(ctx_, gkvec(), igk_row_)); + beta_projectors_col_ = std::unique_ptr(new Beta_projectors(ctx_, gkvec(), igk_col_)); + + } + + //if (false) { + // p_mtrx_ = mdarray(unit_cell_.max_mt_basis_size(), unit_cell_.max_mt_basis_size(), unit_cell_.num_atom_types()); + // p_mtrx_.zero(); + + // for (int iat = 0; iat < unit_cell_.num_atom_types(); iat++) { + // auto& atom_type = unit_cell_.atom_type(iat); + + // if (!atom_type.pp_desc().augment) { + // continue; + // } + // int nbf = atom_type.mt_basis_size(); + // int ofs = atom_type.offset_lo(); + + // matrix qinv(nbf, nbf); + // for (int xi1 = 0; xi1 < nbf; xi1++) { + // for (int xi2 = 0; xi2 < nbf; xi2++) { + // qinv(xi2, xi1) = ctx_.augmentation_op(iat).q_mtrx(xi2, xi1); + // } + // } + // linalg::geinv(nbf, qinv); + // + // /* compute P^{+}*P */ + // linalg::gemm(2, 0, nbf, nbf, num_gkvec_loc(), + // beta_projectors_->beta_gk_t().at(0, ofs), beta_projectors_->beta_gk_t().ld(), + // beta_projectors_->beta_gk_t().at(0, ofs), beta_projectors_->beta_gk_t().ld(), + // &p_mtrx_(0, 0, iat), p_mtrx_.ld()); + // comm().allreduce(&p_mtrx_(0, 0, iat), unit_cell_.max_mt_basis_size() * unit_cell_.max_mt_basis_size()); + + // for (int xi1 = 0; xi1 < nbf; xi1++) { + // for (int xi2 = 0; xi2 < nbf; xi2++) { + // qinv(xi2, xi1) += p_mtrx_(xi2, xi1, iat); + // } + // } + // /* compute (Q^{-1} + P^{+}*P)^{-1} */ + // linalg::geinv(nbf, qinv); + // for (int xi1 = 0; xi1 < nbf; xi1++) { + // for (int xi2 = 0; xi2 < nbf; xi2++) { + // p_mtrx_(xi2, xi1, iat) = qinv(xi2, xi1); + // } + // } + // } + //} + } + + } + + /// Generate first-variational states from eigen-vectors. + /** First-variational states are obtained from the first-variational eigen-vectors and + * LAPW matching coefficients. + * + * APW part: + * \f[ + * \psi_{\xi j}^{\bf k} = \sum_{{\bf G}} Z_{{\bf G} j}^{\bf k} * A_{\xi}({\bf G+k}) + * \f] + */ + void generate_fv_states(); + + #ifdef __GPU + void generate_fv_states_aw_mt_gpu(); + #endif + + /// Generate two-component spinor wave functions + inline void generate_spinor_wave_functions(); + + inline void generate_atomic_centered_wavefunctions(const int num_ao__, Wave_functions &phi); + + inline void generate_atomic_centered_wavefunctions_aux(const int num_ao__, Wave_functions &phi, std::vector &offset, bool hubbard); + + void compute_gradient_wavefunctions(Wave_functions &phi, + const int starting_position_i, + const int num_wf, + Wave_functions &dphi, + const int starting_position_j, + const int direction); + + void save(int id); + + void load(HDF5_tree h5in, int id); + + //== void save_wave_functions(int id); + + //== void load_wave_functions(int id); + + void get_fv_eigen_vectors(mdarray& fv_evec); + + void get_sv_eigen_vectors(mdarray& sv_evec); + + /// Test orthonormalization of spinor wave-functions + void test_spinor_wave_functions(int use_fft); + + /// Get the number of occupied bands for each spin channel. + int num_occupied_bands(int ispn__ = -1) + { + for (int j = ctx_.num_bands() - 1; j >= 0; j--) { + if (std::abs(band_occupancy(j, ispn__) * weight()) > 1e-14) { + return j + 1; + } + } + return 0; + } + + /// Total number of G+k vectors within the cutoff distance + inline int num_gkvec() const + { + return gkvec_->num_gvec(); + } + + /// Total number of muffin-tin and plane-wave expansion coefficients for the wave-functions. + /** APW+lo basis \f$ \varphi_{\mu {\bf k}}({\bf r}) = \{ \varphi_{\bf G+k}({\bf r}), + * \varphi_{j{\bf k}}({\bf r}) \} \f$ is used to expand first-variational wave-functions: + * + * \f[ + * \psi_{i{\bf k}}({\bf r}) = \sum_{\mu} c_{\mu i}^{\bf k} \varphi_{\mu \bf k}({\bf r}) = + * \sum_{{\bf G}}c_{{\bf G} i}^{\bf k} \varphi_{\bf G+k}({\bf r}) + + * \sum_{j}c_{j i}^{\bf k}\varphi_{j{\bf k}}({\bf r}) + * \f] + * + * Inside muffin-tins the expansion is converted into the following form: + * \f[ + * \psi_{i {\bf k}}({\bf r})= \begin{array}{ll} + * \displaystyle \sum_{L} \sum_{\lambda=1}^{N_{\ell}^{\alpha}} + * F_{L \lambda}^{i {\bf k},\alpha}f_{\ell \lambda}^{\alpha}(r) + * Y_{\ell m}(\hat {\bf r}) & {\bf r} \in MT_{\alpha} \end{array} + * \f] + * + * Thus, the total number of coefficients representing a wave-funstion is equal + * to the number of muffin-tin basis functions of the form \f$ f_{\ell \lambda}^{\alpha}(r) + * Y_{\ell m}(\hat {\bf r}) \f$ plust the number of G+k plane waves. */ + //inline int wf_size() const // TODO: better name for this + //{ + // if (ctx_.full_potential()) { + // return unit_cell_.mt_basis_size() + num_gkvec(); + // } else { + // return num_gkvec(); + // } + //} + + inline double& band_energy(int j__, int ispn__) + { + return band_energy_aux(j__, ispn__); + } + + inline double band_energy(int j__, int ispn__) const + { + auto const& e = const_cast(this)->band_energy_aux(j__, ispn__); + return e; + } + + inline double& band_occupancy(int j__, int ispn__) + { + return band_occupancy_aux(j__, ispn__); + } + + inline double band_occupancy(int j__, int ispn__) const + { + auto const& e = const_cast(this)->band_occupancy_aux(j__, ispn__); + return e; + } + + inline double fv_eigen_value(int i) const + { + return fv_eigen_values_[i]; + } + + void set_fv_eigen_values(double* eval) + { + std::memcpy(&fv_eigen_values_[0], eval, ctx_.num_fv_states() * sizeof(double)); + } + + inline double weight() const + { + return weight_; + } + + inline Wave_functions& fv_states() + { + return *fv_states_; + } + + inline Wave_functions& spinor_wave_functions() + { + return *spinor_wave_functions_; + } + + inline Wave_functions& hubbard_wave_functions() + { + return *hubbard_wave_functions_; + } + + inline Wave_functions const& hubbard_wave_functions() const + { + return *hubbard_wave_functions_; + } + + inline void allocate_hubbard_wave_functions(int size) + { + if (hubbard_wave_functions_ != nullptr) { + return; + } + const int num_sc = ctx_.num_mag_dims() == 3 ? 2 : 1; + hubbard_wave_functions_ = std::unique_ptr(new Wave_functions(gkvec_partition(), + size, + num_sc)); + } + + inline bool hubbard_wave_functions_calculated() + { + return (hubbard_wave_functions_ != nullptr); + } + + inline Wave_functions& singular_components() + { + return *singular_components_; + } + + inline vector3d vk() const + { + return vk_; + } + + /// Basis size of LAPW+lo method. + inline int gklo_basis_size() const + { + return static_cast(num_gkvec() + unit_cell_.mt_lo_basis_size()); + } + + /// Local number of G+k vectors in case of flat distribution. + inline int num_gkvec_loc() const + { + return gkvec().count(); + } + + /// Return global index of G+k vector. + inline int idxgk(int igkloc__) const + { + return gkvec_offset_ + igkloc__; + } + + /// Local number of G+k vectors for each MPI rank in the row of the 2D MPI grid. + inline int num_gkvec_row() const + { + return num_gkvec_row_; + } + + /// Local number of local orbitals for each MPI rank in the row of the 2D MPI grid. + inline int num_lo_row() const + { + return static_cast(lo_basis_descriptors_row_.size()); + } + + /// Local number of basis functions for each MPI rank in the row of the 2D MPI grid. + inline int gklo_basis_size_row() const + { + return num_gkvec_row() + num_lo_row(); + } + + /// Local number of G+k vectors for each MPI rank in the column of the 2D MPI grid. + inline int num_gkvec_col() const + { + return num_gkvec_col_; + } + + /// Local number of local orbitals for each MPI rank in the column of the 2D MPI grid. + inline int num_lo_col() const + { + return static_cast(lo_basis_descriptors_col_.size()); + } + + /// Local number of basis functions for each MPI rank in the column of the 2D MPI grid. + inline int gklo_basis_size_col() const + { + return num_gkvec_col() + num_lo_col(); + } + + inline lo_basis_descriptor const& lo_basis_descriptor_col(int idx) const + { + assert(idx >=0 && idx < (int)lo_basis_descriptors_col_.size()); + return lo_basis_descriptors_col_[idx]; + } + + inline lo_basis_descriptor const& lo_basis_descriptor_row(int idx) const + { + assert(idx >= 0 && idx < (int)lo_basis_descriptors_row_.size()); + return lo_basis_descriptors_row_[idx]; + } + + inline int igk_loc(int idx__) const + { + return igk_loc_[idx__]; + } + + inline std::vector const& igk_loc() const + { + return igk_loc_; + } + + inline int igk_row(int idx__) const + { + return igk_row_[idx__]; + } + + inline std::vector const& igk_row() const + { + return igk_row_; + } + + inline int igk_col(int idx__) const + { + return igk_col_[idx__]; + } + + inline std::vector const& igk_col() const + { + return igk_col_; + } + + inline int num_ranks_row() const + { + return num_ranks_row_; + } + + inline int rank_row() const + { + return rank_row_; + } + + inline int num_ranks_col() const + { + return num_ranks_col_; + } + + inline int rank_col() const + { + return rank_col_; + } + + /// Number of MPI ranks for a given k-point + inline int num_ranks() const + { + return comm_.size(); + } + + inline int rank() const + { + return comm_.rank(); + } + + /// Return number of lo columns for a given atom + inline int num_atom_lo_cols(int ia) const + { + return (int)atom_lo_cols_[ia].size(); + } + + /// Return local index (for the current MPI rank) of a column for a given atom and column index within an atom + inline int lo_col(int ia, int i) const + { + return atom_lo_cols_[ia][i]; + } + + /// Return number of lo rows for a given atom + inline int num_atom_lo_rows(int ia) const + { + return (int)atom_lo_rows_[ia].size(); + } + + /// Return local index (for the current MPI rank) of a row for a given atom and row index within an atom + inline int lo_row(int ia, int i) const + { + return atom_lo_rows_[ia][i]; + } + + inline dmatrix& fv_eigen_vectors() + { + return fv_eigen_vectors_; + } + + inline Wave_functions& fv_eigen_vectors_slab() + { + return *fv_eigen_vectors_slab_; + } + + inline dmatrix& sv_eigen_vectors(int ispn) + { + return sv_eigen_vectors_[ispn]; + } + + inline mdarray& fd_eigen_vectors() + { + return fd_eigen_vectors_; + } + + void bypass_sv() + { + std::memcpy(&band_energies_[0], &fv_eigen_values_[0], ctx_.num_fv_states() * sizeof(double)); + } + + inline Gvec const& gkvec() const + { + return *gkvec_; + } + + inline Gvec_partition const& gkvec_partition() const + { + return *gkvec_partition_; + } + + inline Matching_coefficients const& alm_coeffs_row() + { + return *alm_coeffs_row_; + } + + inline Matching_coefficients const& alm_coeffs_col() + { + return *alm_coeffs_col_; + } + + inline Matching_coefficients const& alm_coeffs_loc() const + { + return *alm_coeffs_loc_; + } + + inline Communicator const& comm() const + { + return comm_; + } + + inline Communicator const& comm_row() const + { + return comm_row_; + } + + inline Communicator const& comm_col() const + { + return comm_col_; + } + + inline double_complex p_mtrx(int xi1, int xi2, int iat) const + { + return p_mtrx_(xi1, xi2, iat); + } + + inline mdarray& p_mtrx() + { + return p_mtrx_; + } + + Beta_projectors& beta_projectors() + { + assert(beta_projectors_ != nullptr); + return *beta_projectors_; + } + + Beta_projectors& beta_projectors_row() + { + assert(beta_projectors_ != nullptr); + return *beta_projectors_row_; + } + + Beta_projectors& beta_projectors_col() + { + assert(beta_projectors_ != nullptr); + return *beta_projectors_col_; + } +}; + //== void K_point::check_alm(int num_gkvec_loc, int ia, mdarray& alm) //== { //== static SHT* sht = NULL; @@ -505,3 +1277,61 @@ inline void K_point::get_sv_eigen_vectors(mdarray& sv_evec) comm_.allreduce(sv_evec.at(), (int)sv_evec.size()); } +#include "generate_fv_states.hpp" +#include "generate_spinor_wave_functions.hpp" +#include "generate_gklo_basis.hpp" +#include "initialize.hpp" +#include "test_fv_states.hpp" +#include "generate_atomic_centered_wavefunctions.hpp" + +} + +/** \page basis Basis functions for Kohn-Sham wave-functions expansion + * + * \section basis1 LAPW+lo basis + * + * LAPW+lo basis consists of two different sets of functions: LAPW functions \f$ \varphi_{{\bf G+k}} \f$ defined over + * entire unit cell: + * \f[ + * \varphi_{{\bf G+k}}({\bf r}) = \left\{ \begin{array}{ll} + * \displaystyle \sum_{L} \sum_{\nu=1}^{O_{\ell}^{\alpha}} a_{L\nu}^{\alpha}({\bf G+k})u_{\ell \nu}^{\alpha}(r) + * Y_{\ell m}(\hat {\bf r}) & {\bf r} \in {\rm MT} \alpha \\ + * \displaystyle \frac{1}{\sqrt \Omega} e^{i({\bf G+k}){\bf r}} & {\bf r} \in {\rm I} \end{array} \right. + * \f] + * and Bloch sums of local orbitals defined inside muffin-tin spheres only: + * \f[ + * \begin{array}{ll} \displaystyle \varphi_{j{\bf k}}({\bf r})=\sum_{{\bf T}} e^{i{\bf kT}} + * \varphi_{j}({\bf r - T}) & {\rm {\bf r} \in MT} \end{array} + * \f] + * Each local orbital is composed of radial and angular parts: + * \f[ + * \varphi_{j}({\bf r}) = \phi_{\ell_j}^{\zeta_j,\alpha_j}(r) Y_{\ell_j m_j}(\hat {\bf r}) + * \f] + * Radial part of local orbital is defined as a linear combination of radial functions (minimum two radial functions + * are required) such that local orbital vanishes at the sphere boundary: + * \f[ + * \phi_{\ell}^{\zeta, \alpha}(r) = \sum_{p}\gamma_{p}^{\zeta,\alpha} u_{\ell \nu_p}^{\alpha}(r) + * \f] + * + * Arbitrary number of local orbitals can be introduced for each angular quantum number (this is highlighted by + * the index \f$ \zeta \f$). + * + * Radial functions are m-th order (with zero-order being a function itself) energy derivatives of the radial + * Schrödinger equation: + * \f[ + * u_{\ell \nu}^{\alpha}(r) = \frac{\partial^{m_{\nu}}}{\partial^{m_{\nu}}E}u_{\ell}^{\alpha}(r,E)\Big|_{E=E_{\nu}} + * \f] + */ + +/** \page data_dist K-point data distribution + * + * \section data_dist1 "Panel" and "full" data storage + * + * We have to deal with big arrays (matching coefficients, eigen vectors, wave functions, etc.) which may not fit + * into the memory of a single node. For some operations we need a "panel" distribution of data, where each + * MPI rank gets a local panel of block-cyclic distributed matrix. This way of storing data is necessary for the + * distributed PBLAS and ScaLAPACK operations. + * + */ + +#endif // __K_POINT_H__ diff --git a/src/k_point_set.h b/src/K_point/k_point_set.hpp similarity index 99% rename from src/k_point_set.h rename to src/K_point/k_point_set.hpp index 503a963f8..892fa37c0 100644 --- a/src/k_point_set.h +++ b/src/K_point/k_point_set.hpp @@ -17,15 +17,15 @@ // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -/** \file k_point_set.h +/** \file k_point_set.hpp * * \brief Contains declaration and partial implementation of sirius::K_point_set class. */ -#ifndef __K_POINT_SET_H__ -#define __K_POINT_SET_H__ +#ifndef __K_POINT_SET_HPP__ +#define __K_POINT_SET_HPP__ -#include "k_point.h" +#include "k_point.hpp" #include "geometry3d.hpp" #include "smearing.hpp" diff --git a/src/Potential/generate_d_operator_matrix.hpp b/src/Potential/generate_d_operator_matrix.hpp index 923c24d54..07ef8e362 100644 --- a/src/Potential/generate_d_operator_matrix.hpp +++ b/src/Potential/generate_d_operator_matrix.hpp @@ -40,7 +40,7 @@ inline void Potential::generate_D_operator_matrix() if (ctx_.processing_unit() == GPU) { veff_tmp.allocate(memory_t::device); } - + if (ctx_.unit_cell().atom_type(0).augment() && ctx_.unit_cell().atom_type(0).num_atoms() > 0) { ctx_.augmentation_op(0).prepare(0); } diff --git a/src/xc_functional.h b/src/Potential/xc_functional.hpp similarity index 99% rename from src/xc_functional.h rename to src/Potential/xc_functional.hpp index d97eadd47..5779288b9 100644 --- a/src/xc_functional.h +++ b/src/Potential/xc_functional.hpp @@ -17,13 +17,13 @@ // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -/** \file xc_functional.h +/** \file xc_functional.hpp * * \brief Contains implementation of sirius::XC_functional class. */ -#ifndef __XC_FUNCTIONAL_H__ -#define __XC_FUNCTIONAL_H__ +#ifndef __XC_FUNCTIONAL_HPP__ +#define __XC_FUNCTIONAL_HPP__ #include #include diff --git a/src/SHT/gaunt.hpp b/src/SHT/gaunt.hpp new file mode 100644 index 000000000..8fdfb4269 --- /dev/null +++ b/src/SHT/gaunt.hpp @@ -0,0 +1,208 @@ +// Copyright (c) 2013-2018 Anton Kozhevnikov, Thomas Schulthess +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without modification, are permitted provided that +// the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the +// following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions +// and the following disclaimer in the documentation and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +/** \file gaunt.hpp + * + * \brief Contains definition and implementation of sirius::Gaunt class. + */ + +#ifndef __GAUNT_HPP__ +#define __GAUNT_HPP__ + +#include "mdarray.hpp" + +namespace sirius { + +/// Used in the {lm1, lm2} : {lm3, coefficient} way of grouping non-zero Gaunt coefficients +template +struct gaunt_L3 +{ + int lm3; + int l3; + T coef; +}; + +/// Used in the {lm1, lm2, coefficient} : {lm3} way of grouping non-zero Gaunt coefficients +template +struct gaunt_L1_L2 +{ + int lm1; + int lm2; + T coef; +}; + +/// Compact storage of non-zero Gaunt coefficients \f$ \langle \ell_1 m_1 | \ell_3 m_3 | \ell_2 m_2 \rangle \f$. +/** Very important! The following notation is adopted and used everywhere: lm1 and lm2 represent 'bra' and 'ket' + * spherical harmonics of the Gaunt integral and lm3 represent the inner spherical harmonic. + */ +template +class Gaunt_coefficients +{ + private: + /// lmax of + int lmax2_; + /// lmmax of |lm2> + int lmmax2_; + + /// List of non-zero Gaunt coefficients for each lm3. + mdarray>, 1> gaunt_packed_L1_L2_; + + /// List of non-zero Gaunt coefficients for each combination of lm1, lm2. + mdarray>, 2> gaunt_packed_L3_; + + public: + /// Class constructor. + Gaunt_coefficients(int lmax1__, + int lmax3__, + int lmax2__, + std::function get__) + : lmax1_(lmax1__) + , lmax3_(lmax3__) + , lmax2_(lmax2__) + { + lmmax1_ = utils::lmmax(lmax1_); + lmmax3_ = utils::lmmax(lmax3_); + lmmax2_ = utils::lmmax(lmax2_); + + gaunt_packed_L1_L2_ = mdarray>, 1>(lmmax3_); + gaunt_L1_L2 g12; + + gaunt_packed_L3_ = mdarray>, 2>(lmmax1_, lmmax2_); + gaunt_L3 g3; + + for (int l1 = 0, lm1 = 0; l1 <= lmax1_; l1++) { + for (int m1 = -l1; m1 <= l1; m1++, lm1++) { + for (int l2 = 0, lm2 = 0; l2 <= lmax2_; l2++) { + for (int m2 = -l2; m2 <= l2; m2++, lm2++) { + for (int l3 = 0, lm3 = 0; l3 <= lmax3_; l3++) { + for (int m3 = -l3; m3 <= l3; m3++, lm3++) { + + T gc = get__(l1, l3, l2, m1, m3, m2); + if (std::abs(gc) > 1e-12) { + g12.lm1 = lm1; + g12.lm2 = lm2; + g12.coef = gc; + gaunt_packed_L1_L2_[lm3].push_back(g12); + + g3.lm3 = lm3; + g3.l3 = l3; + g3.coef = gc; + gaunt_packed_L3_(lm1, lm2).push_back(g3); + } + } + } + } + } + } + } + } + + /// Return number of non-zero Gaunt coefficients for a given lm3. + inline int num_gaunt(int lm3) const + { + assert(lm3 >= 0 && lm3 < lmmax3_); + return static_cast(gaunt_packed_L1_L2_[lm3].size()); + } + + /// Return a structure containing {lm1, lm2, coef} for a given lm3 and index. + /** Example: + * \code{.cpp} + * for (int lm3 = 0; lm3 < lmmax3; lm3++) + * { + * for (int i = 0; i < gaunt_coefs.num_gaunt(lm3); i++) { + * int lm1 = gaunt_coefs.gaunt(lm3, i).lm1; + * int lm2 = gaunt_coefs.gaunt(lm3, i).lm2; + * double coef = gaunt_coefs.gaunt(lm3, i).coef; + * + * // do something with lm1,lm2,lm3 and coef + * } + * } + * \endcode + */ + inline gaunt_L1_L2 const& gaunt(int lm3, int idx) const + { + assert(lm3 >= 0 && lm3 < lmmax3_); + assert(idx >= 0 && idx < (int)gaunt_packed_L1_L2_[lm3].size()); + return gaunt_packed_L1_L2_[lm3][idx]; + } + + /// Return number of non-zero Gaunt coefficients for a combination of lm1 and lm2. + inline int num_gaunt(int lm1, int lm2) const + { + return static_cast(gaunt_packed_L3_(lm1, lm2).size()); + } + + /// Return a structure containing {lm3, coef} for a given lm1, lm2 and index + inline gaunt_L3 const& gaunt(int lm1, int lm2, int idx) const + { + return gaunt_packed_L3_(lm1, lm2)[idx]; + } + + /// Return a sum over L3 (lm3) index of Gaunt coefficients and a complex vector. + /** The following operation is performed: + * \f[ + * \sum_{\ell_3 m_3} \langle \ell_1 m_1 | \ell_3 m_3 | \ell_2 m_2 \rangle v_{\ell_3 m_3} + * \f] + * Result is assumed to be complex. + */ + inline double_complex sum_L3_gaunt(int lm1, int lm2, double_complex const* v) const + { + double_complex zsum(0, 0); + for (int k = 0; k < (int)gaunt_packed_L3_(lm1, lm2).size(); k++) { + zsum += gaunt_packed_L3_(lm1, lm2)[k].coef * v[gaunt_packed_L3_(lm1, lm2)[k].lm3]; + } + return zsum; + } + + /// Return a sum over L3 (lm3) index of Gaunt coefficients and a real vector. + /** The following operation is performed: + * \f[ + * \sum_{\ell_3 m_3} \langle \ell_1 m_1 | \ell_3 m_3 | \ell_2 m_2 \rangle v_{\ell_3 m_3} + * \f] + * Result is assumed to be of the same type as Gaunt coefficients. + */ + inline T sum_L3_gaunt(int lm1, int lm2, double const* v) const + { + T sum = 0; + for (int k = 0; k < (int)gaunt_packed_L3_(lm1, lm2).size(); k++) { + sum += gaunt_packed_L3_(lm1, lm2)[k].coef * v[gaunt_packed_L3_(lm1, lm2)[k].lm3]; + } + return sum; + } + + /// Return vector of non-zero Gaunt coefficients for a given combination of lm1 and lm2 + inline std::vector> const& gaunt_vector(int lm1, int lm2) const + { + return gaunt_packed_L3_(lm1, lm2); + } +}; + +}; // namespace sirius + +#endif diff --git a/src/lebedev_grids.hpp b/src/SHT/lebedev_grids.hpp similarity index 100% rename from src/lebedev_grids.hpp rename to src/SHT/lebedev_grids.hpp diff --git a/src/SHT/sht.hpp b/src/SHT/sht.hpp new file mode 100644 index 000000000..51335a83a --- /dev/null +++ b/src/SHT/sht.hpp @@ -0,0 +1,1296 @@ +// Copyright (c) 2013-2017 Anton Kozhevnikov, Thomas Schulthess +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without modification, are permitted provided that +// the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the +// following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions +// and the following disclaimer in the documentation and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +/** \file sht.hpp + * + * \brief Contains declaration and particular implementation of sirius::SHT class. + */ + +#ifndef __SHT_HPP__ +#define __SHT_HPP__ + +#include +#include +#include +#include +#include +#include +#include + +#include "typedefs.h" +#include "linalg.hpp" +#include "lebedev_grids.hpp" + +namespace sirius { + +/// Spherical harmonics transformations and related oprtations. +/** This class is responsible for the generation of complex and real spherical harmonics, generation of transformation + * matrices, transformation between spectral and real-space representations, generation of Gaunt and Clebsch-Gordan + * coefficients and calculation of spherical harmonic derivatives */ +class SHT // TODO: better name +{ + private: + /// Maximum \f$ \ell \f$ of spherical harmonics. + int lmax_; + + /// Maximum number of \f$ \ell, m \f$ components. + int lmmax_; + + /// Number of real-space \f$ (\theta, \phi) \f$ points on the sphere. + int num_points_; + + /// Cartesian coordinates of points (normalized to 1). + mdarray coord_; + + /// \f$ (\theta, \phi) \f$ angles of points. + mdarray tp_; + + /// Point weights. + std::vector w_; + + /// Backward transformation from Ylm to spherical coordinates. + mdarray ylm_backward_; + + /// Forward transformation from spherical coordinates to Ylm. + mdarray ylm_forward_; + + /// Backward transformation from Rlm to spherical coordinates. + mdarray rlm_backward_; + + /// Forward transformation from spherical coordinates to Rlm. + mdarray rlm_forward_; + + /// Type of spherical grid (0: Lebedev-Laikov, 1: uniform). + int mesh_type_{0}; + + public: + /// Default constructor. + SHT(int lmax__) + : lmax_(lmax__) + { + lmmax_ = (lmax_ + 1) * (lmax_ + 1); + + if (mesh_type_ == 0) { + num_points_ = Lebedev_Laikov_npoint(2 * lmax_); + } + if (mesh_type_ == 1) { + num_points_ = lmmax_; + } + + std::vector x(num_points_); + std::vector y(num_points_); + std::vector z(num_points_); + + coord_ = mdarray(3, num_points_); + + tp_ = mdarray(2, num_points_); + + w_.resize(num_points_); + + if (mesh_type_ == 0) + Lebedev_Laikov_sphere(num_points_, &x[0], &y[0], &z[0], &w_[0]); + if (mesh_type_ == 1) + uniform_coverage(); + + ylm_backward_ = mdarray(lmmax_, num_points_); + + ylm_forward_ = mdarray(num_points_, lmmax_); + + rlm_backward_ = mdarray(lmmax_, num_points_); + + rlm_forward_ = mdarray(num_points_, lmmax_); + + for (int itp = 0; itp < num_points_; itp++) { + if (mesh_type_ == 0) { + coord_(0, itp) = x[itp]; + coord_(1, itp) = y[itp]; + coord_(2, itp) = z[itp]; + + auto vs = spherical_coordinates(vector3d(x[itp], y[itp], z[itp])); + tp_(0, itp) = vs[1]; + tp_(1, itp) = vs[2]; + spherical_harmonics(lmax_, vs[1], vs[2], &ylm_backward_(0, itp)); + spherical_harmonics(lmax_, vs[1], vs[2], &rlm_backward_(0, itp)); + for (int lm = 0; lm < lmmax_; lm++) { + ylm_forward_(itp, lm) = std::conj(ylm_backward_(lm, itp)) * w_[itp] * fourpi; + rlm_forward_(itp, lm) = rlm_backward_(lm, itp) * w_[itp] * fourpi; + } + } + if (mesh_type_ == 1) { + double t = tp_(0, itp); + double p = tp_(1, itp); + + coord_(0, itp) = sin(t) * cos(p); + coord_(1, itp) = sin(t) * sin(p); + coord_(2, itp) = cos(t); + + spherical_harmonics(lmax_, t, p, &ylm_backward_(0, itp)); + spherical_harmonics(lmax_, t, p, &rlm_backward_(0, itp)); + + for (int lm = 0; lm < lmmax_; lm++) { + ylm_forward_(lm, itp) = ylm_backward_(lm, itp); + rlm_forward_(lm, itp) = rlm_backward_(lm, itp); + } + } + } + + if (mesh_type_ == 1) { + linalg::geinv(lmmax_, ylm_forward_); + linalg::geinv(lmmax_, rlm_forward_); + } + +#if (__VERIFICATION > 0) + { + double dr = 0; + double dy = 0; + + for (int lm = 0; lm < lmmax_; lm++) { + for (int lm1 = 0; lm1 < lmmax_; lm1++) { + double t = 0; + double_complex zt(0, 0); + for (int itp = 0; itp < num_points_; itp++) { + zt += ylm_forward_(itp, lm) * ylm_backward_(lm1, itp); + t += rlm_forward_(itp, lm) * rlm_backward_(lm1, itp); + } + + if (lm == lm1) { + zt -= 1.0; + t -= 1.0; + } + dr += std::abs(t); + dy += std::abs(zt); + } + } + dr = dr / lmmax_ / lmmax_; + dy = dy / lmmax_ / lmmax_; + + if (dr > 1e-15 || dy > 1e-15) { + std::stringstream s; + s << "spherical mesh error is too big" << std::endl + << " real spherical integration error " << dr << std::endl + << " complex spherical integration error " << dy; + WARNING(s.str()) + } + + std::vector flm(lmmax_); + std::vector ftp(num_points_); + for (int lm = 0; lm < lmmax_; lm++) { + std::memset(&flm[0], 0, lmmax_ * sizeof(double)); + flm[lm] = 1.0; + backward_transform(lmmax_, &flm[0], 1, lmmax_, &ftp[0]); + forward_transform(&ftp[0], 1, lmmax_, lmmax_, &flm[0]); + flm[lm] -= 1.0; + + double t = 0.0; + for (int lm1 = 0; lm1 < lmmax_; lm1++) + t += std::abs(flm[lm1]); + + t /= lmmax_; + + if (t > 1e-15) { + std::stringstream s; + s << "test of backward / forward real SHT failed" << std::endl + << " total error " << t; + WARNING(s.str()); + } + } + } +#endif + } + + /// Perform a backward transformation from spherical harmonics to spherical coordinates. + /** \f[ + * f(\theta, \phi, r) = \sum_{\ell m} f_{\ell m}(r) Y_{\ell m}(\theta, \phi) + * \f] + * + * \param [in] ld Size of leading dimension of flm. + * \param [in] flm Raw pointer to \f$ f_{\ell m}(r) \f$. + * \param [in] nr Number of radial points. + * \param [in] lmmax Maximum number of lm- harmonics to take into sum. + * \param [out] ftp Raw pointer to \f$ f(\theta, \phi, r) \f$. + */ + template + void backward_transform(int ld, T const* flm, int nr, int lmmax, T* ftp); + + /// Perform a forward transformation from spherical coordinates to spherical harmonics. + /** \f[ + * f_{\ell m}(r) = \iint f(\theta, \phi, r) Y_{\ell m}^{*}(\theta, \phi) \sin \theta d\phi d\theta = + * \sum_{i} f(\theta_i, \phi_i, r) Y_{\ell m}^{*}(\theta_i, \phi_i) w_i + * \f] + * + * \param [in] ftp Raw pointer to \f$ f(\theta, \phi, r) \f$. + * \param [in] nr Number of radial points. + * \param [in] lmmax Maximum number of lm- coefficients to generate. + * \param [in] ld Size of leading dimension of flm. + * \param [out] flm Raw pointer to \f$ f_{\ell m}(r) \f$. + */ + template + void forward_transform(T const* ftp, int nr, int lmmax, int ld, T* flm); + + /// Convert form Rlm to Ylm representation. + static void convert(int lmax__, double const* f_rlm__, double_complex* f_ylm__) + { + int lm = 0; + for (int l = 0; l <= lmax__; l++) { + for (int m = -l; m <= l; m++) { + if (m == 0) { + f_ylm__[lm] = f_rlm__[lm]; + } else { + int lm1 = utils::lm(l, -m); + f_ylm__[lm] = ylm_dot_rlm(l, m, m) * f_rlm__[lm] + ylm_dot_rlm(l, m, -m) * f_rlm__[lm1]; + } + lm++; + } + } + } + + /// Convert from Ylm to Rlm representation. + static void convert(int lmax__, double_complex const* f_ylm__, double* f_rlm__) + { + int lm = 0; + for (int l = 0; l <= lmax__; l++) { + for (int m = -l; m <= l; m++) { + if (m == 0) { + f_rlm__[lm] = std::real(f_ylm__[lm]); + } else { + int lm1 = utils::lm(l, -m); + f_rlm__[lm] = std::real(rlm_dot_ylm(l, m, m) * f_ylm__[lm] + rlm_dot_ylm(l, m, -m) * f_ylm__[lm1]); + } + lm++; + } + } + } + + //void rlm_forward_iterative_transform(double *ftp__, int lmmax, int ncol, double* flm) + //{ + // Timer t("sirius::SHT::rlm_forward_iterative_transform"); + // + // assert(lmmax <= lmmax_); + + // mdarray ftp(ftp__, num_points_, ncol); + // mdarray ftp1(num_points_, ncol); + // + // blas::gemm(1, 0, lmmax, ncol, num_points_, 1.0, &rlm_forward_(0, 0), num_points_, &ftp(0, 0), num_points_, 0.0, + // flm, lmmax); + // + // for (int i = 0; i < 2; i++) + // { + // rlm_backward_transform(flm, lmmax, ncol, &ftp1(0, 0)); + // double tdiff = 0.0; + // for (int ir = 0; ir < ncol; ir++) + // { + // for (int itp = 0; itp < num_points_; itp++) + // { + // ftp1(itp, ir) = ftp(itp, ir) - ftp1(itp, ir); + // //tdiff += fabs(ftp1(itp, ir)); + // } + // } + // + // for (int itp = 0; itp < num_points_; itp++) + // { + // tdiff += fabs(ftp1(itp, ncol - 1)); + // } + // std::cout << "iter : " << i << " avg. MT diff = " << tdiff / num_points_ << std::endl; + // blas::gemm(1, 0, lmmax, ncol, num_points_, 1.0, &rlm_forward_(0, 0), num_points_, &ftp1(0, 0), num_points_, 1.0, + // flm, lmmax); + // } + //} + + /// Transform Cartesian coordinates [x,y,z] to spherical coordinates [r,theta,phi] + static vector3d spherical_coordinates(vector3d vc) + { + vector3d vs; + + const double eps{1e-12}; + + vs[0] = vc.length(); + + if (vs[0] <= eps) { + vs[1] = 0.0; + vs[2] = 0.0; + } else { + vs[1] = std::acos(vc[2] / vs[0]); // theta = cos^{-1}(z/r) + + if (std::abs(vc[0]) > eps || std::abs(vc[1]) > eps) { + vs[2] = std::atan2(vc[1], vc[0]); // phi = tan^{-1}(y/x) + if (vs[2] < 0.0) { + vs[2] += twopi; + } + } else { + vs[2] = 0.0; + } + } + + return vs; + } + + /// Generate complex spherical harmonics Ylm + static void spherical_harmonics(int lmax, double theta, double phi, double_complex* ylm) + { + double x = std::cos(theta); + + std::vector result_array(lmax + 1); + + for (int l = 0; l <= lmax; l++) { + for (int m = 0; m <= l; m++) { + double_complex z = std::exp(double_complex(0.0, m * phi)); + ylm[utils::lm(l, m)] = gsl_sf_legendre_sphPlm(l, m, x) * z; + if (m % 2) { + ylm[utils::lm(l, -m)] = -std::conj(ylm[utils::lm(l, m)]); + } else { + ylm[utils::lm(l, -m)] = std::conj(ylm[utils::lm(l, m)]); + } + } + } + } + + /// Generate real spherical harmonics Rlm + /** Mathematica code: + * \verbatim + * R[l_, m_, th_, ph_] := + * If[m > 0, std::sqrt[2]*ComplexExpand[Re[SphericalHarmonicY[l, m, th, ph]]], + * If[m < 0, std::sqrt[2]*ComplexExpand[Im[SphericalHarmonicY[l, m, th, ph]]], + * If[m == 0, ComplexExpand[Re[SphericalHarmonicY[l, 0, th, ph]]]]]] + * \endverbatim + */ + static void spherical_harmonics(int lmax, double theta, double phi, double* rlm) + { + int lmmax = (lmax + 1) * (lmax + 1); + + std::vector ylm(lmmax); + spherical_harmonics(lmax, theta, phi, &ylm[0]); + + double const t = std::sqrt(2.0); + + rlm[0] = y00; + + for (int l = 1; l <= lmax; l++) { + for (int m = -l; m < 0; m++) { + rlm[utils::lm(l, m)] = t * ylm[utils::lm(l, m)].imag(); + } + + rlm[utils::lm(l, 0)] = ylm[utils::lm(l, 0)].real(); + + for (int m = 1; m <= l; m++) { + rlm[utils::lm(l, m)] = t * ylm[utils::lm(l, m)].real(); + } + } + } + + /// Generate real or complex spherical harmonics for a given Cartesian vectors. + template + static std::vector spherical_harmonics(int lmax__, vector3d vc__) + { + auto rtp = spherical_coordinates(vc__); + + std::vector v(utils::lmmax(lmax__)); + spherical_harmonics(lmax__, rtp[1], rtp[2], v.data()); + return std::move(v); + } + + /// Compute element of the transformation matrix from complex to real spherical harmonics. + /** Real spherical harmonic can be written as a linear combination of complex harmonics: + \f[ + R_{\ell m}(\theta, \phi) = \sum_{m'} a^{\ell}_{m' m}Y_{\ell m'}(\theta, \phi) + \f] + where + \f[ + a^{\ell}_{m' m} = \langle Y_{\ell m'} | R_{\ell m} \rangle + \f] + which gives the name to this function. + + Transformation from real to complex spherical harmonics is conjugate transpose: + + \f[ + Y_{\ell m}(\theta, \phi) = \sum_{m'} a^{\ell*}_{m m'}R_{\ell m'}(\theta, \phi) + \f] + + Mathematica code: + \verbatim + b[m1_, m2_] := + If[m1 == 0, 1, + If[m1 < 0 && m2 < 0, -I/Sqrt[2], + If[m1 > 0 && m2 < 0, (-1)^m1*I/Sqrt[2], + If[m1 < 0 && m2 > 0, (-1)^m2/Sqrt[2], + If[m1 > 0 && m2 > 0, 1/Sqrt[2]]]]]] + + a[m1_, m2_] := If[Abs[m1] == Abs[m2], b[m1, m2], 0] + + Rlm[l_, m_, t_, p_] := Sum[a[m1, m]*SphericalHarmonicY[l, m1, t, p], {m1, -l, l}] + \endverbatim + */ + static inline double_complex ylm_dot_rlm(int l, int m1, int m2) + { + double const isqrt2 = 1.0 / std::sqrt(2); + + assert(l >= 0 && std::abs(m1) <= l && std::abs(m2) <= l); + + if (!((m1 == m2) || (m1 == -m2))) { + return double_complex(0, 0); + } + + if (m1 == 0) { + return double_complex(1, 0); + } + + if (m1 < 0) { + if (m2 < 0) { + return -double_complex(0, isqrt2); + } else { + return std::pow(-1.0, m2) * double_complex(isqrt2, 0); + } + } else { + if (m2 < 0) { + return std::pow(-1.0, m1) * double_complex(0, isqrt2); + } else { + return double_complex(isqrt2, 0); + } + } + } + + static inline double_complex rlm_dot_ylm(int l, int m1, int m2) + { + return std::conj(ylm_dot_rlm(l, m2, m1)); + } + + /// Gaunt coefficent of three complex spherical harmonics. + /** + * \f[ + * \langle Y_{\ell_1 m_1} | Y_{\ell_2 m_2} | Y_{\ell_3 m_3} \rangle + * \f] + */ + static double gaunt_ylm(int l1, int l2, int l3, int m1, int m2, int m3) + { + assert(l1 >= 0); + assert(l2 >= 0); + assert(l3 >= 0); + assert(m1 >= -l1 && m1 <= l1); + assert(m2 >= -l2 && m2 <= l2); + assert(m3 >= -l3 && m3 <= l3); + + return std::pow(-1.0, std::abs(m1)) * std::sqrt(double(2 * l1 + 1) * double(2 * l2 + 1) * double(2 * l3 + 1) / fourpi) * + gsl_sf_coupling_3j(2 * l1, 2 * l2, 2 * l3, 0, 0, 0) * + gsl_sf_coupling_3j(2 * l1, 2 * l2, 2 * l3, -2 * m1, 2 * m2, 2 * m3); + } + + /// Gaunt coefficent of three real spherical harmonics. + /** + * \f[ + * \langle R_{\ell_1 m_1} | R_{\ell_2 m_2} | R_{\ell_3 m_3} \rangle + * \f] + */ + static double gaunt_rlm(int l1, int l2, int l3, int m1, int m2, int m3) + { + assert(l1 >= 0); + assert(l2 >= 0); + assert(l3 >= 0); + assert(m1 >= -l1 && m1 <= l1); + assert(m2 >= -l2 && m2 <= l2); + assert(m3 >= -l3 && m3 <= l3); + + double d = 0; + for (int k1 = -l1; k1 <= l1; k1++) { + for (int k2 = -l2; k2 <= l2; k2++) { + for (int k3 = -l3; k3 <= l3; k3++) { + d += std::real(std::conj(SHT::ylm_dot_rlm(l1, k1, m1)) * + SHT::ylm_dot_rlm(l2, k2, m2) * + SHT::ylm_dot_rlm(l3, k3, m3)) * + SHT::gaunt_ylm(l1, l2, l3, k1, k2, k3); + } + } + } + return d; + } + + /// Gaunt coefficent of two real spherical harmonics with a complex one. + /** + * \f[ + * \langle R_{\ell_1 m_1} | Y_{\ell_2 m_2} | R_{\ell_3 m_3} \rangle + * \f] + */ + static double gaunt_rlm_ylm_rlm(int l1, int l2, int l3, int m1, int m2, int m3) + { + assert(l1 >= 0); + assert(l2 >= 0); + assert(l3 >= 0); + assert(m1 >= -l1 && m1 <= l1); + assert(m2 >= -l2 && m2 <= l2); + assert(m3 >= -l3 && m3 <= l3); + + double d = 0; + for (int k1 = -l1; k1 <= l1; k1++) { + for (int k3 = -l3; k3 <= l3; k3++) { + d += std::real(std::conj(SHT::ylm_dot_rlm(l1, k1, m1)) * + SHT::ylm_dot_rlm(l3, k3, m3)) * + SHT::gaunt_ylm(l1, l2, l3, k1, m2, k3); + } + } + return d; + } + + /// Gaunt coefficent of two complex and one real spherical harmonics. + /** + * \f[ + * \langle Y_{\ell_1 m_1} | R_{\ell_2 m_2} | Y_{\ell_3 m_3} \rangle + * \f] + */ + static double_complex gaunt_hybrid(int l1, int l2, int l3, int m1, int m2, int m3) + { + assert(l1 >= 0); + assert(l2 >= 0); + assert(l3 >= 0); + assert(m1 >= -l1 && m1 <= l1); + assert(m2 >= -l2 && m2 <= l2); + assert(m3 >= -l3 && m3 <= l3); + + if (m2 == 0) { + return double_complex(gaunt_ylm(l1, l2, l3, m1, m2, m3), 0.0); + } else { + return (ylm_dot_rlm(l2, m2, m2) * gaunt_ylm(l1, l2, l3, m1, m2, m3) + + ylm_dot_rlm(l2, -m2, m2) * gaunt_ylm(l1, l2, l3, m1, -m2, m3)); + } + } + + void uniform_coverage() + { + tp_(0, 0) = pi; + tp_(1, 0) = 0; + + for (int k = 1; k < num_points_ - 1; k++) { + double hk = -1.0 + double(2 * k) / double(num_points_ - 1); + tp_(0, k) = std::acos(hk); + double t = tp_(1, k - 1) + 3.80925122745582 / std::sqrt(double(num_points_)) / std::sqrt(1 - hk * hk); + tp_(1, k) = std::fmod(t, twopi); + } + + tp_(0, num_points_ - 1) = 0; + tp_(1, num_points_ - 1) = 0; + } + + /// Return Clebsch-Gordan coefficient. + /** Clebsch-Gordan coefficients arise when two angular momenta are combined into a + * total angular momentum. + */ + static inline double clebsch_gordan(int l1, int l2, int l3, int m1, int m2, int m3) + { + assert(l1 >= 0); + assert(l2 >= 0); + assert(l3 >= 0); + assert(m1 >= -l1 && m1 <= l1); + assert(m2 >= -l2 && m2 <= l2); + assert(m3 >= -l3 && m3 <= l3); + + return std::pow(-1, l1 - l2 + m3) * std::sqrt(double(2 * l3 + 1)) * + gsl_sf_coupling_3j(2 * l1, 2 * l2, 2 * l3, 2 * m1, 2 * m2, -2 * m3); + } + + inline double_complex ylm_backward(int lm, int itp) const + { + return ylm_backward_(lm, itp); + } + + inline double rlm_backward(int lm, int itp) const + { + return rlm_backward_(lm, itp); + } + + inline double coord(int x, int itp) const + { + return coord_(x, itp); + } + + inline vector3d coord(int idx__) const + { + return vector3d(coord_(0, idx__), coord_(1, idx__), coord(2, idx__)); + } + + inline double theta(int idx__) const + { + return tp_(0, idx__); + } + + inline double phi(int idx__) const + { + return tp_(1, idx__); + } + + inline double weight(int idx__) const + { + return w_[idx__]; + } + + inline int num_points() const + { + return num_points_; + } + + inline int lmax() const + { + return lmax_; + } + + inline int lmmax() const + { + return lmmax_; + } + + static void wigner_d_matrix(int l, double beta, mdarray& d_mtrx__) + { + long double cos_b2 = std::cos((long double)beta / 2.0L); + long double sin_b2 = std::sin((long double)beta / 2.0L); + + for (int m1 = -l; m1 <= l; m1++) { + for (int m2 = -l; m2 <= l; m2++) { + long double d = 0; + for (int j = 0; j <= std::min(l + m1, l - m2); j++) { + if ((l - m2 - j) >= 0 && (l + m1 - j) >= 0 && (j + m2 - m1) >= 0) { + long double g = (std::sqrt(utils::factorial(l + m1)) / utils::factorial(l - m2 - j)) * + (std::sqrt(utils::factorial(l - m1)) / utils::factorial(l + m1 - j)) * + (std::sqrt(utils::factorial(l - m2)) / utils::factorial(j + m2 - m1)) * + (std::sqrt(utils::factorial(l + m2)) / utils::factorial(j)); + d += g * std::pow(-1, j) * std::pow(cos_b2, 2 * l + m1 - m2 - 2 * j) * std::pow(sin_b2, 2 * j + m2 - m1); + } + } + d_mtrx__(m1 + l, m2 + l) = (double)d; + } + } + } + + static void rotation_matrix_l(int l, vector3d euler_angles, int proper_rotation, + double_complex* rot_mtrx__, int ld) + { + mdarray rot_mtrx(rot_mtrx__, ld, 2 * l + 1); + + mdarray d_mtrx(2 * l + 1, 2 * l + 1); + wigner_d_matrix(l, euler_angles[1], d_mtrx); + + for (int m1 = -l; m1 <= l; m1++) { + for (int m2 = -l; m2 <= l; m2++) { + rot_mtrx(m1 + l, m2 + l) = std::exp(double_complex(0, -euler_angles[0] * m1 - euler_angles[2] * m2)) * + d_mtrx(m1 + l, m2 + l) * std::pow(proper_rotation, l); + } + } + } + + static void rotation_matrix_l(int l, vector3d euler_angles, int proper_rotation, + double* rot_mtrx__, int ld) + { + mdarray rot_mtrx_rlm(rot_mtrx__, ld, 2 * l + 1); + mdarray rot_mtrx_ylm(2 * l + 1, 2 * l + 1); + + mdarray d_mtrx(2 * l + 1, 2 * l + 1); + wigner_d_matrix(l, euler_angles[1], d_mtrx); + + for (int m1 = -l; m1 <= l; m1++) { + for (int m2 = -l; m2 <= l; m2++) { + rot_mtrx_ylm(m1 + l, m2 + l) = std::exp(double_complex(0, -euler_angles[0] * m1 - euler_angles[2] * m2)) * + d_mtrx(m1 + l, m2 + l) * std::pow(proper_rotation, l); + } + } + for (int m1 = -l; m1 <= l; m1++) { + auto i13 = (m1 == 0) ? std::vector({0}) : std::vector({-m1, m1}); + + for (int m2 = -l; m2 <= l; m2++) { + auto i24 = (m2 == 0) ? std::vector({0}) : std::vector({-m2, m2}); + + for (int m3 : i13) { + for (int m4 : i24) { + rot_mtrx_rlm(m1 + l, m2 + l) += std::real(rlm_dot_ylm(l, m1, m3) * + rot_mtrx_ylm(m3 + l, m4 + l) * + ylm_dot_rlm(l, m4, m2)); + } + } + } + } + } + + template + static void rotation_matrix(int lmax, + vector3d euler_angles, + int proper_rotation, + mdarray& rotm) + { + rotm.zero(); + + for (int l = 0; l <= lmax; l++) { + rotation_matrix_l(l, euler_angles, proper_rotation, &rotm(l * l, l * l), rotm.ld()); + } + } + + /// Compute derivative of real-spherical harmonic with respect to theta angle. + static void dRlm_dtheta(int lmax, double theta, double phi, mdarray& data) + { + assert(lmax <= 8); + + data[0] = 0; + + if (lmax == 0) + return; + + auto cos_theta = SHT::cosxn(lmax, theta); + auto sin_theta = SHT::sinxn(lmax, theta); + auto cos_phi = SHT::cosxn(lmax, phi); + auto sin_phi = SHT::sinxn(lmax, phi); + + data[1] = -(std::sqrt(3 / pi) * cos_theta[0] * sin_phi[0]) / 2.; + + data[2] = -(std::sqrt(3 / pi) * sin_theta[0]) / 2.; + + data[3] = -(std::sqrt(3 / pi) * cos_phi[0] * cos_theta[0]) / 2.; + + if (lmax == 1) + return; + + data[4] = -(std::sqrt(15 / pi) * cos_phi[0] * cos_theta[0] * sin_phi[0] * sin_theta[0]); + + data[5] = -(std::sqrt(15 / pi) * cos_theta[1] * sin_phi[0]) / 2.; + + data[6] = (-3 * std::sqrt(5 / pi) * cos_theta[0] * sin_theta[0]) / 2.; + + data[7] = -(std::sqrt(15 / pi) * cos_phi[0] * cos_theta[1]) / 2.; + + data[8] = (std::sqrt(15 / pi) * cos_phi[1] * sin_theta[1]) / 4.; + + if (lmax == 2) + return; + + data[9] = (-3 * std::sqrt(35 / (2. * pi)) * cos_theta[0] * sin_phi[2] * std::pow(sin_theta[0], 2)) / 4.; + + data[10] = (std::sqrt(105 / pi) * sin_phi[1] * (sin_theta[0] - 3 * sin_theta[2])) / 16.; + + data[11] = -(std::sqrt(21 / (2. * pi)) * (cos_theta[0] + 15 * cos_theta[2]) * sin_phi[0]) / 16.; + + data[12] = (-3 * std::sqrt(7 / pi) * (sin_theta[0] + 5 * sin_theta[2])) / 16.; + + data[13] = -(std::sqrt(21 / (2. * pi)) * cos_phi[0] * (cos_theta[0] + 15 * cos_theta[2])) / 16.; + + data[14] = -(std::sqrt(105 / pi) * cos_phi[1] * (sin_theta[0] - 3 * sin_theta[2])) / 16.; + + data[15] = (-3 * std::sqrt(35 / (2. * pi)) * cos_phi[2] * cos_theta[0] * std::pow(sin_theta[0], 2)) / 4.; + + if (lmax == 3) + return; + + data[16] = (-3 * std::sqrt(35 / pi) * cos_theta[0] * sin_phi[3] * std::pow(sin_theta[0], 3)) / 4.; + + data[17] = (-3 * std::sqrt(35 / (2. * pi)) * (1 + 2 * cos_theta[1]) * sin_phi[2] * std::pow(sin_theta[0], 2)) / 4.; + + data[18] = (3 * std::sqrt(5 / pi) * sin_phi[1] * (2 * sin_theta[1] - 7 * sin_theta[3])) / 16.; + + data[19] = (-3 * std::sqrt(5 / (2. * pi)) * (cos_theta[1] + 7 * cos_theta[3]) * sin_phi[0]) / 8.; + + data[20] = (-15 * (2 * sin_theta[1] + 7 * sin_theta[3])) / (32. * std::sqrt(pi)); + + data[21] = (-3 * std::sqrt(5 / (2. * pi)) * cos_phi[0] * (cos_theta[1] + 7 * cos_theta[3])) / 8.; + + data[22] = (3 * std::sqrt(5 / pi) * cos_phi[1] * (-2 * sin_theta[1] + 7 * sin_theta[3])) / 16.; + + data[23] = (-3 * std::sqrt(35 / (2. * pi)) * cos_phi[2] * (1 + 2 * cos_theta[1]) * std::pow(sin_theta[0], 2)) / 4.; + + data[24] = (3 * std::sqrt(35 / pi) * cos_phi[3] * cos_theta[0] * std::pow(sin_theta[0], 3)) / 4.; + + if (lmax == 4) + return; + + data[25] = (-15 * std::sqrt(77 / (2. * pi)) * cos_theta[0] * sin_phi[4] * std::pow(sin_theta[0], 4)) / 16.; + + data[26] = (-3 * std::sqrt(385 / pi) * (3 + 5 * cos_theta[1]) * sin_phi[3] * std::pow(sin_theta[0], 3)) / 32.; + + data[27] = (-3 * std::sqrt(385 / (2. * pi)) * cos_theta[0] * (1 + 15 * cos_theta[1]) * sin_phi[2] * std::pow(sin_theta[0], 2)) / 32.; + + data[28] = (std::sqrt(1155 / pi) * sin_phi[1] * (2 * sin_theta[0] + 3 * (sin_theta[2] - 5 * sin_theta[4]))) / 128.; + + data[29] = -(std::sqrt(165 / pi) * (2 * cos_theta[0] + 21 * (cos_theta[2] + 5 * cos_theta[4])) * sin_phi[0]) / 256.; + + data[30] = (-15 * std::sqrt(11 / pi) * (2 * sin_theta[0] + 7 * (sin_theta[2] + 3 * sin_theta[4]))) / 256.; + + data[31] = -(std::sqrt(165 / pi) * cos_phi[0] * (2 * cos_theta[0] + 21 * (cos_theta[2] + 5 * cos_theta[4]))) / 256.; + + data[32] = (std::sqrt(1155 / pi) * cos_phi[1] * (-2 * sin_theta[0] - 3 * sin_theta[2] + 15 * sin_theta[4])) / 128.; + + data[33] = (-3 * std::sqrt(385 / (2. * pi)) * cos_phi[2] * (17 * cos_theta[0] + 15 * cos_theta[2]) * std::pow(sin_theta[0], 2)) / 64.; + + data[34] = (3 * std::sqrt(385 / pi) * cos_phi[3] * (3 + 5 * cos_theta[1]) * std::pow(sin_theta[0], 3)) / 32.; + + data[35] = (-15 * std::sqrt(77 / (2. * pi)) * cos_phi[4] * cos_theta[0] * std::pow(sin_theta[0], 4)) / 16.; + + if (lmax == 5) + return; + + data[36] = (-3 * std::sqrt(3003 / (2. * pi)) * cos_theta[0] * sin_phi[5] * std::pow(sin_theta[0], 5)) / 16.; + + data[37] = (-3 * std::sqrt(1001 / (2. * pi)) * (2 + 3 * cos_theta[1]) * sin_phi[4] * std::pow(sin_theta[0], 4)) / 16.; + + data[38] = (-3 * std::sqrt(91 / pi) * cos_theta[0] * (7 + 33 * cos_theta[1]) * sin_phi[3] * std::pow(sin_theta[0], 3)) / 32.; + + data[39] = (-3 * std::sqrt(1365 / (2. * pi)) * (7 + 14 * cos_theta[1] + 11 * cos_theta[3]) * sin_phi[2] * std::pow(sin_theta[0], 2)) / 64.; + + data[40] = (std::sqrt(1365 / (2. * pi)) * sin_phi[1] * (17 * sin_theta[1] + 12 * sin_theta[3] - 99 * sin_theta[5])) / 512.; + + data[41] = -(std::sqrt(273 / pi) * (5 * cos_theta[1] + 24 * cos_theta[3] + 99 * cos_theta[5]) * sin_phi[0]) / 256.; + + data[42] = (-21 * std::sqrt(13 / pi) * (5 * sin_theta[1] + 12 * sin_theta[3] + 33 * sin_theta[5])) / 512.; + + data[43] = -(std::sqrt(273 / pi) * cos_phi[0] * (5 * cos_theta[1] + 24 * cos_theta[3] + 99 * cos_theta[5])) / 256.; + + data[44] = (std::sqrt(1365 / (2. * pi)) * cos_phi[1] * (-17 * sin_theta[1] - 12 * sin_theta[3] + 99 * sin_theta[5])) / 512.; + + data[45] = (-3 * std::sqrt(1365 / (2. * pi)) * cos_phi[2] * (7 + 14 * cos_theta[1] + 11 * cos_theta[3]) * std::pow(sin_theta[0], 2)) / 64.; + + data[46] = (3 * std::sqrt(91 / pi) * cos_phi[3] * (47 * cos_theta[0] + 33 * cos_theta[2]) * std::pow(sin_theta[0], 3)) / 64.; + + data[47] = (-3 * std::sqrt(1001 / (2. * pi)) * cos_phi[4] * (2 + 3 * cos_theta[1]) * std::pow(sin_theta[0], 4)) / 16.; + + data[48] = (3 * std::sqrt(3003 / (2. * pi)) * cos_phi[5] * cos_theta[0] * std::pow(sin_theta[0], 5)) / 16.; + + if (lmax == 6) + return; + + data[49] = (-21 * std::sqrt(715 / pi) * cos_theta[0] * sin_phi[6] * std::pow(sin_theta[0], 6)) / 64.; + + data[50] = (-3 * std::sqrt(5005 / (2. * pi)) * (5 + 7 * cos_theta[1]) * sin_phi[5] * std::pow(sin_theta[0], 5)) / 64.; + + data[51] = (-3 * std::sqrt(385 / pi) * cos_theta[0] * (29 + 91 * cos_theta[1]) * sin_phi[4] * std::pow(sin_theta[0], 4)) / 128.; + + data[52] = (-3 * std::sqrt(385 / pi) * (81 + 148 * cos_theta[1] + 91 * cos_theta[3]) * sin_phi[3] * std::pow(sin_theta[0], 3)) / 256.; + + data[53] = (-3 * std::sqrt(35 / pi) * cos_theta[0] * (523 + 396 * cos_theta[1] + 1001 * cos_theta[3]) * sin_phi[2] * std::pow(sin_theta[0], 2)) / 512.; + + data[54] = (3 * std::sqrt(35 / (2. * pi)) * sin_phi[1] * (75 * sin_theta[0] + 171 * sin_theta[2] + 55 * sin_theta[4] - 1001 * sin_theta[6])) / 2048.; + + data[55] = -(std::sqrt(105 / pi) * (25 * cos_theta[0] + 243 * cos_theta[2] + 825 * cos_theta[4] + 3003 * cos_theta[6]) * sin_phi[0]) / 4096.; + + data[56] = (-7 * std::sqrt(15 / pi) * (25 * sin_theta[0] + 81 * sin_theta[2] + 165 * sin_theta[4] + 429 * sin_theta[6])) / 2048.; + + data[57] = -(std::sqrt(105 / pi) * cos_phi[0] * (25 * cos_theta[0] + 243 * cos_theta[2] + 825 * cos_theta[4] + 3003 * cos_theta[6])) / 4096.; + + data[58] = (-3 * std::sqrt(35 / (2. * pi)) * cos_phi[1] * (75 * sin_theta[0] + 171 * sin_theta[2] + 55 * sin_theta[4] - 1001 * sin_theta[6])) / 2048.; + + data[59] = (-3 * std::sqrt(35 / pi) * cos_phi[2] * (1442 * cos_theta[0] + 1397 * cos_theta[2] + 1001 * cos_theta[4]) * std::pow(sin_theta[0], 2)) / 1024.; + + data[60] = (3 * std::sqrt(385 / pi) * cos_phi[3] * (81 + 148 * cos_theta[1] + 91 * cos_theta[3]) * std::pow(sin_theta[0], 3)) / 256.; + + data[61] = (-3 * std::sqrt(385 / pi) * cos_phi[4] * (149 * cos_theta[0] + 91 * cos_theta[2]) * std::pow(sin_theta[0], 4)) / 256.; + + data[62] = (3 * std::sqrt(5005 / (2. * pi)) * cos_phi[5] * (5 + 7 * cos_theta[1]) * std::pow(sin_theta[0], 5)) / 64.; + + data[63] = (-21 * std::sqrt(715 / pi) * cos_phi[6] * cos_theta[0] * std::pow(sin_theta[0], 6)) / 64.; + + if (lmax == 7) + return; + + data[64] = (-3 * std::sqrt(12155 / pi) * cos_theta[0] * sin_phi[7] * std::pow(sin_theta[0], 7)) / 32.; + + data[65] = (-3 * std::sqrt(12155 / pi) * (3 + 4 * cos_theta[1]) * sin_phi[6] * std::pow(sin_theta[0], 6)) / 64.; + + data[66] = (-3 * std::sqrt(7293 / (2. * pi)) * cos_theta[0] * (2 + 5 * cos_theta[1]) * sin_phi[5] * std::pow(sin_theta[0], 5)) / 16.; + + data[67] = (-3 * std::sqrt(17017 / pi) * (11 + 19 * cos_theta[1] + 10 * cos_theta[3]) * sin_phi[4] * std::pow(sin_theta[0], 4)) / 128.; + + data[68] = (-3 * std::sqrt(1309 / pi) * cos_theta[0] * (43 + 52 * cos_theta[1] + 65 * cos_theta[3]) * sin_phi[3] * std::pow(sin_theta[0], 3)) / 128.; + + data[69] = (-3 * std::sqrt(19635 / pi) * (21 + 42 * cos_theta[1] + 39 * cos_theta[3] + 26 * cos_theta[5]) * sin_phi[2] * std::pow(sin_theta[0], 2)) / 512.; + + data[70] = (-3 * std::sqrt(595 / (2. * pi)) * (-8 + 121 * cos_theta[1] + 143 * cos_theta[5]) * sin_phi[1] * sin_theta[1]) / 512.; + + data[71] = (-3 * std::sqrt(17 / pi) * (35 * cos_theta[1] + 154 * cos_theta[3] + 429 * cos_theta[5] + 1430 * cos_theta[7]) * sin_phi[0]) / 2048.; + + data[72] = (-9 * std::sqrt(17 / pi) * (70 * sin_theta[1] + 154 * sin_theta[3] + 286 * sin_theta[5] + 715 * sin_theta[7])) / 4096.; + + data[73] = (-3 * std::sqrt(17 / pi) * cos_phi[0] * (35 * cos_theta[1] + 154 * cos_theta[3] + 429 * cos_theta[5] + 1430 * cos_theta[7])) / 2048.; + + data[74] = (3 * std::sqrt(595 / (2. * pi)) * cos_phi[1] * (-16 * sin_theta[1] - 22 * sin_theta[3] + 143 * sin_theta[7])) / 1024.; + + data[75] = (-3 * std::sqrt(19635 / pi) * cos_phi[2] * (21 + 42 * cos_theta[1] + 39 * cos_theta[3] + 26 * cos_theta[5]) * std::pow(sin_theta[0], 2)) / 512.; + + data[76] = (3 * std::sqrt(1309 / pi) * cos_phi[3] * (138 * cos_theta[0] + 117 * cos_theta[2] + 65 * cos_theta[4]) * std::pow(sin_theta[0], 3)) / 256.; + + data[77] = (-3 * std::sqrt(17017 / pi) * cos_phi[4] * (11 + 19 * cos_theta[1] + 10 * cos_theta[3]) * std::pow(sin_theta[0], 4)) / 128.; + + data[78] = (3 * std::sqrt(7293 / (2. * pi)) * cos_phi[5] * (9 * cos_theta[0] + 5 * cos_theta[2]) * std::pow(sin_theta[0], 5)) / 32.; + + data[79] = (-3 * std::sqrt(12155 / pi) * cos_phi[6] * (3 + 4 * cos_theta[1]) * std::pow(sin_theta[0], 6)) / 64.; + + data[80] = (3 * std::sqrt(12155 / pi) * cos_phi[7] * cos_theta[0] * std::pow(sin_theta[0], 7)) / 32.; + } + + /// Compute derivative of real-spherical harmonic with respect to phi angle and divide by sin(theta). + static void dRlm_dphi_sin_theta(int lmax, double theta, double phi, mdarray& data) + { + assert(lmax <= 8); + + data[0] = 0; + + if (lmax == 0) + return; + + auto cos_theta = SHT::cosxn(lmax, theta); + auto sin_theta = SHT::sinxn(lmax, theta); + auto cos_phi = SHT::cosxn(lmax, phi); + auto sin_phi = SHT::sinxn(lmax, phi); + + data[1] = -(std::sqrt(3 / pi) * cos_phi[0]) / 2.; + + data[2] = 0; + + data[3] = (std::sqrt(3 / pi) * sin_phi[0]) / 2.; + + if (lmax == 1) + return; + + data[4] = -(std::sqrt(15 / pi) * cos_phi[1] * sin_theta[0]) / 2.; + + data[5] = -(std::sqrt(15 / pi) * cos_phi[0] * cos_theta[0]) / 2.; + + data[6] = 0; + + data[7] = (std::sqrt(15 / pi) * cos_theta[0] * sin_phi[0]) / 2.; + + data[8] = -(std::sqrt(15 / pi) * cos_phi[0] * sin_phi[0] * sin_theta[0]); + + if (lmax == 2) + return; + + data[9] = (-3 * std::sqrt(35 / (2. * pi)) * cos_phi[2] * std::pow(sin_theta[0], 2)) / 4.; + + data[10] = -(std::sqrt(105 / pi) * cos_phi[1] * sin_theta[1]) / 4.; + + data[11] = -(std::sqrt(21 / (2. * pi)) * cos_phi[0] * (3 + 5 * cos_theta[1])) / 8.; + + data[12] = 0; + + data[13] = (std::sqrt(21 / (2. * pi)) * (3 + 5 * cos_theta[1]) * sin_phi[0]) / 8.; + + data[14] = -(std::sqrt(105 / pi) * cos_phi[0] * cos_theta[0] * sin_phi[0] * sin_theta[0]); + + data[15] = (3 * std::sqrt(35 / (2. * pi)) * sin_phi[2] * std::pow(sin_theta[0], 2)) / 4.; + + if (lmax == 3) + return; + + data[16] = (-3 * std::sqrt(35 / pi) * cos_phi[3] * std::pow(sin_theta[0], 3)) / 4.; + + data[17] = (-9 * std::sqrt(35 / (2. * pi)) * cos_phi[2] * cos_theta[0] * std::pow(sin_theta[0], 2)) / 4.; + + data[18] = (-3 * std::sqrt(5 / pi) * cos_phi[1] * (3 * sin_theta[0] + 7 * sin_theta[2])) / 16.; + + data[19] = (-3 * std::sqrt(5 / (2. * pi)) * cos_phi[0] * (9 * cos_theta[0] + 7 * cos_theta[2])) / 16.; + + data[20] = 0; + + data[21] = (3 * std::sqrt(5 / (2. * pi)) * cos_theta[0] * (1 + 7 * cos_theta[1]) * sin_phi[0]) / 8.; + + data[22] = (-3 * std::sqrt(5 / pi) * sin_phi[1] * (3 * sin_theta[0] + 7 * sin_theta[2])) / 16.; + + data[23] = (9 * std::sqrt(35 / (2. * pi)) * cos_theta[0] * sin_phi[2] * std::pow(sin_theta[0], 2)) / 4.; + + data[24] = (-3 * std::sqrt(35 / pi) * sin_phi[3] * std::pow(sin_theta[0], 3)) / 4.; + + if (lmax == 4) + return; + + data[25] = (-15 * std::sqrt(77 / (2. * pi)) * cos_phi[4] * std::pow(sin_theta[0], 4)) / 16.; + + data[26] = (-3 * std::sqrt(385 / pi) * cos_phi[3] * cos_theta[0] * std::pow(sin_theta[0], 3)) / 4.; + + data[27] = (-3 * std::sqrt(385 / (2. * pi)) * cos_phi[2] * (7 + 9 * cos_theta[1]) * std::pow(sin_theta[0], 2)) / 32.; + + data[28] = -(std::sqrt(1155 / pi) * cos_phi[1] * (2 * sin_theta[1] + 3 * sin_theta[3])) / 32.; + + data[29] = -(std::sqrt(165 / pi) * cos_phi[0] * (15 + 28 * cos_theta[1] + 21 * cos_theta[3])) / 128.; + + data[30] = 0; + + data[31] = (std::sqrt(165 / pi) * (15 + 28 * cos_theta[1] + 21 * cos_theta[3]) * sin_phi[0]) / 128.; + + data[32] = -(std::sqrt(1155 / pi) * sin_phi[1] * (2 * sin_theta[1] + 3 * sin_theta[3])) / 32.; + + data[33] = (3 * std::sqrt(385 / (2. * pi)) * (7 + 9 * cos_theta[1]) * sin_phi[2] * std::pow(sin_theta[0], 2)) / 32.; + + data[34] = (-3 * std::sqrt(385 / pi) * cos_theta[0] * sin_phi[3] * std::pow(sin_theta[0], 3)) / 4.; + + data[35] = (15 * std::sqrt(77 / (2. * pi)) * sin_phi[4] * std::pow(sin_theta[0], 4)) / 16.; + + if (lmax == 5) + return; + + data[36] = (-3 * std::sqrt(3003 / (2. * pi)) * cos_phi[5] * std::pow(sin_theta[0], 5)) / 16.; + + data[37] = (-15 * std::sqrt(1001 / (2. * pi)) * cos_phi[4] * cos_theta[0] * std::pow(sin_theta[0], 4)) / 16.; + + data[38] = (-3 * std::sqrt(91 / pi) * cos_phi[3] * (9 + 11 * cos_theta[1]) * std::pow(sin_theta[0], 3)) / 16.; + + data[39] = (-3 * std::sqrt(1365 / (2. * pi)) * cos_phi[2] * (21 * cos_theta[0] + 11 * cos_theta[2]) * std::pow(sin_theta[0], 2)) / 64.; + + data[40] = -(std::sqrt(1365 / (2. * pi)) * cos_phi[1] * (10 * sin_theta[0] + 27 * sin_theta[2] + 33 * sin_theta[4])) / 256.; + + data[41] = -(std::sqrt(273 / pi) * cos_phi[0] * (50 * cos_theta[0] + 45 * cos_theta[2] + 33 * cos_theta[4])) / 256.; + + data[42] = 0; + + data[43] = (std::sqrt(273 / pi) * cos_theta[0] * (19 + 12 * cos_theta[1] + 33 * cos_theta[3]) * sin_phi[0]) / 128.; + + data[44] = -(std::sqrt(1365 / (2. * pi)) * sin_phi[1] * (10 * sin_theta[0] + 27 * sin_theta[2] + 33 * sin_theta[4])) / 256.; + + data[45] = (3 * std::sqrt(1365 / (2. * pi)) * cos_theta[0] * (5 + 11 * cos_theta[1]) * sin_phi[2] * std::pow(sin_theta[0], 2)) / 32.; + + data[46] = (-3 * std::sqrt(91 / pi) * (9 + 11 * cos_theta[1]) * sin_phi[3] * std::pow(sin_theta[0], 3)) / 16.; + + data[47] = (15 * std::sqrt(1001 / (2. * pi)) * cos_theta[0] * sin_phi[4] * std::pow(sin_theta[0], 4)) / 16.; + + data[48] = (-3 * std::sqrt(3003 / (2. * pi)) * sin_phi[5] * std::pow(sin_theta[0], 5)) / 16.; + + if (lmax == 6) + return; + + data[49] = (-21 * std::sqrt(715 / pi) * cos_phi[6] * std::pow(sin_theta[0], 6)) / 64.; + + data[50] = (-9 * std::sqrt(5005 / (2. * pi)) * cos_phi[5] * cos_theta[0] * std::pow(sin_theta[0], 5)) / 16.; + + data[51] = (-15 * std::sqrt(385 / pi) * cos_phi[4] * (11 + 13 * cos_theta[1]) * std::pow(sin_theta[0], 4)) / 128.; + + data[52] = (-3 * std::sqrt(385 / pi) * cos_phi[3] * (27 * cos_theta[0] + 13 * cos_theta[2]) * std::pow(sin_theta[0], 3)) / 32.; + + data[53] = (-9 * std::sqrt(35 / pi) * cos_phi[2] * (189 + 308 * cos_theta[1] + 143 * cos_theta[3]) * std::pow(sin_theta[0], 2)) / 512.; + + data[54] = (-3 * std::sqrt(35 / (2. * pi)) * cos_phi[1] * (75 * sin_theta[1] + 132 * sin_theta[3] + 143 * sin_theta[5])) / 512.; + + data[55] = -(std::sqrt(105 / pi) * cos_phi[0] * (350 + 675 * cos_theta[1] + 594 * cos_theta[3] + 429 * cos_theta[5])) / 2048.; + + data[56] = 0; + + data[57] = (std::sqrt(105 / pi) * (350 + 675 * cos_theta[1] + 594 * cos_theta[3] + 429 * cos_theta[5]) * sin_phi[0]) / 2048.; + + data[58] = (-3 * std::sqrt(35 / (2. * pi)) * sin_phi[1] * (75 * sin_theta[1] + 132 * sin_theta[3] + 143 * sin_theta[5])) / 512.; + + data[59] = (9 * std::sqrt(35 / pi) * (189 + 308 * cos_theta[1] + 143 * cos_theta[3]) * sin_phi[2] * std::pow(sin_theta[0], 2)) / 512.; + + data[60] = (-3 * std::sqrt(385 / pi) * cos_theta[0] * (7 + 13 * cos_theta[1]) * sin_phi[3] * std::pow(sin_theta[0], 3)) / 16.; + + data[61] = (15 * std::sqrt(385 / pi) * (11 + 13 * cos_theta[1]) * sin_phi[4] * std::pow(sin_theta[0], 4)) / 128.; + + data[62] = (-9 * std::sqrt(5005 / (2. * pi)) * cos_theta[0] * sin_phi[5] * std::pow(sin_theta[0], 5)) / 16.; + + data[63] = (21 * std::sqrt(715 / pi) * sin_phi[6] * std::pow(sin_theta[0], 6)) / 64.; + + if (lmax == 7) + return; + + data[64] = (-3 * std::sqrt(12155 / pi) * cos_phi[7] * std::pow(sin_theta[0], 7)) / 32.; + + data[65] = (-21 * std::sqrt(12155 / pi) * cos_phi[6] * cos_theta[0] * std::pow(sin_theta[0], 6)) / 64.; + + data[66] = (-3 * std::sqrt(7293 / (2. * pi)) * cos_phi[5] * (13 + 15 * cos_theta[1]) * std::pow(sin_theta[0], 5)) / 64.; + + data[67] = (-15 * std::sqrt(17017 / pi) * cos_phi[4] * (11 * cos_theta[0] + 5 * cos_theta[2]) * std::pow(sin_theta[0], 4)) / 256.; + + data[68] = (-3 * std::sqrt(1309 / pi) * cos_phi[3] * (99 + 156 * cos_theta[1] + 65 * cos_theta[3]) * std::pow(sin_theta[0], 3)) / 256.; + + data[69] = (-3 * std::sqrt(19635 / pi) * cos_phi[2] * (126 * cos_theta[0] + 91 * cos_theta[2] + 39 * cos_theta[4]) * std::pow(sin_theta[0], 2)) / 1024.; + + data[70] = (-3 * std::sqrt(595 / (2. * pi)) * cos_phi[1] * (35 * sin_theta[0] + 11 * (9 * sin_theta[2] + 13 * (sin_theta[4] + sin_theta[6])))) / 2048.; + + data[71] = (-3 * std::sqrt(17 / pi) * cos_phi[0] * (1225 * cos_theta[0] + 11 * (105 * cos_theta[2] + 91 * cos_theta[4] + 65 * cos_theta[6]))) / 4096.; + + data[72] = 0; + + data[73] = (3 * std::sqrt(17 / pi) * cos_theta[0] * (178 + 869 * cos_theta[1] + 286 * cos_theta[3] + 715 * cos_theta[5]) * sin_phi[0]) / 2048.; + + data[74] = (-3 * std::sqrt(595 / (2. * pi)) * sin_phi[1] * (35 * sin_theta[0] + 11 * (9 * sin_theta[2] + 13 * (sin_theta[4] + sin_theta[6])))) / 2048.; + + data[75] = (3 * std::sqrt(19635 / pi) * cos_theta[0] * (37 + 52 * cos_theta[1] + 39 * cos_theta[3]) * sin_phi[2] * std::pow(sin_theta[0], 2)) / 512.; + + data[76] = (-3 * std::sqrt(1309 / pi) * (99 + 156 * cos_theta[1] + 65 * cos_theta[3]) * sin_phi[3] * std::pow(sin_theta[0], 3)) / 256.; + + data[77] = (15 * std::sqrt(17017 / pi) * (11 * cos_theta[0] + 5 * cos_theta[2]) * sin_phi[4] * std::pow(sin_theta[0], 4)) / 256.; + + data[78] = (-3 * std::sqrt(7293 / (2. * pi)) * (13 + 15 * cos_theta[1]) * sin_phi[5] * std::pow(sin_theta[0], 5)) / 64.; + + data[79] = (21 * std::sqrt(12155 / pi) * cos_theta[0] * sin_phi[6] * std::pow(sin_theta[0], 6)) / 64.; + + data[80] = (-3 * std::sqrt(12155 / pi) * sin_phi[7] * std::pow(sin_theta[0], 7)) / 32.; + } + + /// convert 3x3 transformation matrix to SU2 2x2 matrix + /// Create quaternion components from the 3x3 matrix. The components are just a w = Cos(\Omega/2) + /// and {x,y,z} = unit rotation vector multiplied by Sin[\Omega/2] + /// see https://en.wikipedia.org/wiki/Quaternions_and_spatial_rotation + /// and https://en.wikipedia.org/wiki/Rotation_group_SO(3)#Quaternions_of_unit_norm + static mdarray rotation_matrix_su2(const matrix3d& m) + { + double det = m.det() > 0 ? 1.0 : -1.0; + + matrix3d mat = m * det; + mdarray su2mat(2, 2); + + su2mat.zero(); + + /* make quaternion components*/ + double w = sqrt(std::max(0., 1. + mat(0, 0) + mat(1, 1) + mat(2, 2))) / 2.; + double x = sqrt(std::max(0., 1. + mat(0, 0) - mat(1, 1) - mat(2, 2))) / 2.; + double y = sqrt(std::max(0., 1. - mat(0, 0) + mat(1, 1) - mat(2, 2))) / 2.; + double z = sqrt(std::max(0., 1. - mat(0, 0) - mat(1, 1) + mat(2, 2))) / 2.; + x = std::copysign(x, mat(2, 1) - mat(1, 2)); + y = std::copysign(y, mat(0, 2) - mat(2, 0)); + z = std::copysign(z, mat(1, 0) - mat(0, 1)); + + su2mat(0, 0) = double_complex(w, -z); + su2mat(1, 1) = double_complex(w, z); + su2mat(0, 1) = double_complex(-y, -x); + su2mat(1, 0) = double_complex(y, -x); + + return std::move(su2mat); + } + + /// Compute the derivatives of real spherical harmonics over the components of cartesian vector. + /** The following derivative is computed: + * \f[ + * \frac{\partial R_{\ell m}(\theta_r, \phi_r)}{\partial r_{\mu}} = + * \frac{\partial R_{\ell m}(\theta_r, \phi_r)}{\partial \theta_r} \frac{\partial \theta_r}{\partial r_{\mu}} + + * \frac{\partial R_{\ell m}(\theta_r, \phi_r)}{\partial \phi_r} \frac{\partial \phi_r}{\partial r_{\mu}} + * \f] + * The derivatives of angles are: + * \f[ + * \frac{\partial \theta_r}{\partial r_{x}} = \frac{\cos(\phi_r) \cos(\theta_r)}{r} \\ + * \frac{\partial \theta_r}{\partial r_{y}} = \frac{\cos(\theta_r) \sin(\phi_r)}{r} \\ + * \frac{\partial \theta_r}{\partial r_{z}} = -\frac{\sin(\theta_r)}{r} + * \f] + * and + * \f[ + * \frac{\partial \phi_r}{\partial r_{x}} = -\frac{\sin(\phi_r)}{\sin(\theta_r) r} \\ + * \frac{\partial \phi_r}{\partial r_{y}} = \frac{\cos(\phi_r)}{\sin(\theta_r) r} \\ + * \frac{\partial \phi_r}{\partial r_{z}} = 0 + * \f] + * The derivative of \f$ \phi \f$ has discontinuities at \f$ \theta = 0, \theta=\pi \f$. This, however, is not a problem, because + * multiplication by the the derivative of \f$ R_{\ell m} \f$ removes it. The following functions have to be hardcoded: + * \f[ + * \frac{\partial R_{\ell m}(\theta, \phi)}{\partial \theta} \\ + * \frac{\partial R_{\ell m}(\theta, \phi)}{\partial \phi} \frac{1}{\sin(\theta)} + * \f] + * + * Mathematica script for spherical harmonic derivatives: + \verbatim + Rlm[l_, m_, th_, ph_] := + If[m > 0, Sqrt[2]*ComplexExpand[Re[SphericalHarmonicY[l, m, th, ph]]], + If[m < 0, Sqrt[2]*ComplexExpand[Im[SphericalHarmonicY[l, m, th, ph]]], + If[m == 0, ComplexExpand[Re[SphericalHarmonicY[l, 0, th, ph]]]] + ] + ] + Do[Print[FullSimplify[D[Rlm[l, m, theta, phi], theta]]], {l, 0, 4}, {m, -l, l}] + Do[Print[FullSimplify[TrigExpand[D[Rlm[l, m, theta, phi], phi]/Sin[theta]]]], {l, 0, 4}, {m, -l, l}] + \endverbatim + */ + static void dRlm_dr(int lmax__, vector3d& r__, mdarray& data__) + { + /* get spherical coordinates of the Cartesian vector */ + auto vrs = spherical_coordinates(r__); + + if (vrs[0] < 1e-12) { + data__.zero(); + return; + } + + int lmmax = (lmax__ + 1) * (lmax__ + 1); + + double theta = vrs[1]; + double phi = vrs[2]; + + vector3d dtheta_dr({std::cos(phi) * std::cos(theta), std::cos(theta) * std::sin(phi), -std::sin(theta)}); + vector3d dphi_dr({-std::sin(phi), std::cos(phi), 0.0}); + + mdarray dRlm_dt(lmmax); + mdarray dRlm_dp_sin_t(lmmax); + + dRlm_dtheta(lmax__, theta, phi, dRlm_dt); + dRlm_dphi_sin_theta(lmax__, theta, phi, dRlm_dp_sin_t); + + for (int mu = 0; mu < 3; mu++) { + for (int lm = 0; lm < lmmax; lm++) { + data__(lm, mu) = (dRlm_dt[lm] * dtheta_dr[mu] + dRlm_dp_sin_t[lm] * dphi_dr[mu]) / vrs[0]; + } + } + } + + /// Generate \f$ \cos(m x) \f$ for m in [1, n] using recursion. + static mdarray cosxn(int n__, double x__) + { + assert(n__ > 0); + mdarray data(n__); + data[0] = std::cos(x__); + if (n__ > 1) { + data[1] = std::cos(2 * x__); + for (int i = 2; i < n__; i++) { + data[i] = 2 * data[0] * data[i - 1] - data[i - 2]; + } + } + return std::move(data); + } + + /// Generate \f$ \sin(m x) \f$ for m in [1, n] using recursion. + static mdarray sinxn(int n__, double x__) + { + assert(n__ > 0); + mdarray data(n__); + auto cosx = std::cos(x__); + data[0] = std::sin(x__); + if (n__ > 1) { + data[1] = std::sin(2 * x__); + for (int i = 2; i < n__; i++) { + data[i] = 2 * cosx * data[i - 1] - data[i - 2]; + } + } + return std::move(data); + } +}; + +template <> +inline void SHT::backward_transform(int ld, double const* flm, int nr, int lmmax, double* ftp) +{ + assert(lmmax <= lmmax_); + assert(ld >= lmmax); + linalg::gemm(1, 0, num_points_, nr, lmmax, &rlm_backward_(0, 0), lmmax_, flm, ld, ftp, num_points_); +} + +template <> +inline void SHT::backward_transform(int ld, double_complex const* flm, int nr, int lmmax, double_complex* ftp) +{ + assert(lmmax <= lmmax_); + assert(ld >= lmmax); + linalg::gemm(1, 0, num_points_, nr, lmmax, &ylm_backward_(0, 0), lmmax_, flm, ld, ftp, num_points_); +} + +template <> +inline void SHT::forward_transform(double const* ftp, int nr, int lmmax, int ld, double* flm) +{ + assert(lmmax <= lmmax_); + assert(ld >= lmmax); + linalg::gemm(1, 0, lmmax, nr, num_points_, &rlm_forward_(0, 0), num_points_, ftp, num_points_, flm, ld); +} + +template <> +inline void SHT::forward_transform(double_complex const* ftp, int nr, int lmmax, int ld, double_complex* flm) +{ + assert(lmmax <= lmmax_); + assert(ld >= lmmax); + linalg::gemm(1, 0, lmmax, nr, num_points_, &ylm_forward_(0, 0), num_points_, ftp, num_points_, flm, ld); +} + +} // namespace sirius + +#endif // __SHT_HPP__ diff --git a/src/Unit_cell/atom.hpp b/src/Unit_cell/atom.hpp index 8c049a4e0..d0459a8c5 100644 --- a/src/Unit_cell/atom.hpp +++ b/src/Unit_cell/atom.hpp @@ -25,7 +25,7 @@ #ifndef __ATOM_HPP__ #define __ATOM_HPP__ -#include "gaunt.h" +#include "SHT/gaunt.hpp" #include "atom_symmetry_class.hpp" #include "sddk.hpp" #include "spheric_function.h" diff --git a/src/Unit_cell/atom_type.hpp b/src/Unit_cell/atom_type.hpp index 2455cf174..1ae4d8716 100644 --- a/src/Unit_cell/atom_type.hpp +++ b/src/Unit_cell/atom_type.hpp @@ -31,9 +31,9 @@ #include "geometry3d.hpp" #include "radial_grid.h" #include "radial_solver.h" -#include "xc_functional.h" +#include "Potential/xc_functional.hpp" #include "simulation_parameters.h" -#include "sht.h" +//#include "SHT/sht.hpp" #include "radial_functions_index.hpp" #include "basis_functions_index.hpp" #include "hubbard_orbitals_descriptor.hpp" diff --git a/src/dft_ground_state.h b/src/dft_ground_state.h index be156bbaf..90e5a066b 100644 --- a/src/dft_ground_state.h +++ b/src/dft_ground_state.h @@ -25,7 +25,7 @@ #ifndef __DFT_GROUND_STATE_H__ #define __DFT_GROUND_STATE_H__ -#include "k_point_set.h" +#include "K_point/k_point_set.hpp" #include "utils/json.hpp" #include "Hubbard/hubbard.hpp" #include "Geometry/stress.hpp" diff --git a/src/gaunt.h b/src/gaunt.h deleted file mode 100644 index 80adf2cc1..000000000 --- a/src/gaunt.h +++ /dev/null @@ -1,211 +0,0 @@ -// Copyright (c) 2013-2017 Anton Kozhevnikov, Thomas Schulthess -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without modification, are permitted provided that -// the following conditions are met: -// -// 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the -// following disclaimer. -// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions -// and the following disclaimer in the documentation and/or other materials provided with the distribution. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED -// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR -// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR -// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -/** \file gaunt.h - * - * \brief Contains definition and implementation of sirius::Gaunt class. - */ - -#ifndef __GAUNT_H__ -#define __GAUNT_H__ - -#include "mdarray.hpp" - -namespace sirius { - -/// Used in the {lm1, lm2} : {lm3, coefficient} way of grouping non-zero Gaunt coefficients -template -struct gaunt_L3 -{ - int lm3; - int l3; - T coef; -}; - -/// Used in the {lm1, lm2, coefficient} : {lm3} way of grouping non-zero Gaunt coefficients -template -struct gaunt_L1_L2 -{ - int lm1; - int lm2; - T coef; -}; - -/// Compact storage of non-zero Gaunt coefficients \f$ \langle \ell_1 m_1 | \ell_3 m_3 | \ell_2 m_2 \rangle \f$. -/** Very important! The following notation is adopted and used everywhere: lm1 and lm2 represent 'bra' and 'ket' - * spherical harmonics of the Gaunt integral and lm3 represent the inner spherical harmonic. - */ -template -class Gaunt_coefficients -{ - private: - - /// lmax of - int lmax2_; - /// lmmax of |lm2> - int lmmax2_; - - /// List of non-zero Gaunt coefficients for each lm3. - mdarray>, 1> gaunt_packed_L1_L2_; - - /// List of non-zero Gaunt coefficients for each combination of lm1, lm2. - mdarray>, 2> gaunt_packed_L3_; - - public: - - /// Class constructor. - Gaunt_coefficients(int lmax1__, - int lmax3__, - int lmax2__, - std::function get__) - : lmax1_(lmax1__) - , lmax3_(lmax3__) - , lmax2_(lmax2__) - { - lmmax1_ = utils::lmmax(lmax1_); - lmmax3_ = utils::lmmax(lmax3_); - lmmax2_ = utils::lmmax(lmax2_); - - gaunt_packed_L1_L2_ = mdarray>, 1>(lmmax3_); - gaunt_L1_L2 g12; - - gaunt_packed_L3_ = mdarray>, 2>(lmmax1_, lmmax2_); - gaunt_L3 g3; - - for (int l1 = 0, lm1 = 0; l1 <= lmax1_; l1++) { - for (int m1 = -l1; m1 <= l1; m1++, lm1++) { - for (int l2 = 0, lm2 = 0; l2 <= lmax2_; l2++) { - for (int m2 = -l2; m2 <= l2; m2++, lm2++) { - for (int l3 = 0, lm3 = 0; l3 <= lmax3_; l3++) { - for (int m3 = -l3; m3 <= l3; m3++, lm3++) { - - T gc = get__(l1, l3, l2, m1, m3, m2); - if (std::abs(gc) > 1e-12) { - g12.lm1 = lm1; - g12.lm2 = lm2; - g12.coef = gc; - gaunt_packed_L1_L2_[lm3].push_back(g12); - - g3.lm3 = lm3; - g3.l3 = l3; - g3.coef = gc; - gaunt_packed_L3_(lm1, lm2).push_back(g3); - } - } - } - } - } - } - } - } - - /// Return number of non-zero Gaunt coefficients for a given lm3. - inline int num_gaunt(int lm3) const - { - assert(lm3 >= 0 && lm3 < lmmax3_); - return static_cast(gaunt_packed_L1_L2_[lm3].size()); - } - - /// Return a structure containing {lm1, lm2, coef} for a given lm3 and index. - /** Example: - * \code{.cpp} - * for (int lm3 = 0; lm3 < lmmax3; lm3++) - * { - * for (int i = 0; i < gaunt_coefs.num_gaunt(lm3); i++) { - * int lm1 = gaunt_coefs.gaunt(lm3, i).lm1; - * int lm2 = gaunt_coefs.gaunt(lm3, i).lm2; - * double coef = gaunt_coefs.gaunt(lm3, i).coef; - * - * // do something with lm1,lm2,lm3 and coef - * } - * } - * \endcode - */ - inline gaunt_L1_L2 const& gaunt(int lm3, int idx) const - { - assert(lm3 >= 0 && lm3 < lmmax3_); - assert(idx >= 0 && idx < (int)gaunt_packed_L1_L2_[lm3].size()); - return gaunt_packed_L1_L2_[lm3][idx]; - } - - /// Return number of non-zero Gaunt coefficients for a combination of lm1 and lm2. - inline int num_gaunt(int lm1, int lm2) const - { - return static_cast(gaunt_packed_L3_(lm1, lm2).size()); - } - - /// Return a structure containing {lm3, coef} for a given lm1, lm2 and index - inline gaunt_L3 const& gaunt(int lm1, int lm2, int idx) const - { - return gaunt_packed_L3_(lm1, lm2)[idx]; - } - - /// Return a sum over L3 (lm3) index of Gaunt coefficients and a complex vector. - /** The following operation is performed: - * \f[ - * \sum_{\ell_3 m_3} \langle \ell_1 m_1 | \ell_3 m_3 | \ell_2 m_2 \rangle v_{\ell_3 m_3} - * \f] - * Result is assumed to be complex. - */ - inline double_complex sum_L3_gaunt(int lm1, int lm2, double_complex const* v) const - { - double_complex zsum(0, 0); - for (int k = 0; k < (int)gaunt_packed_L3_(lm1, lm2).size(); k++) { - zsum += gaunt_packed_L3_(lm1, lm2)[k].coef * v[gaunt_packed_L3_(lm1, lm2)[k].lm3]; - } - return zsum; - } - - /// Return a sum over L3 (lm3) index of Gaunt coefficients and a real vector. - /** The following operation is performed: - * \f[ - * \sum_{\ell_3 m_3} \langle \ell_1 m_1 | \ell_3 m_3 | \ell_2 m_2 \rangle v_{\ell_3 m_3} - * \f] - * Result is assumed to be of the same type as Gaunt coefficients. - */ - inline T sum_L3_gaunt(int lm1, int lm2, double const* v) const - { - T sum = 0; - for (int k = 0; k < (int)gaunt_packed_L3_(lm1, lm2).size(); k++) { - sum += gaunt_packed_L3_(lm1, lm2)[k].coef * v[gaunt_packed_L3_(lm1, lm2)[k].lm3]; - } - return sum; - } - - /// Return vector of non-zero Gaunt coefficients for a given combination of lm1 and lm2 - inline std::vector> const& gaunt_vector(int lm1, int lm2) const - { - return gaunt_packed_L3_(lm1, lm2); - } -}; - -}; - -#endif - diff --git a/src/k_point.h b/src/k_point.h deleted file mode 100644 index ded5c2622..000000000 --- a/src/k_point.h +++ /dev/null @@ -1,833 +0,0 @@ -// Copyright (c) 2013-2016 Anton Kozhevnikov, Thomas Schulthess -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without modification, are permitted provided that -// the following conditions are met: -// -// 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the -// following disclaimer. -// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions -// and the following disclaimer in the documentation and/or other materials provided with the distribution. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED -// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR -// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR -// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -/** \file k_point.h - * - * \brief Contains definition and partial implementation of sirius::K_point class. - */ - -#ifndef __K_POINT_H__ -#define __K_POINT_H__ - -#include "periodic_function.h" -#include "matching_coefficients.h" -#include "Beta_projectors/beta_projectors.h" -#include "wave_functions.hpp" - -namespace sirius -{ - -/// K-point related variables and methods. -/** \image html wf_storage.png "Wave-function storage" - * \image html fv_eigen_vectors.png "First-variational eigen vectors" */ -class K_point -{ - private: - - /// Simulation context. - Simulation_context& ctx_; - - /// Unit cell object. - Unit_cell const& unit_cell_; - - /// Weight of k-point. - double weight_; - - /// Fractional k-point coordinates. - vector3d vk_; - - /// List of G-vectors with |G+k| < cutoff. - std::unique_ptr gkvec_; - - /// G-vector distribution for the FFT transformation. - std::unique_ptr gkvec_partition_; - - /// First-variational eigen values - std::vector fv_eigen_values_; - - /// First-variational eigen vectors, distributed over 2D BLACS grid. - dmatrix fv_eigen_vectors_; - - /// First-variational eigen vectors, distributed in slabs. - std::unique_ptr fv_eigen_vectors_slab_; - - /// Lowest eigen-vectors of the LAPW overlap matrix with small aigen-values. - std::unique_ptr singular_components_; - - /// Second-variational eigen vectors. - /** Second-variational eigen-vectors are stored as one or two \f$ N_{fv} \times N_{fv} \f$ matrices in - * case of non-magnetic or collinear magnetic case or as a single \f$ 2 N_{fv} \times 2 N_{fv} \f$ - * matrix in case of general non-collinear magnetism. */ - dmatrix sv_eigen_vectors_[2]; - - /// Full-diagonalization eigen vectors. - mdarray fd_eigen_vectors_; - - /// First-variational states. - std::unique_ptr fv_states_{nullptr}; - - /// Two-component (spinor) wave functions describing the bands. - std::unique_ptr spinor_wave_functions_{nullptr}; - - /// Two-component (spinor) hubbard wave functions where the S matrix is applied (if ppus). - std::unique_ptr hubbard_wave_functions_{nullptr}; - - /// Band occupation numbers. - mdarray band_occupancies_; - - /// Band energies. - mdarray band_energies_; - - /// LAPW matching coefficients for the row G+k vectors. - /** Used to setup the distributed LAPW Hamiltonian and overlap matrices. */ - std::unique_ptr alm_coeffs_row_{nullptr}; - - /// LAPW matching coefficients for the column G+k vectors. - /** Used to setup the distributed LAPW Hamiltonian and overlap matrices. */ - std::unique_ptr alm_coeffs_col_{nullptr}; - - /// LAPW matching coefficients for the local set G+k vectors. - std::unique_ptr alm_coeffs_loc_{nullptr}; - - /// Mapping between local row and global G+k vecotor index. - /** Used by matching_coefficients class. */ - std::vector igk_row_; - - /// Mapping between local column and global G+k vecotor index. - /** Used by matching_coefficients class. */ - std::vector igk_col_; - - /// Mapping between local and global G+k vecotor index. - /** Used by matching_coefficients class. */ - std::vector igk_loc_; - - /// Number of G+k vectors distributed along rows of MPI grid - int num_gkvec_row_{0}; - - /// Number of G+k vectors distributed along columns of MPI grid - int num_gkvec_col_{0}; - - /// Offset of the local fraction of G+k vectors in the global index. - int gkvec_offset_{0}; - - /// Basis descriptors distributed between rows of the 2D MPI grid. - /** This is a local array. Only MPI ranks belonging to the same column have identical copies of this array. */ - std::vector lo_basis_descriptors_row_; - - /// Basis descriptors distributed between columns of the 2D MPI grid. - /** This is a local array. Only MPI ranks belonging to the same row have identical copies of this array. */ - std::vector lo_basis_descriptors_col_; - - /// List of columns of the Hamiltonian and overlap matrix lo block (local index) for a given atom. - std::vector> atom_lo_cols_; - - /// list of rows of the Hamiltonian and overlap matrix lo block (local index) for a given atom - std::vector> atom_lo_rows_; - - /// Imaginary unit to the power of l. - std::vector zil_; - - /// Mapping between lm and l. - std::vector l_by_lm_; - - /// Column rank of the processors of ScaLAPACK/ELPA diagonalization grid. - int rank_col_; - - /// Number of processors along the columns of the diagonalization grid. - int num_ranks_col_; - - /// Row rank of the processors of ScaLAPACK/ELPA diagonalization grid. - int rank_row_; - - /// Number of processors along the rows of the diagonalization grid. - int num_ranks_row_; - - /// Beta projectors for a local set of G+k vectors. - std::unique_ptr beta_projectors_{nullptr}; - - /// Beta projectors for row G+k vectors. - /** Used to setup the full Hamiltonian in PP-PW case (for verification purpose only) */ - std::unique_ptr beta_projectors_row_{nullptr}; - - /// Beta projectors for column G+k vectors. - /** Used to setup the full Hamiltonian in PP-PW case (for verification purpose only) */ - std::unique_ptr beta_projectors_col_{nullptr}; - - /// Preconditioner matrix for Chebyshev solver. - mdarray p_mtrx_; - - /// Communicator for parallelization inside k-point. - /** This communicator is used to split G+k vectors and wave-functions. */ - Communicator const& comm_; - - /// Communicator between(!!) rows. - Communicator const& comm_row_; - - /// Communicator between(!!) columns. - Communicator const& comm_col_; - - /// Generate G+k and local orbital basis sets. - inline void generate_gklo_basis(); - - /// Test orthonormalization of first-variational states. - inline void test_fv_states(); - - inline double& band_energy_aux(int j__, int ispn__) - { - if (ctx_.num_mag_dims() == 3) { - return band_energies_(j__, 0); - } else { - if (!(ispn__ == 0 || ispn__ == 1)) { - TERMINATE("wrong spin index"); - } - return band_energies_(j__, ispn__); - } - } - - inline double& band_occupancy_aux(int j__, int ispn__) - { - if (ctx_.num_mag_dims() == 3) { - return band_occupancies_(j__, 0); - } else { - if (!(ispn__ == 0 || ispn__ == 1)) { - TERMINATE("wrong spin index"); - } - return band_occupancies_(j__, ispn__); - } - } - - /// Find G+k vectors within the cutoff. - inline void generate_gkvec(double gk_cutoff__) - { - PROFILE("sirius::K_point::generate_gkvec"); - - if (ctx_.full_potential() && (gk_cutoff__ * unit_cell_.max_mt_radius() > ctx_.lmax_apw()) && - comm_.rank() == 0 && ctx_.control().verbosity_ >= 0) { - std::stringstream s; - s << "G+k cutoff (" << gk_cutoff__ << ") is too large for a given lmax (" - << ctx_.lmax_apw() << ") and a maximum MT radius (" << unit_cell_.max_mt_radius() << ")" << std::endl - << "suggested minimum value for lmax : " << int(gk_cutoff__ * unit_cell_.max_mt_radius()) + 1; - WARNING(s); - } - - if (gk_cutoff__ * 2 > ctx_.pw_cutoff()) { - std::stringstream s; - s << "G+k cutoff is too large for a given plane-wave cutoff" << std::endl - << " pw cutoff : " << ctx_.pw_cutoff() << std::endl - << " doubled G+k cutoff : " << gk_cutoff__ * 2; - TERMINATE(s); - } - - /* create G+k vectors; communicator of the coarse FFT grid is used because wave-functions will be transformed - * only on the coarse grid; G+k-vectors will be distributed between MPI ranks assigned to the k-point */ - gkvec_ = std::unique_ptr(new Gvec(vk_, ctx_.unit_cell().reciprocal_lattice_vectors(), gk_cutoff__, comm(), - ctx_.gamma_point())); - - gkvec_partition_ = std::unique_ptr(new Gvec_partition(*gkvec_, ctx_.comm_fft_coarse(), - ctx_.comm_band_ortho_fft_coarse())); - - gkvec_offset_ = gkvec().gvec_offset(comm().rank()); - } - - public: - - /// Constructor - K_point(Simulation_context& ctx__, - double const* vk__, - double weight__) - : ctx_(ctx__) - , unit_cell_(ctx_.unit_cell()) - , weight_(weight__) - , comm_(ctx_.comm_band()) - , comm_row_(ctx_.blacs_grid().comm_row()) - , comm_col_(ctx_.blacs_grid().comm_col()) - { - PROFILE("sirius::K_point::K_point"); - - for (int x = 0; x < 3; x++) { - vk_[x] = vk__[x]; - } - - band_occupancies_ = mdarray(ctx_.num_bands(), ctx_.num_spin_dims()); - band_occupancies_.zero(); - band_energies_ = mdarray(ctx_.num_bands(), ctx_.num_spin_dims()); - band_energies_.zero(); - - num_ranks_row_ = comm_row_.size(); - num_ranks_col_ = comm_col_.size(); - - rank_row_ = comm_row_.rank(); - rank_col_ = comm_col_.rank(); - } - - /// Initialize the k-point related arrays and data. - inline void initialize(); - - inline void update() - { - PROFILE("sirius::K_point::update"); - - gkvec_->lattice_vectors(ctx_.unit_cell().reciprocal_lattice_vectors()); - - if (ctx_.full_potential()) { - if (ctx_.iterative_solver_input().type_ == "exact") { - alm_coeffs_row_ = std::unique_ptr( - new Matching_coefficients(unit_cell_, ctx_.lmax_apw(), num_gkvec_row(), igk_row_, gkvec())); - alm_coeffs_col_ = std::unique_ptr( - new Matching_coefficients(unit_cell_, ctx_.lmax_apw(), num_gkvec_col(), igk_col_, gkvec())); - } - alm_coeffs_loc_ = std::unique_ptr( - new Matching_coefficients(unit_cell_, ctx_.lmax_apw(), num_gkvec_loc(), igk_loc_, gkvec())); - } - - if (!ctx_.full_potential()) { - /* compute |beta> projectors for atom types */ - beta_projectors_ = std::unique_ptr(new Beta_projectors(ctx_, gkvec(), igk_loc_)); - - if (ctx_.iterative_solver_input().type_ == "exact") { - beta_projectors_row_ = std::unique_ptr(new Beta_projectors(ctx_, gkvec(), igk_row_)); - beta_projectors_col_ = std::unique_ptr(new Beta_projectors(ctx_, gkvec(), igk_col_)); - - } - - //if (false) { - // p_mtrx_ = mdarray(unit_cell_.max_mt_basis_size(), unit_cell_.max_mt_basis_size(), unit_cell_.num_atom_types()); - // p_mtrx_.zero(); - - // for (int iat = 0; iat < unit_cell_.num_atom_types(); iat++) { - // auto& atom_type = unit_cell_.atom_type(iat); - - // if (!atom_type.pp_desc().augment) { - // continue; - // } - // int nbf = atom_type.mt_basis_size(); - // int ofs = atom_type.offset_lo(); - - // matrix qinv(nbf, nbf); - // for (int xi1 = 0; xi1 < nbf; xi1++) { - // for (int xi2 = 0; xi2 < nbf; xi2++) { - // qinv(xi2, xi1) = ctx_.augmentation_op(iat).q_mtrx(xi2, xi1); - // } - // } - // linalg::geinv(nbf, qinv); - // - // /* compute P^{+}*P */ - // linalg::gemm(2, 0, nbf, nbf, num_gkvec_loc(), - // beta_projectors_->beta_gk_t().at(0, ofs), beta_projectors_->beta_gk_t().ld(), - // beta_projectors_->beta_gk_t().at(0, ofs), beta_projectors_->beta_gk_t().ld(), - // &p_mtrx_(0, 0, iat), p_mtrx_.ld()); - // comm().allreduce(&p_mtrx_(0, 0, iat), unit_cell_.max_mt_basis_size() * unit_cell_.max_mt_basis_size()); - - // for (int xi1 = 0; xi1 < nbf; xi1++) { - // for (int xi2 = 0; xi2 < nbf; xi2++) { - // qinv(xi2, xi1) += p_mtrx_(xi2, xi1, iat); - // } - // } - // /* compute (Q^{-1} + P^{+}*P)^{-1} */ - // linalg::geinv(nbf, qinv); - // for (int xi1 = 0; xi1 < nbf; xi1++) { - // for (int xi2 = 0; xi2 < nbf; xi2++) { - // p_mtrx_(xi2, xi1, iat) = qinv(xi2, xi1); - // } - // } - // } - //} - } - - } - - /// Generate first-variational states from eigen-vectors. - /** First-variational states are obtained from the first-variational eigen-vectors and - * LAPW matching coefficients. - * - * APW part: - * \f[ - * \psi_{\xi j}^{\bf k} = \sum_{{\bf G}} Z_{{\bf G} j}^{\bf k} * A_{\xi}({\bf G+k}) - * \f] - */ - void generate_fv_states(); - - #ifdef __GPU - void generate_fv_states_aw_mt_gpu(); - #endif - - /// Generate two-component spinor wave functions - inline void generate_spinor_wave_functions(); - - inline void generate_atomic_centered_wavefunctions(const int num_ao__, Wave_functions &phi); - - inline void generate_atomic_centered_wavefunctions_aux(const int num_ao__, Wave_functions &phi, std::vector &offset, bool hubbard); - - void compute_gradient_wavefunctions(Wave_functions &phi, - const int starting_position_i, - const int num_wf, - Wave_functions &dphi, - const int starting_position_j, - const int direction); - - void save(int id); - - void load(HDF5_tree h5in, int id); - - //== void save_wave_functions(int id); - - //== void load_wave_functions(int id); - - void get_fv_eigen_vectors(mdarray& fv_evec); - - void get_sv_eigen_vectors(mdarray& sv_evec); - - /// Test orthonormalization of spinor wave-functions - void test_spinor_wave_functions(int use_fft); - - /// Get the number of occupied bands for each spin channel. - int num_occupied_bands(int ispn__ = -1) - { - for (int j = ctx_.num_bands() - 1; j >= 0; j--) { - if (std::abs(band_occupancy(j, ispn__) * weight()) > 1e-14) { - return j + 1; - } - } - return 0; - } - - /// Total number of G+k vectors within the cutoff distance - inline int num_gkvec() const - { - return gkvec_->num_gvec(); - } - - /// Total number of muffin-tin and plane-wave expansion coefficients for the wave-functions. - /** APW+lo basis \f$ \varphi_{\mu {\bf k}}({\bf r}) = \{ \varphi_{\bf G+k}({\bf r}), - * \varphi_{j{\bf k}}({\bf r}) \} \f$ is used to expand first-variational wave-functions: - * - * \f[ - * \psi_{i{\bf k}}({\bf r}) = \sum_{\mu} c_{\mu i}^{\bf k} \varphi_{\mu \bf k}({\bf r}) = - * \sum_{{\bf G}}c_{{\bf G} i}^{\bf k} \varphi_{\bf G+k}({\bf r}) + - * \sum_{j}c_{j i}^{\bf k}\varphi_{j{\bf k}}({\bf r}) - * \f] - * - * Inside muffin-tins the expansion is converted into the following form: - * \f[ - * \psi_{i {\bf k}}({\bf r})= \begin{array}{ll} - * \displaystyle \sum_{L} \sum_{\lambda=1}^{N_{\ell}^{\alpha}} - * F_{L \lambda}^{i {\bf k},\alpha}f_{\ell \lambda}^{\alpha}(r) - * Y_{\ell m}(\hat {\bf r}) & {\bf r} \in MT_{\alpha} \end{array} - * \f] - * - * Thus, the total number of coefficients representing a wave-funstion is equal - * to the number of muffin-tin basis functions of the form \f$ f_{\ell \lambda}^{\alpha}(r) - * Y_{\ell m}(\hat {\bf r}) \f$ plust the number of G+k plane waves. */ - //inline int wf_size() const // TODO: better name for this - //{ - // if (ctx_.full_potential()) { - // return unit_cell_.mt_basis_size() + num_gkvec(); - // } else { - // return num_gkvec(); - // } - //} - - inline double& band_energy(int j__, int ispn__) - { - return band_energy_aux(j__, ispn__); - } - - inline double band_energy(int j__, int ispn__) const - { - auto const& e = const_cast(this)->band_energy_aux(j__, ispn__); - return e; - } - - inline double& band_occupancy(int j__, int ispn__) - { - return band_occupancy_aux(j__, ispn__); - } - - inline double band_occupancy(int j__, int ispn__) const - { - auto const& e = const_cast(this)->band_occupancy_aux(j__, ispn__); - return e; - } - - inline double fv_eigen_value(int i) const - { - return fv_eigen_values_[i]; - } - - void set_fv_eigen_values(double* eval) - { - std::memcpy(&fv_eigen_values_[0], eval, ctx_.num_fv_states() * sizeof(double)); - } - - inline double weight() const - { - return weight_; - } - - inline Wave_functions& fv_states() - { - return *fv_states_; - } - - inline Wave_functions& spinor_wave_functions() - { - return *spinor_wave_functions_; - } - - inline Wave_functions& hubbard_wave_functions() - { - return *hubbard_wave_functions_; - } - - inline Wave_functions const& hubbard_wave_functions() const - { - return *hubbard_wave_functions_; - } - - inline void allocate_hubbard_wave_functions(int size) - { - if (hubbard_wave_functions_ != nullptr) { - return; - } - const int num_sc = ctx_.num_mag_dims() == 3 ? 2 : 1; - hubbard_wave_functions_ = std::unique_ptr(new Wave_functions(gkvec_partition(), - size, - num_sc)); - } - - inline bool hubbard_wave_functions_calculated() - { - return (hubbard_wave_functions_ != nullptr); - } - - inline Wave_functions& singular_components() - { - return *singular_components_; - } - - inline vector3d vk() const - { - return vk_; - } - - /// Basis size of LAPW+lo method. - inline int gklo_basis_size() const - { - return static_cast(num_gkvec() + unit_cell_.mt_lo_basis_size()); - } - - /// Local number of G+k vectors in case of flat distribution. - inline int num_gkvec_loc() const - { - return gkvec().count(); - } - - /// Return global index of G+k vector. - inline int idxgk(int igkloc__) const - { - return gkvec_offset_ + igkloc__; - } - - /// Local number of G+k vectors for each MPI rank in the row of the 2D MPI grid. - inline int num_gkvec_row() const - { - return num_gkvec_row_; - } - - /// Local number of local orbitals for each MPI rank in the row of the 2D MPI grid. - inline int num_lo_row() const - { - return static_cast(lo_basis_descriptors_row_.size()); - } - - /// Local number of basis functions for each MPI rank in the row of the 2D MPI grid. - inline int gklo_basis_size_row() const - { - return num_gkvec_row() + num_lo_row(); - } - - /// Local number of G+k vectors for each MPI rank in the column of the 2D MPI grid. - inline int num_gkvec_col() const - { - return num_gkvec_col_; - } - - /// Local number of local orbitals for each MPI rank in the column of the 2D MPI grid. - inline int num_lo_col() const - { - return static_cast(lo_basis_descriptors_col_.size()); - } - - /// Local number of basis functions for each MPI rank in the column of the 2D MPI grid. - inline int gklo_basis_size_col() const - { - return num_gkvec_col() + num_lo_col(); - } - - inline lo_basis_descriptor const& lo_basis_descriptor_col(int idx) const - { - assert(idx >=0 && idx < (int)lo_basis_descriptors_col_.size()); - return lo_basis_descriptors_col_[idx]; - } - - inline lo_basis_descriptor const& lo_basis_descriptor_row(int idx) const - { - assert(idx >= 0 && idx < (int)lo_basis_descriptors_row_.size()); - return lo_basis_descriptors_row_[idx]; - } - - inline int igk_loc(int idx__) const - { - return igk_loc_[idx__]; - } - - inline std::vector const& igk_loc() const - { - return igk_loc_; - } - - inline int igk_row(int idx__) const - { - return igk_row_[idx__]; - } - - inline std::vector const& igk_row() const - { - return igk_row_; - } - - inline int igk_col(int idx__) const - { - return igk_col_[idx__]; - } - - inline std::vector const& igk_col() const - { - return igk_col_; - } - - inline int num_ranks_row() const - { - return num_ranks_row_; - } - - inline int rank_row() const - { - return rank_row_; - } - - inline int num_ranks_col() const - { - return num_ranks_col_; - } - - inline int rank_col() const - { - return rank_col_; - } - - /// Number of MPI ranks for a given k-point - inline int num_ranks() const - { - return comm_.size(); - } - - inline int rank() const - { - return comm_.rank(); - } - - /// Return number of lo columns for a given atom - inline int num_atom_lo_cols(int ia) const - { - return (int)atom_lo_cols_[ia].size(); - } - - /// Return local index (for the current MPI rank) of a column for a given atom and column index within an atom - inline int lo_col(int ia, int i) const - { - return atom_lo_cols_[ia][i]; - } - - /// Return number of lo rows for a given atom - inline int num_atom_lo_rows(int ia) const - { - return (int)atom_lo_rows_[ia].size(); - } - - /// Return local index (for the current MPI rank) of a row for a given atom and row index within an atom - inline int lo_row(int ia, int i) const - { - return atom_lo_rows_[ia][i]; - } - - inline dmatrix& fv_eigen_vectors() - { - return fv_eigen_vectors_; - } - - inline Wave_functions& fv_eigen_vectors_slab() - { - return *fv_eigen_vectors_slab_; - } - - inline dmatrix& sv_eigen_vectors(int ispn) - { - return sv_eigen_vectors_[ispn]; - } - - inline mdarray& fd_eigen_vectors() - { - return fd_eigen_vectors_; - } - - void bypass_sv() - { - std::memcpy(&band_energies_[0], &fv_eigen_values_[0], ctx_.num_fv_states() * sizeof(double)); - } - - inline Gvec const& gkvec() const - { - return *gkvec_; - } - - inline Gvec_partition const& gkvec_partition() const - { - return *gkvec_partition_; - } - - inline Matching_coefficients const& alm_coeffs_row() - { - return *alm_coeffs_row_; - } - - inline Matching_coefficients const& alm_coeffs_col() - { - return *alm_coeffs_col_; - } - - inline Matching_coefficients const& alm_coeffs_loc() const - { - return *alm_coeffs_loc_; - } - - inline Communicator const& comm() const - { - return comm_; - } - - inline Communicator const& comm_row() const - { - return comm_row_; - } - - inline Communicator const& comm_col() const - { - return comm_col_; - } - - inline double_complex p_mtrx(int xi1, int xi2, int iat) const - { - return p_mtrx_(xi1, xi2, iat); - } - - inline mdarray& p_mtrx() - { - return p_mtrx_; - } - - Beta_projectors& beta_projectors() - { - assert(beta_projectors_ != nullptr); - return *beta_projectors_; - } - - Beta_projectors& beta_projectors_row() - { - assert(beta_projectors_ != nullptr); - return *beta_projectors_row_; - } - - Beta_projectors& beta_projectors_col() - { - assert(beta_projectors_ != nullptr); - return *beta_projectors_col_; - } -}; - -#include "K_point/generate_fv_states.hpp" -#include "K_point/generate_spinor_wave_functions.hpp" -#include "K_point/generate_gklo_basis.hpp" -#include "K_point/initialize.hpp" -#include "K_point/k_point.hpp" -#include "K_point/test_fv_states.hpp" -#include "K_point/generate_atomic_centered_wavefunctions.hpp" - -} - -/** \page basis Basis functions for Kohn-Sham wave-functions expansion - * - * \section basis1 LAPW+lo basis - * - * LAPW+lo basis consists of two different sets of functions: LAPW functions \f$ \varphi_{{\bf G+k}} \f$ defined over - * entire unit cell: - * \f[ - * \varphi_{{\bf G+k}}({\bf r}) = \left\{ \begin{array}{ll} - * \displaystyle \sum_{L} \sum_{\nu=1}^{O_{\ell}^{\alpha}} a_{L\nu}^{\alpha}({\bf G+k})u_{\ell \nu}^{\alpha}(r) - * Y_{\ell m}(\hat {\bf r}) & {\bf r} \in {\rm MT} \alpha \\ - * \displaystyle \frac{1}{\sqrt \Omega} e^{i({\bf G+k}){\bf r}} & {\bf r} \in {\rm I} \end{array} \right. - * \f] - * and Bloch sums of local orbitals defined inside muffin-tin spheres only: - * \f[ - * \begin{array}{ll} \displaystyle \varphi_{j{\bf k}}({\bf r})=\sum_{{\bf T}} e^{i{\bf kT}} - * \varphi_{j}({\bf r - T}) & {\rm {\bf r} \in MT} \end{array} - * \f] - * Each local orbital is composed of radial and angular parts: - * \f[ - * \varphi_{j}({\bf r}) = \phi_{\ell_j}^{\zeta_j,\alpha_j}(r) Y_{\ell_j m_j}(\hat {\bf r}) - * \f] - * Radial part of local orbital is defined as a linear combination of radial functions (minimum two radial functions - * are required) such that local orbital vanishes at the sphere boundary: - * \f[ - * \phi_{\ell}^{\zeta, \alpha}(r) = \sum_{p}\gamma_{p}^{\zeta,\alpha} u_{\ell \nu_p}^{\alpha}(r) - * \f] - * - * Arbitrary number of local orbitals can be introduced for each angular quantum number (this is highlighted by - * the index \f$ \zeta \f$). - * - * Radial functions are m-th order (with zero-order being a function itself) energy derivatives of the radial - * Schrödinger equation: - * \f[ - * u_{\ell \nu}^{\alpha}(r) = \frac{\partial^{m_{\nu}}}{\partial^{m_{\nu}}E}u_{\ell}^{\alpha}(r,E)\Big|_{E=E_{\nu}} - * \f] - */ - -/** \page data_dist K-point data distribution - * - * \section data_dist1 "Panel" and "full" data storage - * - * We have to deal with big arrays (matching coefficients, eigen vectors, wave functions, etc.) which may not fit - * into the memory of a single node. For some operations we need a "panel" distribution of data, where each - * MPI rank gets a local panel of block-cyclic distributed matrix. This way of storing data is necessary for the - * distributed PBLAS and ScaLAPACK operations. - * - */ - - -#endif // __K_POINT_H__ diff --git a/src/sht.h b/src/sht.h deleted file mode 100644 index 5eda743b6..000000000 --- a/src/sht.h +++ /dev/null @@ -1,1288 +0,0 @@ -// Copyright (c) 2013-2017 Anton Kozhevnikov, Thomas Schulthess -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without modification, are permitted provided that -// the following conditions are met: -// -// 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the -// following disclaimer. -// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions -// and the following disclaimer in the documentation and/or other materials provided with the distribution. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED -// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR -// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR -// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -/** \file sht.h - * - * \brief Contains declaration and particular implementation of sirius::SHT class. - */ - -#ifndef __SHT_H__ -#define __SHT_H__ - -#include -#include -#include -#include -#include -#include -#include - -#include "typedefs.h" -#include "linalg.hpp" -#include "lebedev_grids.hpp" - -namespace sirius { - -/// Spherical harmonics transformations and related oprtations. -/** This class is responsible for the generation of complex and real spherical harmonics, generation of transformation - * matrices, transformation between spectral and real-space representations, generation of Gaunt and Clebsch-Gordan - * coefficients and calculation of spherical harmonic derivatives */ -class SHT // TODO: better name -{ - private: - - /// Maximum \f$ \ell \f$ of spherical harmonics. - int lmax_; - - /// Maximum number of \f$ \ell, m \f$ components. - int lmmax_; - - /// Number of real-space \f$ (\theta, \phi) \f$ points on the sphere. - int num_points_; - - /// Cartesian coordinates of points (normalized to 1). - mdarray coord_; - - /// \f$ (\theta, \phi) \f$ angles of points. - mdarray tp_; - - /// Point weights. - std::vector w_; - - /// Backward transformation from Ylm to spherical coordinates. - mdarray ylm_backward_; - - /// Forward transformation from spherical coordinates to Ylm. - mdarray ylm_forward_; - - /// Backward transformation from Rlm to spherical coordinates. - mdarray rlm_backward_; - - /// Forward transformation from spherical coordinates to Rlm. - mdarray rlm_forward_; - - /// Type of spherical grid (0: Lebedev-Laikov, 1: uniform). - int mesh_type_{0}; - - public: - - /// Default constructor. - SHT(int lmax__) - : lmax_(lmax__) - { - lmmax_ = (lmax_ + 1) * (lmax_ + 1); - - if (mesh_type_ == 0) { - num_points_ = Lebedev_Laikov_npoint(2 * lmax_); - } - if (mesh_type_ == 1) { - num_points_ = lmmax_; - } - - std::vector x(num_points_); - std::vector y(num_points_); - std::vector z(num_points_); - - coord_ = mdarray(3, num_points_); - - tp_ = mdarray(2, num_points_); - - w_.resize(num_points_); - - if (mesh_type_ == 0) Lebedev_Laikov_sphere(num_points_, &x[0], &y[0], &z[0], &w_[0]); - if (mesh_type_ == 1) uniform_coverage(); - - ylm_backward_ = mdarray(lmmax_, num_points_); - - ylm_forward_ = mdarray(num_points_, lmmax_); - - rlm_backward_ = mdarray(lmmax_, num_points_); - - rlm_forward_ = mdarray(num_points_, lmmax_); - - for (int itp = 0; itp < num_points_; itp++) { - if (mesh_type_ == 0) { - coord_(0, itp) = x[itp]; - coord_(1, itp) = y[itp]; - coord_(2, itp) = z[itp]; - - auto vs = spherical_coordinates(vector3d(x[itp], y[itp], z[itp])); - tp_(0, itp) = vs[1]; - tp_(1, itp) = vs[2]; - spherical_harmonics(lmax_, vs[1], vs[2], &ylm_backward_(0, itp)); - spherical_harmonics(lmax_, vs[1], vs[2], &rlm_backward_(0, itp)); - for (int lm = 0; lm < lmmax_; lm++) { - ylm_forward_(itp, lm) = std::conj(ylm_backward_(lm, itp)) * w_[itp] * fourpi; - rlm_forward_(itp, lm) = rlm_backward_(lm, itp) * w_[itp] * fourpi; - } - } - if (mesh_type_ == 1) { - double t = tp_(0, itp); - double p = tp_(1, itp); - - coord_(0, itp) = sin(t) * cos(p); - coord_(1, itp) = sin(t) * sin(p); - coord_(2, itp) = cos(t); - - spherical_harmonics(lmax_, t, p, &ylm_backward_(0, itp)); - spherical_harmonics(lmax_, t, p, &rlm_backward_(0, itp)); - - for (int lm = 0; lm < lmmax_; lm++) { - ylm_forward_(lm, itp) = ylm_backward_(lm, itp); - rlm_forward_(lm, itp) = rlm_backward_(lm, itp); - } - } - } - - if (mesh_type_ == 1) { - linalg::geinv(lmmax_, ylm_forward_); - linalg::geinv(lmmax_, rlm_forward_); - } - - #if (__VERIFICATION > 0) - { - double dr = 0; - double dy = 0; - - for (int lm = 0; lm < lmmax_; lm++) - { - for (int lm1 = 0; lm1 < lmmax_; lm1++) - { - double t = 0; - double_complex zt(0, 0); - for (int itp = 0; itp < num_points_; itp++) - { - zt += ylm_forward_(itp, lm) * ylm_backward_(lm1, itp); - t += rlm_forward_(itp, lm) * rlm_backward_(lm1, itp); - } - - if (lm == lm1) - { - zt -= 1.0; - t -= 1.0; - } - dr += std::abs(t); - dy += std::abs(zt); - } - } - dr = dr / lmmax_ / lmmax_; - dy = dy / lmmax_ / lmmax_; - - if (dr > 1e-15 || dy > 1e-15) - { - std::stringstream s; - s << "spherical mesh error is too big" << std::endl - << " real spherical integration error " << dr << std::endl - << " complex spherical integration error " << dy; - WARNING(s.str()) - } - - std::vector flm(lmmax_); - std::vector ftp(num_points_); - for (int lm = 0; lm < lmmax_; lm++) - { - std::memset(&flm[0], 0, lmmax_ * sizeof(double)); - flm[lm] = 1.0; - backward_transform(lmmax_, &flm[0], 1, lmmax_, &ftp[0]); - forward_transform(&ftp[0], 1, lmmax_, lmmax_, &flm[0]); - flm[lm] -= 1.0; - - double t = 0.0; - for (int lm1 = 0; lm1 < lmmax_; lm1++) t += std::abs(flm[lm1]); - - t /= lmmax_; - - if (t > 1e-15) - { - std::stringstream s; - s << "test of backward / forward real SHT failed" << std::endl - << " total error " << t; - WARNING(s.str()); - } - } - } - #endif - } - - /// Perform a backward transformation from spherical harmonics to spherical coordinates. - /** \f[ - * f(\theta, \phi, r) = \sum_{\ell m} f_{\ell m}(r) Y_{\ell m}(\theta, \phi) - * \f] - * - * \param [in] ld Size of leading dimension of flm. - * \param [in] flm Raw pointer to \f$ f_{\ell m}(r) \f$. - * \param [in] nr Number of radial points. - * \param [in] lmmax Maximum number of lm- harmonics to take into sum. - * \param [out] ftp Raw pointer to \f$ f(\theta, \phi, r) \f$. - */ - template - void backward_transform(int ld, T const* flm, int nr, int lmmax, T* ftp); - - /// Perform a forward transformation from spherical coordinates to spherical harmonics. - /** \f[ - * f_{\ell m}(r) = \iint f(\theta, \phi, r) Y_{\ell m}^{*}(\theta, \phi) \sin \theta d\phi d\theta = - * \sum_{i} f(\theta_i, \phi_i, r) Y_{\ell m}^{*}(\theta_i, \phi_i) w_i - * \f] - * - * \param [in] ftp Raw pointer to \f$ f(\theta, \phi, r) \f$. - * \param [in] nr Number of radial points. - * \param [in] lmmax Maximum number of lm- coefficients to generate. - * \param [in] ld Size of leading dimension of flm. - * \param [out] flm Raw pointer to \f$ f_{\ell m}(r) \f$. - */ - template - void forward_transform(T const* ftp, int nr, int lmmax, int ld, T* flm); - - /// Convert form Rlm to Ylm representation. - static void convert(int lmax__, double const* f_rlm__, double_complex* f_ylm__) - { - int lm = 0; - for (int l = 0; l <= lmax__; l++) { - for (int m = -l; m <= l; m++) { - if (m == 0) { - f_ylm__[lm] = f_rlm__[lm]; - } else { - int lm1 = utils::lm(l, -m); - f_ylm__[lm] = ylm_dot_rlm(l, m, m) * f_rlm__[lm] + ylm_dot_rlm(l, m, -m) * f_rlm__[lm1]; - } - lm++; - } - } - } - - /// Convert from Ylm to Rlm representation. - static void convert(int lmax__, double_complex const* f_ylm__, double* f_rlm__) - { - int lm = 0; - for (int l = 0; l <= lmax__; l++) { - for (int m = -l; m <= l; m++) { - if (m == 0) { - f_rlm__[lm] = std::real(f_ylm__[lm]); - } else { - int lm1 = utils::lm(l, -m); - f_rlm__[lm] = std::real(rlm_dot_ylm(l, m, m) * f_ylm__[lm] + rlm_dot_ylm(l, m, -m) * f_ylm__[lm1]); - } - lm++; - } - } - } - - //void rlm_forward_iterative_transform(double *ftp__, int lmmax, int ncol, double* flm) - //{ - // Timer t("sirius::SHT::rlm_forward_iterative_transform"); - // - // assert(lmmax <= lmmax_); - - // mdarray ftp(ftp__, num_points_, ncol); - // mdarray ftp1(num_points_, ncol); - // - // blas::gemm(1, 0, lmmax, ncol, num_points_, 1.0, &rlm_forward_(0, 0), num_points_, &ftp(0, 0), num_points_, 0.0, - // flm, lmmax); - // - // for (int i = 0; i < 2; i++) - // { - // rlm_backward_transform(flm, lmmax, ncol, &ftp1(0, 0)); - // double tdiff = 0.0; - // for (int ir = 0; ir < ncol; ir++) - // { - // for (int itp = 0; itp < num_points_; itp++) - // { - // ftp1(itp, ir) = ftp(itp, ir) - ftp1(itp, ir); - // //tdiff += fabs(ftp1(itp, ir)); - // } - // } - // - // for (int itp = 0; itp < num_points_; itp++) - // { - // tdiff += fabs(ftp1(itp, ncol - 1)); - // } - // std::cout << "iter : " << i << " avg. MT diff = " << tdiff / num_points_ << std::endl; - // blas::gemm(1, 0, lmmax, ncol, num_points_, 1.0, &rlm_forward_(0, 0), num_points_, &ftp1(0, 0), num_points_, 1.0, - // flm, lmmax); - // } - //} - - /// Transform Cartesian coordinates [x,y,z] to spherical coordinates [r,theta,phi] - static vector3d spherical_coordinates(vector3d vc) - { - vector3d vs; - - const double eps{1e-12}; - - vs[0] = vc.length(); - - if (vs[0] <= eps) { - vs[1] = 0.0; - vs[2] = 0.0; - } else { - vs[1] = std::acos(vc[2] / vs[0]); // theta = cos^{-1}(z/r) - - if (std::abs(vc[0]) > eps || std::abs(vc[1]) > eps) { - vs[2] = std::atan2(vc[1], vc[0]); // phi = tan^{-1}(y/x) - if (vs[2] < 0.0) { - vs[2] += twopi; - } - } else { - vs[2] = 0.0; - } - } - - return vs; - } - - /// Generate complex spherical harmonics Ylm - static void spherical_harmonics(int lmax, double theta, double phi, double_complex* ylm) - { - double x = std::cos(theta); - std::vector result_array(lmax + 1); - - for (int l = 0; l <= lmax; l++) { - for (int m = 0; m <= l; m++) { - double_complex z = std::exp(double_complex(0.0, m * phi)); - ylm[utils::lm(l, m)] = gsl_sf_legendre_sphPlm(l, m, x) * z; - if (m % 2) { - ylm[utils::lm(l, -m)] = -std::conj(ylm[utils::lm(l, m)]); - } else { - ylm[utils::lm(l, -m)] = std::conj(ylm[utils::lm(l, m)]); - } - } - } - } - - /// Generate real spherical harmonics Rlm - /** Mathematica code: - * \verbatim - * R[l_, m_, th_, ph_] := - * If[m > 0, std::sqrt[2]*ComplexExpand[Re[SphericalHarmonicY[l, m, th, ph]]], - * If[m < 0, std::sqrt[2]*ComplexExpand[Im[SphericalHarmonicY[l, m, th, ph]]], - * If[m == 0, ComplexExpand[Re[SphericalHarmonicY[l, 0, th, ph]]]]]] - * \endverbatim - */ - static void spherical_harmonics(int lmax, double theta, double phi, double* rlm) - { - int lmmax = (lmax + 1) * (lmax + 1); - std::vector ylm(lmmax); - spherical_harmonics(lmax, theta, phi, &ylm[0]); - - double const t = std::sqrt(2.0); - - rlm[0] = y00; - - for (int l = 1; l <= lmax; l++) { - for (int m = -l; m < 0; m++) { - rlm[utils::lm(l, m)] = t * ylm[utils::lm(l, m)].imag(); - } - - rlm[utils::lm(l, 0)] = ylm[utils::lm(l, 0)].real(); - - for (int m = 1; m <= l; m++) { - rlm[utils::lm(l, m)] = t * ylm[utils::lm(l, m)].real(); - } - } - } - - /// Generate real or complex spherical harmonics for a given Cartesian vectors. - template - static std::vector spherical_harmonics(int lmax__, vector3d vc__) - { - auto rtp = spherical_coordinates(vc__); - std::vector v(utils::lmmax(lmax__)); - spherical_harmonics(lmax__, rtp[1], rtp[2], v.data()); - return std::move(v); - } - - /// Compute element of the transformation matrix from complex to real spherical harmonics. - /** Real spherical harmonic can be written as a linear combination of complex harmonics: - - \f[ - R_{\ell m}(\theta, \phi) = \sum_{m'} a^{\ell}_{m' m}Y_{\ell m'}(\theta, \phi) - \f] - where - \f[ - a^{\ell}_{m' m} = \langle Y_{\ell m'} | R_{\ell m} \rangle - \f] - which gives the name to this function. - - Transformation from real to complex spherical harmonics is conjugate transpose: - - \f[ - Y_{\ell m}(\theta, \phi) = \sum_{m'} a^{\ell*}_{m m'}R_{\ell m'}(\theta, \phi) - \f] - - Mathematica code: - \verbatim - b[m1_, m2_] := - If[m1 == 0, 1, - If[m1 < 0 && m2 < 0, -I/Sqrt[2], - If[m1 > 0 && m2 < 0, (-1)^m1*I/Sqrt[2], - If[m1 < 0 && m2 > 0, (-1)^m2/Sqrt[2], - If[m1 > 0 && m2 > 0, 1/Sqrt[2]]]]]] - - a[m1_, m2_] := If[Abs[m1] == Abs[m2], b[m1, m2], 0] - - Rlm[l_, m_, t_, p_] := Sum[a[m1, m]*SphericalHarmonicY[l, m1, t, p], {m1, -l, l}] - \endverbatim - */ - static inline double_complex ylm_dot_rlm(int l, int m1, int m2) - { - double const isqrt2 = 1.0 / std::sqrt(2); - - assert(l >= 0 && std::abs(m1) <= l && std::abs(m2) <= l); - - if (!((m1 == m2) || (m1 == -m2))) { - return double_complex(0, 0); - } - - if (m1 == 0) { - return double_complex(1, 0); - } - - if (m1 < 0) { - if (m2 < 0) { - return -double_complex(0, isqrt2); - } else { - return std::pow(-1.0, m2) * double_complex(isqrt2, 0); - } - } else { - if (m2 < 0) { - return std::pow(-1.0, m1) * double_complex(0, isqrt2); - } else { - return double_complex(isqrt2, 0); - } - } - } - - static inline double_complex rlm_dot_ylm(int l, int m1, int m2) - { - return std::conj(ylm_dot_rlm(l, m2, m1)); - } - - /// Gaunt coefficent of three complex spherical harmonics. - /** - * \f[ - * \langle Y_{\ell_1 m_1} | Y_{\ell_2 m_2} | Y_{\ell_3 m_3} \rangle - * \f] - */ - static double gaunt_ylm(int l1, int l2, int l3, int m1, int m2, int m3) - { - assert(l1 >= 0); - assert(l2 >= 0); - assert(l3 >= 0); - assert(m1 >= -l1 && m1 <= l1); - assert(m2 >= -l2 && m2 <= l2); - assert(m3 >= -l3 && m3 <= l3); - - return std::pow(-1.0, std::abs(m1)) * std::sqrt(double(2 * l1 + 1) * double(2 * l2 + 1) * double(2 * l3 + 1) / fourpi) * - gsl_sf_coupling_3j(2 * l1, 2 * l2, 2 * l3, 0, 0, 0) * - gsl_sf_coupling_3j(2 * l1, 2 * l2, 2 * l3, -2 * m1, 2 * m2, 2 * m3); - } - - /// Gaunt coefficent of three real spherical harmonics. - /** - * \f[ - * \langle R_{\ell_1 m_1} | R_{\ell_2 m_2} | R_{\ell_3 m_3} \rangle - * \f] - */ - static double gaunt_rlm(int l1, int l2, int l3, int m1, int m2, int m3) - { - assert(l1 >= 0); - assert(l2 >= 0); - assert(l3 >= 0); - assert(m1 >= -l1 && m1 <= l1); - assert(m2 >= -l2 && m2 <= l2); - assert(m3 >= -l3 && m3 <= l3); - - double d = 0; - for (int k1 = -l1; k1 <= l1; k1++) { - for (int k2 = -l2; k2 <= l2; k2++) { - for (int k3 = -l3; k3 <= l3; k3++) { - d += std::real(std::conj(SHT::ylm_dot_rlm(l1, k1, m1)) * - SHT::ylm_dot_rlm(l2, k2, m2) * - SHT::ylm_dot_rlm(l3, k3, m3)) * SHT::gaunt_ylm(l1, l2, l3, k1, k2, k3); - } - } - } - return d; - } - - /// Gaunt coefficent of two real spherical harmonics with a complex one. - /** - * \f[ - * \langle R_{\ell_1 m_1} | Y_{\ell_2 m_2} | R_{\ell_3 m_3} \rangle - * \f] - */ - static double gaunt_rlm_ylm_rlm(int l1, int l2, int l3, int m1, int m2, int m3) - { - assert(l1 >= 0); - assert(l2 >= 0); - assert(l3 >= 0); - assert(m1 >= -l1 && m1 <= l1); - assert(m2 >= -l2 && m2 <= l2); - assert(m3 >= -l3 && m3 <= l3); - - double d = 0; - for (int k1 = -l1; k1 <= l1; k1++) { - for (int k3 = -l3; k3 <= l3; k3++) { - d += std::real(std::conj(SHT::ylm_dot_rlm(l1, k1, m1)) * - SHT::ylm_dot_rlm(l3, k3, m3)) * SHT::gaunt_ylm(l1, l2, l3, k1, m2, k3); - } - } - return d; - } - - /// Gaunt coefficent of two complex and one real spherical harmonics. - /** - * \f[ - * \langle Y_{\ell_1 m_1} | R_{\ell_2 m_2} | Y_{\ell_3 m_3} \rangle - * \f] - */ - static double_complex gaunt_hybrid(int l1, int l2, int l3, int m1, int m2, int m3) - { - assert(l1 >= 0); - assert(l2 >= 0); - assert(l3 >= 0); - assert(m1 >= -l1 && m1 <= l1); - assert(m2 >= -l2 && m2 <= l2); - assert(m3 >= -l3 && m3 <= l3); - - if (m2 == 0) { - return double_complex(gaunt_ylm(l1, l2, l3, m1, m2, m3), 0.0); - } else { - return (ylm_dot_rlm(l2, m2, m2) * gaunt_ylm(l1, l2, l3, m1, m2, m3) + - ylm_dot_rlm(l2, -m2, m2) * gaunt_ylm(l1, l2, l3, m1, -m2, m3)); - } - } - - void uniform_coverage() - { - tp_(0, 0) = pi; - tp_(1, 0) = 0; - - for (int k = 1; k < num_points_ - 1; k++) { - double hk = -1.0 + double(2 * k) / double(num_points_ - 1); - tp_(0, k) = std::acos(hk); - double t = tp_(1, k - 1) + 3.80925122745582 / std::sqrt(double(num_points_)) / std::sqrt(1 - hk * hk); - tp_(1, k) = std::fmod(t, twopi); - } - - tp_(0, num_points_ - 1) = 0; - tp_(1, num_points_ - 1) = 0; - } - - /// Return Clebsch-Gordan coefficient. - /** Clebsch-Gordan coefficients arise when two angular momenta are combined into a - * total angular momentum. - */ - static inline double clebsch_gordan(int l1, int l2, int l3, int m1, int m2, int m3) - { - assert(l1 >= 0); - assert(l2 >= 0); - assert(l3 >= 0); - assert(m1 >= -l1 && m1 <= l1); - assert(m2 >= -l2 && m2 <= l2); - assert(m3 >= -l3 && m3 <= l3); - - return std::pow(-1, l1 - l2 + m3) * std::sqrt(double(2 * l3 + 1)) * - gsl_sf_coupling_3j(2 * l1, 2 * l2, 2 * l3, 2 * m1, 2 * m2, -2 * m3); - } - - inline double_complex ylm_backward(int lm, int itp) const - { - return ylm_backward_(lm, itp); - } - - inline double rlm_backward(int lm, int itp) const - { - return rlm_backward_(lm, itp); - } - - inline double coord(int x, int itp) const - { - return coord_(x, itp); - } - - inline vector3d coord(int idx__) const - { - return vector3d(coord_(0, idx__), coord_(1, idx__), coord(2, idx__)); - } - - inline double theta(int idx__) const - { - return tp_(0, idx__); - } - - inline double phi(int idx__) const - { - return tp_(1, idx__); - } - - inline double weight(int idx__) const - { - return w_[idx__]; - } - - inline int num_points() const - { - return num_points_; - } - - inline int lmax() const - { - return lmax_; - } - - inline int lmmax() const - { - return lmmax_; - } - - static void wigner_d_matrix(int l, double beta, mdarray& d_mtrx__) - { - long double cos_b2 = std::cos((long double)beta / 2.0L); - long double sin_b2 = std::sin((long double)beta / 2.0L); - - for (int m1 = -l; m1 <= l; m1++) { - for (int m2 = -l; m2 <= l; m2++) { - long double d = 0; - for (int j = 0; j <= std::min(l + m1, l - m2); j++) { - if ((l - m2 - j) >= 0 && (l + m1 - j) >= 0 && (j + m2 - m1) >= 0) { - long double g = (std::sqrt(utils::factorial(l + m1)) / utils::factorial(l - m2 - j)) * - (std::sqrt(utils::factorial(l - m1)) / utils::factorial(l + m1 - j)) * - (std::sqrt(utils::factorial(l - m2)) / utils::factorial(j + m2 - m1)) * - (std::sqrt(utils::factorial(l + m2)) / utils::factorial(j)); - d += g * std::pow(-1, j) * std::pow(cos_b2, 2 * l + m1 - m2 - 2 * j) * std::pow(sin_b2, 2 * j + m2 - m1); - } - } - d_mtrx__(m1 + l, m2 + l) = (double)d; - } - } - } - - static void rotation_matrix_l(int l, vector3d euler_angles, int proper_rotation, - double_complex* rot_mtrx__, int ld) - { - mdarray rot_mtrx(rot_mtrx__, ld, 2 * l + 1); - - mdarray d_mtrx(2 * l + 1, 2 * l + 1); - wigner_d_matrix(l, euler_angles[1], d_mtrx); - - for (int m1 = -l; m1 <= l; m1++) { - for (int m2 = -l; m2 <= l; m2++) { - rot_mtrx(m1 + l, m2 + l) = std::exp(double_complex(0, -euler_angles[0] * m1 - euler_angles[2] * m2)) * - d_mtrx(m1 + l, m2 + l) * std::pow(proper_rotation, l); - } - } - } - - static void rotation_matrix_l(int l, vector3d euler_angles, int proper_rotation, - double* rot_mtrx__, int ld) - { - mdarray rot_mtrx_rlm(rot_mtrx__, ld, 2 * l + 1); - mdarray rot_mtrx_ylm(2 * l + 1, 2 * l + 1); - - mdarray d_mtrx(2 * l + 1, 2 * l + 1); - wigner_d_matrix(l, euler_angles[1], d_mtrx); - - for (int m1 = -l; m1 <= l; m1++) - { - for (int m2 = -l; m2 <= l; m2++) - { - rot_mtrx_ylm(m1 + l, m2 + l) = std::exp(double_complex(0, -euler_angles[0] * m1 - euler_angles[2] * m2)) * - d_mtrx(m1 + l, m2 + l) * std::pow(proper_rotation, l); - } - } - for (int m1 = -l; m1 <= l; m1++) - { - auto i13 = (m1 == 0) ? std::vector({0}) : std::vector({-m1, m1}); - - for (int m2 = -l; m2 <= l; m2++) - { - auto i24 = (m2 == 0) ? std::vector({0}) : std::vector({-m2, m2}); - - for (int m3: i13) - { - for (int m4: i24) - { - rot_mtrx_rlm(m1 + l, m2 + l) += std::real(rlm_dot_ylm(l, m1, m3) * - rot_mtrx_ylm(m3 + l, m4 + l) * - ylm_dot_rlm(l, m4, m2)); - } - } - } - } - } - - template - static void rotation_matrix(int lmax, - vector3d euler_angles, - int proper_rotation, - mdarray& rotm) - { - rotm.zero(); - - for (int l = 0; l <= lmax; l++) { - rotation_matrix_l(l, euler_angles, proper_rotation, &rotm(l * l, l * l), rotm.ld()); - } - } - - /// Compute derivative of real-spherical harmonic with respect to theta angle. - static void dRlm_dtheta(int lmax, double theta, double phi, mdarray& data) - { - assert(lmax <= 8); - - data[0]=0; - - if (lmax==0) return; - - auto cos_theta = SHT::cosxn(lmax, theta); - auto sin_theta = SHT::sinxn(lmax, theta); - auto cos_phi = SHT::cosxn(lmax, phi); - auto sin_phi = SHT::sinxn(lmax, phi); - - data[1]=-(std::sqrt(3/pi)*cos_theta[0]*sin_phi[0])/2.; - - data[2]=-(std::sqrt(3/pi)*sin_theta[0])/2.; - - data[3]=-(std::sqrt(3/pi)*cos_phi[0]*cos_theta[0])/2.; - - if (lmax==1) return; - - data[4]=-(std::sqrt(15/pi)*cos_phi[0]*cos_theta[0]*sin_phi[0]*sin_theta[0]); - - data[5]=-(std::sqrt(15/pi)*cos_theta[1]*sin_phi[0])/2.; - - data[6]=(-3*std::sqrt(5/pi)*cos_theta[0]*sin_theta[0])/2.; - - data[7]=-(std::sqrt(15/pi)*cos_phi[0]*cos_theta[1])/2.; - - data[8]=(std::sqrt(15/pi)*cos_phi[1]*sin_theta[1])/4.; - - if (lmax==2) return; - - data[9]=(-3*std::sqrt(35/(2.*pi))*cos_theta[0]*sin_phi[2]*std::pow(sin_theta[0],2))/4.; - - data[10]=(std::sqrt(105/pi)*sin_phi[1]*(sin_theta[0] - 3*sin_theta[2]))/16.; - - data[11]=-(std::sqrt(21/(2.*pi))*(cos_theta[0] + 15*cos_theta[2])*sin_phi[0])/16.; - - data[12]=(-3*std::sqrt(7/pi)*(sin_theta[0] + 5*sin_theta[2]))/16.; - - data[13]=-(std::sqrt(21/(2.*pi))*cos_phi[0]*(cos_theta[0] + 15*cos_theta[2]))/16.; - - data[14]=-(std::sqrt(105/pi)*cos_phi[1]*(sin_theta[0] - 3*sin_theta[2]))/16.; - - data[15]=(-3*std::sqrt(35/(2.*pi))*cos_phi[2]*cos_theta[0]*std::pow(sin_theta[0],2))/4.; - - if (lmax==3) return; - - data[16]=(-3*std::sqrt(35/pi)*cos_theta[0]*sin_phi[3]*std::pow(sin_theta[0],3))/4.; - - data[17]=(-3*std::sqrt(35/(2.*pi))*(1 + 2*cos_theta[1])*sin_phi[2]*std::pow(sin_theta[0],2))/4.; - - data[18]=(3*std::sqrt(5/pi)*sin_phi[1]*(2*sin_theta[1] - 7*sin_theta[3]))/16.; - - data[19]=(-3*std::sqrt(5/(2.*pi))*(cos_theta[1] + 7*cos_theta[3])*sin_phi[0])/8.; - - data[20]=(-15*(2*sin_theta[1] + 7*sin_theta[3]))/(32.*std::sqrt(pi)); - - data[21]=(-3*std::sqrt(5/(2.*pi))*cos_phi[0]*(cos_theta[1] + 7*cos_theta[3]))/8.; - - data[22]=(3*std::sqrt(5/pi)*cos_phi[1]*(-2*sin_theta[1] + 7*sin_theta[3]))/16.; - - data[23]=(-3*std::sqrt(35/(2.*pi))*cos_phi[2]*(1 + 2*cos_theta[1])*std::pow(sin_theta[0],2))/4.; - - data[24]=(3*std::sqrt(35/pi)*cos_phi[3]*cos_theta[0]*std::pow(sin_theta[0],3))/4.; - - if (lmax==4) return; - - data[25]=(-15*std::sqrt(77/(2.*pi))*cos_theta[0]*sin_phi[4]*std::pow(sin_theta[0],4))/16.; - - data[26]=(-3*std::sqrt(385/pi)*(3 + 5*cos_theta[1])*sin_phi[3]*std::pow(sin_theta[0],3))/32.; - - data[27]=(-3*std::sqrt(385/(2.*pi))*cos_theta[0]*(1 + 15*cos_theta[1])*sin_phi[2]*std::pow(sin_theta[0],2))/32.; - - data[28]=(std::sqrt(1155/pi)*sin_phi[1]*(2*sin_theta[0] + 3*(sin_theta[2] - 5*sin_theta[4])))/128.; - - data[29]=-(std::sqrt(165/pi)*(2*cos_theta[0] + 21*(cos_theta[2] + 5*cos_theta[4]))*sin_phi[0])/256.; - - data[30]=(-15*std::sqrt(11/pi)*(2*sin_theta[0] + 7*(sin_theta[2] + 3*sin_theta[4])))/256.; - - data[31]=-(std::sqrt(165/pi)*cos_phi[0]*(2*cos_theta[0] + 21*(cos_theta[2] + 5*cos_theta[4])))/256.; - - data[32]=(std::sqrt(1155/pi)*cos_phi[1]*(-2*sin_theta[0] - 3*sin_theta[2] + 15*sin_theta[4]))/128.; - - data[33]=(-3*std::sqrt(385/(2.*pi))*cos_phi[2]*(17*cos_theta[0] + 15*cos_theta[2])*std::pow(sin_theta[0],2))/64.; - - data[34]=(3*std::sqrt(385/pi)*cos_phi[3]*(3 + 5*cos_theta[1])*std::pow(sin_theta[0],3))/32.; - - data[35]=(-15*std::sqrt(77/(2.*pi))*cos_phi[4]*cos_theta[0]*std::pow(sin_theta[0],4))/16.; - - if (lmax==5) return; - - data[36]=(-3*std::sqrt(3003/(2.*pi))*cos_theta[0]*sin_phi[5]*std::pow(sin_theta[0],5))/16.; - - data[37]=(-3*std::sqrt(1001/(2.*pi))*(2 + 3*cos_theta[1])*sin_phi[4]*std::pow(sin_theta[0],4))/16.; - - data[38]=(-3*std::sqrt(91/pi)*cos_theta[0]*(7 + 33*cos_theta[1])*sin_phi[3]*std::pow(sin_theta[0],3))/32.; - - data[39]=(-3*std::sqrt(1365/(2.*pi))*(7 + 14*cos_theta[1] + 11*cos_theta[3])*sin_phi[2]*std::pow(sin_theta[0],2))/64.; - - data[40]=(std::sqrt(1365/(2.*pi))*sin_phi[1]*(17*sin_theta[1] + 12*sin_theta[3] - 99*sin_theta[5]))/512.; - - data[41]=-(std::sqrt(273/pi)*(5*cos_theta[1] + 24*cos_theta[3] + 99*cos_theta[5])*sin_phi[0])/256.; - - data[42]=(-21*std::sqrt(13/pi)*(5*sin_theta[1] + 12*sin_theta[3] + 33*sin_theta[5]))/512.; - - data[43]=-(std::sqrt(273/pi)*cos_phi[0]*(5*cos_theta[1] + 24*cos_theta[3] + 99*cos_theta[5]))/256.; - - data[44]=(std::sqrt(1365/(2.*pi))*cos_phi[1]*(-17*sin_theta[1] - 12*sin_theta[3] + 99*sin_theta[5]))/512.; - - data[45]=(-3*std::sqrt(1365/(2.*pi))*cos_phi[2]*(7 + 14*cos_theta[1] + 11*cos_theta[3])*std::pow(sin_theta[0],2))/64.; - - data[46]=(3*std::sqrt(91/pi)*cos_phi[3]*(47*cos_theta[0] + 33*cos_theta[2])*std::pow(sin_theta[0],3))/64.; - - data[47]=(-3*std::sqrt(1001/(2.*pi))*cos_phi[4]*(2 + 3*cos_theta[1])*std::pow(sin_theta[0],4))/16.; - - data[48]=(3*std::sqrt(3003/(2.*pi))*cos_phi[5]*cos_theta[0]*std::pow(sin_theta[0],5))/16.; - - if (lmax==6) return; - - data[49]=(-21*std::sqrt(715/pi)*cos_theta[0]*sin_phi[6]*std::pow(sin_theta[0],6))/64.; - - data[50]=(-3*std::sqrt(5005/(2.*pi))*(5 + 7*cos_theta[1])*sin_phi[5]*std::pow(sin_theta[0],5))/64.; - - data[51]=(-3*std::sqrt(385/pi)*cos_theta[0]*(29 + 91*cos_theta[1])*sin_phi[4]*std::pow(sin_theta[0],4))/128.; - - data[52]=(-3*std::sqrt(385/pi)*(81 + 148*cos_theta[1] + 91*cos_theta[3])*sin_phi[3]*std::pow(sin_theta[0],3))/256.; - - data[53]=(-3*std::sqrt(35/pi)*cos_theta[0]*(523 + 396*cos_theta[1] + 1001*cos_theta[3])*sin_phi[2]*std::pow(sin_theta[0],2))/512.; - - data[54]=(3*std::sqrt(35/(2.*pi))*sin_phi[1]*(75*sin_theta[0] + 171*sin_theta[2] + 55*sin_theta[4] - 1001*sin_theta[6]))/2048.; - - data[55]=-(std::sqrt(105/pi)*(25*cos_theta[0] + 243*cos_theta[2] + 825*cos_theta[4] + 3003*cos_theta[6])*sin_phi[0])/4096.; - - data[56]=(-7*std::sqrt(15/pi)*(25*sin_theta[0] + 81*sin_theta[2] + 165*sin_theta[4] + 429*sin_theta[6]))/2048.; - - data[57]=-(std::sqrt(105/pi)*cos_phi[0]*(25*cos_theta[0] + 243*cos_theta[2] + 825*cos_theta[4] + 3003*cos_theta[6]))/4096.; - - data[58]=(-3*std::sqrt(35/(2.*pi))*cos_phi[1]*(75*sin_theta[0] + 171*sin_theta[2] + 55*sin_theta[4] - 1001*sin_theta[6]))/2048.; - - data[59]=(-3*std::sqrt(35/pi)*cos_phi[2]*(1442*cos_theta[0] + 1397*cos_theta[2] + 1001*cos_theta[4])*std::pow(sin_theta[0],2))/1024.; - - data[60]=(3*std::sqrt(385/pi)*cos_phi[3]*(81 + 148*cos_theta[1] + 91*cos_theta[3])*std::pow(sin_theta[0],3))/256.; - - data[61]=(-3*std::sqrt(385/pi)*cos_phi[4]*(149*cos_theta[0] + 91*cos_theta[2])*std::pow(sin_theta[0],4))/256.; - - data[62]=(3*std::sqrt(5005/(2.*pi))*cos_phi[5]*(5 + 7*cos_theta[1])*std::pow(sin_theta[0],5))/64.; - - data[63]=(-21*std::sqrt(715/pi)*cos_phi[6]*cos_theta[0]*std::pow(sin_theta[0],6))/64.; - - if (lmax==7) return; - - data[64]=(-3*std::sqrt(12155/pi)*cos_theta[0]*sin_phi[7]*std::pow(sin_theta[0],7))/32.; - - data[65]=(-3*std::sqrt(12155/pi)*(3 + 4*cos_theta[1])*sin_phi[6]*std::pow(sin_theta[0],6))/64.; - - data[66]=(-3*std::sqrt(7293/(2.*pi))*cos_theta[0]*(2 + 5*cos_theta[1])*sin_phi[5]*std::pow(sin_theta[0],5))/16.; - - data[67]=(-3*std::sqrt(17017/pi)*(11 + 19*cos_theta[1] + 10*cos_theta[3])*sin_phi[4]*std::pow(sin_theta[0],4))/128.; - - data[68]=(-3*std::sqrt(1309/pi)*cos_theta[0]*(43 + 52*cos_theta[1] + 65*cos_theta[3])*sin_phi[3]*std::pow(sin_theta[0],3))/128.; - - data[69]=(-3*std::sqrt(19635/pi)*(21 + 42*cos_theta[1] + 39*cos_theta[3] + 26*cos_theta[5])*sin_phi[2]*std::pow(sin_theta[0],2))/512.; - - data[70]=(-3*std::sqrt(595/(2.*pi))*(-8 + 121*cos_theta[1] + 143*cos_theta[5])*sin_phi[1]*sin_theta[1])/512.; - - data[71]=(-3*std::sqrt(17/pi)*(35*cos_theta[1] + 154*cos_theta[3] + 429*cos_theta[5] + 1430*cos_theta[7])*sin_phi[0])/2048.; - - data[72]=(-9*std::sqrt(17/pi)*(70*sin_theta[1] + 154*sin_theta[3] + 286*sin_theta[5] + 715*sin_theta[7]))/4096.; - - data[73]=(-3*std::sqrt(17/pi)*cos_phi[0]*(35*cos_theta[1] + 154*cos_theta[3] + 429*cos_theta[5] + 1430*cos_theta[7]))/2048.; - - data[74]=(3*std::sqrt(595/(2.*pi))*cos_phi[1]*(-16*sin_theta[1] - 22*sin_theta[3] + 143*sin_theta[7]))/1024.; - - data[75]=(-3*std::sqrt(19635/pi)*cos_phi[2]*(21 + 42*cos_theta[1] + 39*cos_theta[3] + 26*cos_theta[5])*std::pow(sin_theta[0],2))/512.; - - data[76]=(3*std::sqrt(1309/pi)*cos_phi[3]*(138*cos_theta[0] + 117*cos_theta[2] + 65*cos_theta[4])*std::pow(sin_theta[0],3))/256.; - - data[77]=(-3*std::sqrt(17017/pi)*cos_phi[4]*(11 + 19*cos_theta[1] + 10*cos_theta[3])*std::pow(sin_theta[0],4))/128.; - - data[78]=(3*std::sqrt(7293/(2.*pi))*cos_phi[5]*(9*cos_theta[0] + 5*cos_theta[2])*std::pow(sin_theta[0],5))/32.; - - data[79]=(-3*std::sqrt(12155/pi)*cos_phi[6]*(3 + 4*cos_theta[1])*std::pow(sin_theta[0],6))/64.; - - data[80]=(3*std::sqrt(12155/pi)*cos_phi[7]*cos_theta[0]*std::pow(sin_theta[0],7))/32.; - } - - /// Compute derivative of real-spherical harmonic with respect to phi angle and divide by sin(theta). - static void dRlm_dphi_sin_theta(int lmax, double theta, double phi, mdarray& data) - { - assert(lmax <= 8); - - data[0]=0; - - if (lmax==0) return; - - auto cos_theta = SHT::cosxn(lmax, theta); - auto sin_theta = SHT::sinxn(lmax, theta); - auto cos_phi = SHT::cosxn(lmax, phi); - auto sin_phi = SHT::sinxn(lmax, phi); - - data[1]=-(std::sqrt(3/pi)*cos_phi[0])/2.; - - data[2]=0; - - data[3]=(std::sqrt(3/pi)*sin_phi[0])/2.; - - if (lmax==1) return; - - data[4]=-(std::sqrt(15/pi)*cos_phi[1]*sin_theta[0])/2.; - - data[5]=-(std::sqrt(15/pi)*cos_phi[0]*cos_theta[0])/2.; - - data[6]=0; - - data[7]=(std::sqrt(15/pi)*cos_theta[0]*sin_phi[0])/2.; - - data[8]=-(std::sqrt(15/pi)*cos_phi[0]*sin_phi[0]*sin_theta[0]); - - if (lmax==2) return; - - data[9]=(-3*std::sqrt(35/(2.*pi))*cos_phi[2]*std::pow(sin_theta[0],2))/4.; - - data[10]=-(std::sqrt(105/pi)*cos_phi[1]*sin_theta[1])/4.; - - data[11]=-(std::sqrt(21/(2.*pi))*cos_phi[0]*(3 + 5*cos_theta[1]))/8.; - - data[12]=0; - - data[13]=(std::sqrt(21/(2.*pi))*(3 + 5*cos_theta[1])*sin_phi[0])/8.; - - data[14]=-(std::sqrt(105/pi)*cos_phi[0]*cos_theta[0]*sin_phi[0]*sin_theta[0]); - - data[15]=(3*std::sqrt(35/(2.*pi))*sin_phi[2]*std::pow(sin_theta[0],2))/4.; - - if (lmax==3) return; - - data[16]=(-3*std::sqrt(35/pi)*cos_phi[3]*std::pow(sin_theta[0],3))/4.; - - data[17]=(-9*std::sqrt(35/(2.*pi))*cos_phi[2]*cos_theta[0]*std::pow(sin_theta[0],2))/4.; - - data[18]=(-3*std::sqrt(5/pi)*cos_phi[1]*(3*sin_theta[0] + 7*sin_theta[2]))/16.; - - data[19]=(-3*std::sqrt(5/(2.*pi))*cos_phi[0]*(9*cos_theta[0] + 7*cos_theta[2]))/16.; - - data[20]=0; - - data[21]=(3*std::sqrt(5/(2.*pi))*cos_theta[0]*(1 + 7*cos_theta[1])*sin_phi[0])/8.; - - data[22]=(-3*std::sqrt(5/pi)*sin_phi[1]*(3*sin_theta[0] + 7*sin_theta[2]))/16.; - - data[23]=(9*std::sqrt(35/(2.*pi))*cos_theta[0]*sin_phi[2]*std::pow(sin_theta[0],2))/4.; - - data[24]=(-3*std::sqrt(35/pi)*sin_phi[3]*std::pow(sin_theta[0],3))/4.; - - if (lmax==4) return; - - data[25]=(-15*std::sqrt(77/(2.*pi))*cos_phi[4]*std::pow(sin_theta[0],4))/16.; - - data[26]=(-3*std::sqrt(385/pi)*cos_phi[3]*cos_theta[0]*std::pow(sin_theta[0],3))/4.; - - data[27]=(-3*std::sqrt(385/(2.*pi))*cos_phi[2]*(7 + 9*cos_theta[1])*std::pow(sin_theta[0],2))/32.; - - data[28]=-(std::sqrt(1155/pi)*cos_phi[1]*(2*sin_theta[1] + 3*sin_theta[3]))/32.; - - data[29]=-(std::sqrt(165/pi)*cos_phi[0]*(15 + 28*cos_theta[1] + 21*cos_theta[3]))/128.; - - data[30]=0; - - data[31]=(std::sqrt(165/pi)*(15 + 28*cos_theta[1] + 21*cos_theta[3])*sin_phi[0])/128.; - - data[32]=-(std::sqrt(1155/pi)*sin_phi[1]*(2*sin_theta[1] + 3*sin_theta[3]))/32.; - - data[33]=(3*std::sqrt(385/(2.*pi))*(7 + 9*cos_theta[1])*sin_phi[2]*std::pow(sin_theta[0],2))/32.; - - data[34]=(-3*std::sqrt(385/pi)*cos_theta[0]*sin_phi[3]*std::pow(sin_theta[0],3))/4.; - - data[35]=(15*std::sqrt(77/(2.*pi))*sin_phi[4]*std::pow(sin_theta[0],4))/16.; - - if (lmax==5) return; - - data[36]=(-3*std::sqrt(3003/(2.*pi))*cos_phi[5]*std::pow(sin_theta[0],5))/16.; - - data[37]=(-15*std::sqrt(1001/(2.*pi))*cos_phi[4]*cos_theta[0]*std::pow(sin_theta[0],4))/16.; - - data[38]=(-3*std::sqrt(91/pi)*cos_phi[3]*(9 + 11*cos_theta[1])*std::pow(sin_theta[0],3))/16.; - - data[39]=(-3*std::sqrt(1365/(2.*pi))*cos_phi[2]*(21*cos_theta[0] + 11*cos_theta[2])*std::pow(sin_theta[0],2))/64.; - - data[40]=-(std::sqrt(1365/(2.*pi))*cos_phi[1]*(10*sin_theta[0] + 27*sin_theta[2] + 33*sin_theta[4]))/256.; - - data[41]=-(std::sqrt(273/pi)*cos_phi[0]*(50*cos_theta[0] + 45*cos_theta[2] + 33*cos_theta[4]))/256.; - - data[42]=0; - - data[43]=(std::sqrt(273/pi)*cos_theta[0]*(19 + 12*cos_theta[1] + 33*cos_theta[3])*sin_phi[0])/128.; - - data[44]=-(std::sqrt(1365/(2.*pi))*sin_phi[1]*(10*sin_theta[0] + 27*sin_theta[2] + 33*sin_theta[4]))/256.; - - data[45]=(3*std::sqrt(1365/(2.*pi))*cos_theta[0]*(5 + 11*cos_theta[1])*sin_phi[2]*std::pow(sin_theta[0],2))/32.; - - data[46]=(-3*std::sqrt(91/pi)*(9 + 11*cos_theta[1])*sin_phi[3]*std::pow(sin_theta[0],3))/16.; - - data[47]=(15*std::sqrt(1001/(2.*pi))*cos_theta[0]*sin_phi[4]*std::pow(sin_theta[0],4))/16.; - - data[48]=(-3*std::sqrt(3003/(2.*pi))*sin_phi[5]*std::pow(sin_theta[0],5))/16.; - - if (lmax==6) return; - - data[49]=(-21*std::sqrt(715/pi)*cos_phi[6]*std::pow(sin_theta[0],6))/64.; - - data[50]=(-9*std::sqrt(5005/(2.*pi))*cos_phi[5]*cos_theta[0]*std::pow(sin_theta[0],5))/16.; - - data[51]=(-15*std::sqrt(385/pi)*cos_phi[4]*(11 + 13*cos_theta[1])*std::pow(sin_theta[0],4))/128.; - - data[52]=(-3*std::sqrt(385/pi)*cos_phi[3]*(27*cos_theta[0] + 13*cos_theta[2])*std::pow(sin_theta[0],3))/32.; - - data[53]=(-9*std::sqrt(35/pi)*cos_phi[2]*(189 + 308*cos_theta[1] + 143*cos_theta[3])*std::pow(sin_theta[0],2))/512.; - - data[54]=(-3*std::sqrt(35/(2.*pi))*cos_phi[1]*(75*sin_theta[1] + 132*sin_theta[3] + 143*sin_theta[5]))/512.; - - data[55]=-(std::sqrt(105/pi)*cos_phi[0]*(350 + 675*cos_theta[1] + 594*cos_theta[3] + 429*cos_theta[5]))/2048.; - - data[56]=0; - - data[57]=(std::sqrt(105/pi)*(350 + 675*cos_theta[1] + 594*cos_theta[3] + 429*cos_theta[5])*sin_phi[0])/2048.; - - data[58]=(-3*std::sqrt(35/(2.*pi))*sin_phi[1]*(75*sin_theta[1] + 132*sin_theta[3] + 143*sin_theta[5]))/512.; - - data[59]=(9*std::sqrt(35/pi)*(189 + 308*cos_theta[1] + 143*cos_theta[3])*sin_phi[2]*std::pow(sin_theta[0],2))/512.; - - data[60]=(-3*std::sqrt(385/pi)*cos_theta[0]*(7 + 13*cos_theta[1])*sin_phi[3]*std::pow(sin_theta[0],3))/16.; - - data[61]=(15*std::sqrt(385/pi)*(11 + 13*cos_theta[1])*sin_phi[4]*std::pow(sin_theta[0],4))/128.; - - data[62]=(-9*std::sqrt(5005/(2.*pi))*cos_theta[0]*sin_phi[5]*std::pow(sin_theta[0],5))/16.; - - data[63]=(21*std::sqrt(715/pi)*sin_phi[6]*std::pow(sin_theta[0],6))/64.; - - if (lmax==7) return; - - data[64]=(-3*std::sqrt(12155/pi)*cos_phi[7]*std::pow(sin_theta[0],7))/32.; - - data[65]=(-21*std::sqrt(12155/pi)*cos_phi[6]*cos_theta[0]*std::pow(sin_theta[0],6))/64.; - - data[66]=(-3*std::sqrt(7293/(2.*pi))*cos_phi[5]*(13 + 15*cos_theta[1])*std::pow(sin_theta[0],5))/64.; - - data[67]=(-15*std::sqrt(17017/pi)*cos_phi[4]*(11*cos_theta[0] + 5*cos_theta[2])*std::pow(sin_theta[0],4))/256.; - - data[68]=(-3*std::sqrt(1309/pi)*cos_phi[3]*(99 + 156*cos_theta[1] + 65*cos_theta[3])*std::pow(sin_theta[0],3))/256.; - - data[69]=(-3*std::sqrt(19635/pi)*cos_phi[2]*(126*cos_theta[0] + 91*cos_theta[2] + 39*cos_theta[4])*std::pow(sin_theta[0],2))/1024.; - - data[70]=(-3*std::sqrt(595/(2.*pi))*cos_phi[1]*(35*sin_theta[0] + 11*(9*sin_theta[2] + 13*(sin_theta[4] + sin_theta[6]))))/2048.; - - data[71]=(-3*std::sqrt(17/pi)*cos_phi[0]*(1225*cos_theta[0] + 11*(105*cos_theta[2] + 91*cos_theta[4] + 65*cos_theta[6])))/4096.; - - data[72]=0; - - data[73]=(3*std::sqrt(17/pi)*cos_theta[0]*(178 + 869*cos_theta[1] + 286*cos_theta[3] + 715*cos_theta[5])*sin_phi[0])/2048.; - - data[74]=(-3*std::sqrt(595/(2.*pi))*sin_phi[1]*(35*sin_theta[0] + 11*(9*sin_theta[2] + 13*(sin_theta[4] + sin_theta[6]))))/2048.; - - data[75]=(3*std::sqrt(19635/pi)*cos_theta[0]*(37 + 52*cos_theta[1] + 39*cos_theta[3])*sin_phi[2]*std::pow(sin_theta[0],2))/512.; - - data[76]=(-3*std::sqrt(1309/pi)*(99 + 156*cos_theta[1] + 65*cos_theta[3])*sin_phi[3]*std::pow(sin_theta[0],3))/256.; - - data[77]=(15*std::sqrt(17017/pi)*(11*cos_theta[0] + 5*cos_theta[2])*sin_phi[4]*std::pow(sin_theta[0],4))/256.; - - data[78]=(-3*std::sqrt(7293/(2.*pi))*(13 + 15*cos_theta[1])*sin_phi[5]*std::pow(sin_theta[0],5))/64.; - - data[79]=(21*std::sqrt(12155/pi)*cos_theta[0]*sin_phi[6]*std::pow(sin_theta[0],6))/64.; - - data[80]=(-3*std::sqrt(12155/pi)*sin_phi[7]*std::pow(sin_theta[0],7))/32.; - } - - /// convert 3x3 transformation matrix to SU2 2x2 matrix - /// Create quaternion components from the 3x3 matrix. The components are just a w = Cos(\Omega/2) - /// and {x,y,z} = unit rotation vector multiplied by Sin[\Omega/2] - /// see https://en.wikipedia.org/wiki/Quaternions_and_spatial_rotation - /// and https://en.wikipedia.org/wiki/Rotation_group_SO(3)#Quaternions_of_unit_norm - static mdarray rotation_matrix_su2(const matrix3d& m) - { - double det = m.det() > 0 ? 1.0 : -1.0; - - matrix3d mat = m * det; - mdarray su2mat(2, 2); - - su2mat.zero(); - - /* make quaternion components*/ - double w = sqrt( std::max( 0., 1. + mat(0,0) + mat(1,1) + mat(2,2) ) ) / 2.; - double x = sqrt( std::max( 0., 1. + mat(0,0) - mat(1,1) - mat(2,2) ) ) / 2.; - double y = sqrt( std::max( 0., 1. - mat(0,0) + mat(1,1) - mat(2,2) ) ) / 2.; - double z = sqrt( std::max( 0., 1. - mat(0,0) - mat(1,1) + mat(2,2) ) ) / 2.; - x = std::copysign( x, mat(2,1) - mat(1,2) ); - y = std::copysign( y, mat(0,2) - mat(2,0) ); - z = std::copysign( z, mat(1,0) - mat(0,1) ); - - su2mat(0, 0) = double_complex( w, -z); - su2mat(1, 1) = double_complex( w, z); - su2mat(0, 1) = double_complex(-y, -x); - su2mat(1, 0) = double_complex( y, -x); - - return std::move(su2mat); - } - - /// Compute the derivatives of real spherical harmonics over the components of cartesian vector. - /** The following derivative is computed: - * \f[ - * \frac{\partial R_{\ell m}(\theta_r, \phi_r)}{\partial r_{\mu}} = - * \frac{\partial R_{\ell m}(\theta_r, \phi_r)}{\partial \theta_r} \frac{\partial \theta_r}{\partial r_{\mu}} + - * \frac{\partial R_{\ell m}(\theta_r, \phi_r)}{\partial \phi_r} \frac{\partial \phi_r}{\partial r_{\mu}} - * \f] - * The derivatives of angles are: - * \f[ - * \frac{\partial \theta_r}{\partial r_{x}} = \frac{\cos(\phi_r) \cos(\theta_r)}{r} \\ - * \frac{\partial \theta_r}{\partial r_{y}} = \frac{\cos(\theta_r) \sin(\phi_r)}{r} \\ - * \frac{\partial \theta_r}{\partial r_{z}} = -\frac{\sin(\theta_r)}{r} - * \f] - * and - * \f[ - * \frac{\partial \phi_r}{\partial r_{x}} = -\frac{\sin(\phi_r)}{\sin(\theta_r) r} \\ - * \frac{\partial \phi_r}{\partial r_{y}} = \frac{\cos(\phi_r)}{\sin(\theta_r) r} \\ - * \frac{\partial \phi_r}{\partial r_{z}} = 0 - * \f] - * The derivative of \f$ \phi \f$ has discontinuities at \f$ \theta = 0, \theta=\pi \f$. This, however, is not a problem, because - * multiplication by the the derivative of \f$ R_{\ell m} \f$ removes it. The following functions have to be hardcoded: - * \f[ - * \frac{\partial R_{\ell m}(\theta, \phi)}{\partial \theta} \\ - * \frac{\partial R_{\ell m}(\theta, \phi)}{\partial \phi} \frac{1}{\sin(\theta)} - * \f] - * - * Mathematica script for spherical harmonic derivatives: - \verbatim - Rlm[l_, m_, th_, ph_] := - If[m > 0, Sqrt[2]*ComplexExpand[Re[SphericalHarmonicY[l, m, th, ph]]], - If[m < 0, Sqrt[2]*ComplexExpand[Im[SphericalHarmonicY[l, m, th, ph]]], - If[m == 0, ComplexExpand[Re[SphericalHarmonicY[l, 0, th, ph]]]] - ] - ] - Do[Print[FullSimplify[D[Rlm[l, m, theta, phi], theta]]], {l, 0, 4}, {m, -l, l}] - Do[Print[FullSimplify[TrigExpand[D[Rlm[l, m, theta, phi], phi]/Sin[theta]]]], {l, 0, 4}, {m, -l, l}] - \endverbatim - */ - static void dRlm_dr(int lmax__, vector3d& r__, mdarray& data__) - { - /* get spherical coordinates of the Cartesian vector */ - auto vrs = spherical_coordinates(r__); - - if (vrs[0] < 1e-12) { - data__.zero(); - return; - } - - int lmmax = (lmax__ + 1) * (lmax__ + 1); - - double theta = vrs[1]; - double phi = vrs[2]; - - vector3d dtheta_dr({std::cos(phi) * std::cos(theta), std::cos(theta) * std::sin(phi), -std::sin(theta)}); - vector3d dphi_dr({-std::sin(phi), std::cos(phi), 0.0}); - - mdarray dRlm_dt(lmmax); - mdarray dRlm_dp_sin_t(lmmax); - - dRlm_dtheta(lmax__, theta, phi, dRlm_dt); - dRlm_dphi_sin_theta(lmax__, theta, phi, dRlm_dp_sin_t); - - for (int mu = 0; mu < 3; mu++) { - for (int lm = 0; lm < lmmax; lm++) { - data__(lm, mu) = (dRlm_dt[lm] * dtheta_dr[mu] + dRlm_dp_sin_t[lm] * dphi_dr[mu]) / vrs[0]; - } - } - } - - /// Generate \f$ \cos(m x) \f$ for m in [1, n] using recursion. - static mdarray cosxn(int n__, double x__) - { - assert(n__ > 0); - mdarray data(n__); - data[0] = std::cos(x__); - if (n__ > 1) { - data[1] = std::cos(2 * x__); - for (int i = 2; i < n__; i++) { - data[i] = 2 * data[0] * data[i - 1] - data[i - 2]; - } - } - return std::move(data); - } - - /// Generate \f$ \sin(m x) \f$ for m in [1, n] using recursion. - static mdarray sinxn(int n__, double x__) - { - assert(n__ > 0); - mdarray data(n__); - auto cosx = std::cos(x__); - data[0] = std::sin(x__); - if (n__ > 1) { - data[1] = std::sin(2 * x__); - for (int i = 2; i < n__; i++) { - data[i] = 2 * cosx * data[i - 1] - data[i - 2]; - } - } - return std::move(data); - } -}; - -template <> -inline void SHT::backward_transform(int ld, double const* flm, int nr, int lmmax, double* ftp) -{ - assert(lmmax <= lmmax_); - assert(ld >= lmmax); - linalg::gemm(1, 0, num_points_, nr, lmmax, &rlm_backward_(0, 0), lmmax_, flm, ld, ftp, num_points_); -} - -template <> -inline void SHT::backward_transform(int ld, double_complex const* flm, int nr, int lmmax, double_complex* ftp) -{ - assert(lmmax <= lmmax_); - assert(ld >= lmmax); - linalg::gemm(1, 0, num_points_, nr, lmmax, &ylm_backward_(0, 0), lmmax_, flm, ld, ftp, num_points_); -} - -template <> -inline void SHT::forward_transform(double const* ftp, int nr, int lmmax, int ld, double* flm) -{ - assert(lmmax <= lmmax_); - assert(ld >= lmmax); - linalg::gemm(1, 0, lmmax, nr, num_points_, &rlm_forward_(0, 0), num_points_, ftp, num_points_, flm, ld); -} - -template <> -inline void SHT::forward_transform(double_complex const* ftp, int nr, int lmmax, int ld, double_complex* flm) -{ - assert(lmmax <= lmmax_); - assert(ld >= lmmax); - linalg::gemm(1, 0, lmmax, nr, num_points_, &ylm_forward_(0, 0), num_points_, ftp, num_points_, flm, ld); -} - -} - -#endif // __SHT_H__ diff --git a/src/simulation_context.h b/src/simulation_context.h index 2a774ea9c..6a865ac2b 100644 --- a/src/simulation_context.h +++ b/src/simulation_context.h @@ -33,7 +33,7 @@ #include "radial_integrals.h" #include "utils/utils.hpp" #include "memory_pool.hpp" -#include "augmentation_operator.h" +#include "Density/augmentation_operator.hpp" #ifdef __GPU #include "SDDK/GPU/cuda.hpp" diff --git a/src/sirius.h b/src/sirius.h index 47325716b..b0650236e 100644 --- a/src/sirius.h +++ b/src/sirius.h @@ -38,11 +38,11 @@ using json = nlohmann::json; #include "radial_grid.h" #include "spline.h" #include "radial_solver.h" -#include "sht.h" -#include "gaunt.h" +#include "SHT/sht.hpp" +#include "SHT/gaunt.hpp" #include "sddk.hpp" #include "hdf5_tree.hpp" -#include "xc_functional.h" +#include "Potential/xc_functional.hpp" #include "descriptors.h" #include "mixer.h" #include "Unit_cell/free_atom.hpp" diff --git a/src/spheric_function.h b/src/spheric_function.h index c58467b83..065cf935c 100644 --- a/src/spheric_function.h +++ b/src/spheric_function.h @@ -30,7 +30,7 @@ #include #include "radial_grid.h" #include "spline.h" -#include "sht.h" +#include "SHT/sht.hpp" namespace sirius { @@ -42,7 +42,7 @@ class Spheric_function: public mdarray /// Radial grid. Radial_grid const* radial_grid_{nullptr}; - + int angular_domain_size_; Spheric_function(Spheric_function const& src__) = delete;