From 67b90563f35be14c7239e7cac12528064a86f187 Mon Sep 17 00:00:00 2001 From: Bartosz Kostrzewa Date: Wed, 2 Aug 2017 16:05:36 +0200 Subject: [PATCH] WIP: very inefficient implementation of two-flavour tmclover together with some performance improvements to single-flavour tmclover through the usage of the CloverBlock term in AChiMinusBDPsi. The two-flavour tmclover operator does not yet work correctly for non-zero mass-splittings. It seems that the twisted mass part works correctly. --- codegen/dslash_common.cc | 56 +++--- codegen/jinja/tm_clov_dslash_decl.h.j2 | 2 +- codegen/jinja/tm_clov_dslash_tbc_spec.h.j2 | 2 +- include/qphix/CMakeLists.txt | 2 +- include/qphix/ndtm_reuse_operator.h | 2 - include/qphix/ndtm_reuse_operator_clover.h | 117 ++++++----- include/qphix/tm_clov_dslash_body.h | 84 ++++---- include/qphix/tm_clov_dslash_def.h | 213 ++++++++------------- include/qphix/twisted_mass_clover.h | 16 +- 9 files changed, 215 insertions(+), 279 deletions(-) diff --git a/codegen/dslash_common.cc b/codegen/dslash_common.cc index b0c7c35b..177100ae 100644 --- a/codegen/dslash_common.cc +++ b/codegen/dslash_common.cc @@ -1589,21 +1589,26 @@ void achiResult(InstVector &ivector, } } - // Apply clover term, and store result in out spinor. - // This is only on the AChi - bDPsi op (achimbdpsi = true) - // This is only in body kernel (face = false) - if (twisted_mass == TwistedMassVariant::none) { - clover_term(ivector, chi_spinor, false); - } else if (twisted_mass == TwistedMassVariant::degenerate) { - full_clover_term(ivector, chi_spinor, false); - } else if (twisted_mass == TwistedMassVariant::non_degenerate) { - // TODO Here something new for the ND case has to be - // implemented. - // Currently this is just copied from the degenerate case. - full_clover_term(ivector, chi_spinor, false); - } else { - unsupported_twisted_mass_variant(); - } + clover_term(ivector, chi_spinor, false); + + // BaKo WIP: for AChi - bDPsi, we always use CloverBlock, the twisted mass + // (or epsilon) is added using linear algebra in the EO operator + // + //// Apply clover term, and store result in out spinor. + //// This is only on the AChi - bDPsi op (achimbdpsi = true) + //// This is only in body kernel (face = false) + //if (twisted_mass == TwistedMassVariant::none) { + // clover_term(ivector, chi_spinor, false); + //} else if (twisted_mass == TwistedMassVariant::degenerate) { + // full_clover_term(ivector, chi_spinor, false); + //} else if (twisted_mass == TwistedMassVariant::non_degenerate) { + // // TODO Here something new for the ND case has to be + // // implemented. + // // Currently this is just copied from the degenerate case. + // full_clover_term(ivector, chi_spinor, false); + //} else { + // unsupported_twisted_mass_variant(); + //} } else { // NO CLOVER if (twisted_mass == TwistedMassVariant::none) { for (int col = 0; col < 3; col++) { @@ -1641,9 +1646,8 @@ void achiResult(InstVector &ivector, // for pure twisted mass and stores result in out_spinor twisted_term(ivector, isPlus); } else if (twisted_mass == TwistedMassVariant::non_degenerate) { - // TODO Here something new for the ND case has to be - // implemented. - // Currently this is just copied from the degenerate case. + // the full ND twisted mass term with mass splitting is implemented + // as a linalg functor in QPhiX, this is basically never used // Loads spinor elements from chiBase, multiplies with A // for pure twisted mass and stores result in out_spinor @@ -1853,18 +1857,10 @@ void dslash_achimbdpsi_body(InstVector &ivector, } if (clover) { - if (twisted_mass == TwistedMassVariant::none) { - declare_clover(ivector); - } else if (twisted_mass == TwistedMassVariant::degenerate) { - declare_full_clover(ivector); - } else if (twisted_mass == TwistedMassVariant::non_degenerate) { - // TODO Here something new for the ND case has to be - // implemented. - // Currently this is just copied from the degenerate case. - declare_full_clover(ivector); - } else { - unsupported_twisted_mass_variant(); - } + // whether we are using twisted mass or not, the clover term in AChiMinusBDPsi + // is always of the Wilson form. The twisted mass is added via the preconditioning + // mass below. + declare_clover(ivector); } // Fill result with a*chi diff --git a/codegen/jinja/tm_clov_dslash_decl.h.j2 b/codegen/jinja/tm_clov_dslash_decl.h.j2 index a7faf743..e3174157 100644 --- a/codegen/jinja/tm_clov_dslash_decl.h.j2 +++ b/codegen/jinja/tm_clov_dslash_decl.h.j2 @@ -62,7 +62,7 @@ void tm_clov_dslash_achimbdpsi_{{ isign }}_vec( typename Types::FourSpinorBlock *oBase, const typename Types::SU3MatrixBlock *gBase, - const typename Types::FullCloverBlock + const typename Types::CloverBlock *clBase, const int xbOffs[veclen], const int xfOffs[veclen], diff --git a/codegen/jinja/tm_clov_dslash_tbc_spec.h.j2 b/codegen/jinja/tm_clov_dslash_tbc_spec.h.j2 index 3cc93aa0..1c0c0550 100644 --- a/codegen/jinja/tm_clov_dslash_tbc_spec.h.j2 +++ b/codegen/jinja/tm_clov_dslash_tbc_spec.h.j2 @@ -42,7 +42,7 @@ void tm_clov_dslash_achimbdpsi_{{ isign }}_vec::FourSpinorBlock *chiBase, Types::FourSpinorBlock *oBase, const Types::SU3MatrixBlock *gBase, - const Types::FullCloverBlock *clBase, + const Types::CloverBlock *clBase, const int xbOffs[VEC], const int xfOffs[VEC], const int ybOffs[VEC], diff --git a/include/qphix/CMakeLists.txt b/include/qphix/CMakeLists.txt index b184d257..8c4b79d2 100644 --- a/include/qphix/CMakeLists.txt +++ b/include/qphix/CMakeLists.txt @@ -42,7 +42,7 @@ if( ${twisted_mass} ) endif() if( ${tm_clover} ) - list(APPEND header_files twisted_mass_clover.h tm_clov_dslash_def.h tm_clov_dslash_body.h tm_clov_dslash_face.h) + list(APPEND header_files ndtm_reuse_operator_clover.h twisted_mass_clover.h tm_clov_dslash_def.h tm_clov_dslash_body.h tm_clov_dslash_face.h) endif() list(APPEND header_files ${PROJECT_BINARY_DIR}/include/qphix/qphix_config_internal.h) diff --git a/include/qphix/ndtm_reuse_operator.h b/include/qphix/ndtm_reuse_operator.h index bba63bdd..8eb5c9c6 100644 --- a/include/qphix/ndtm_reuse_operator.h +++ b/include/qphix/ndtm_reuse_operator.h @@ -14,8 +14,6 @@ class EvenOddNDTMWilsonReuseOperator : public TwoFlavEvenOddLinearOperator { public: - typedef - typename Geometry::FullCloverBlock FullCloverBlock; typedef typename Geometry::FourSpinorBlock FourSpinorBlock; typedef diff --git a/include/qphix/ndtm_reuse_operator_clover.h b/include/qphix/ndtm_reuse_operator_clover.h index e0c1d2f9..c2a8f47f 100644 --- a/include/qphix/ndtm_reuse_operator_clover.h +++ b/include/qphix/ndtm_reuse_operator_clover.h @@ -2,6 +2,7 @@ #include "qphix/linearOp.h" #include "qphix/tm_clov_dslash_def.h" +#include "qphix/clover_dslash_def.h" #include @@ -26,30 +27,32 @@ class EvenOddNDTMCloverReuseOperator public: typedef typename Geometry::FullCloverBlock FullCloverBlock; + typedef typename Geometry::CloverBlock + CloverBlock; typedef typename Geometry::FourSpinorBlock FourSpinorBlock; typedef typename Geometry::SU3MatrixBlock SU3MatrixBlock; /** - \param[in] u Gauge fields, one for each checkerboard index. - - \param[in] clov Two flavor parts of the odd-odd term. The first index is - the flavor index, the second index is for the hermitian conjugation, just - like \p clov of \ref dslashAChiMinusBDPsi or \p invclov of \ref - two_flav_dslash. - - \param[in] invclov The four blocks of \f$ (A^{-1})_{ff'} \f$. The first - (slowest) index iterates through the flavor index \f$ f \f$, it coincides - with the resulting spinor flavor index. The second index is \f$ f' \f$ - which coincides with the input spinor flavor indices. The last (fastest) - index is the same as for the \ref dslash function: It is the hermitian - conjugate of the inverse clover block. + \param[in] u_ Gauge fields, one for each checkerboard index. + + \param[in] clov_ Single Wilson-clover style clover term which will be applied + in two_flav_AChiMinusBDPsi to all flavour components with the twisted quark + mass and the mass splitting applied subsequently. + + \param[in] invclov_odiag_ The flavour off-diagonal inverse clover term + including the mass splitting. + + \param[in] invclov_ FullCloverBlock inverse clover term for the flavour-diagonal + contribution as applied by the Twisted Clover Dslash. */ - EvenOddNDTMCloverReuseOperator(SU3MatrixBlock *u_[2], - FullCloverBlock *clov_[2][2], - FullCloverBlock *invclov_[2][2][2], - double epsilon, + EvenOddNDTMCloverReuseOperator(double TwistedMass_, + double Epsilon_, + SU3MatrixBlock *u_[2], + CloverBlock *clov_, + CloverBlock *invclov_odiag_, + FullCloverBlock *invclov_[2], Geometry *geom_, double t_boundary, double aniso_coeff_s, @@ -57,77 +60,93 @@ class EvenOddNDTMCloverReuseOperator bool use_tbc_[4] = nullptr, double tbc_phases_[4][2] = nullptr) : EvenOddNDTMCloverReuseOperator( + TwistedMass_, Epsilon_, geom_, t_boundary, aniso_coeff_s, aniso_coeff_t, use_tbc_, tbc_phases_) { - this->epsilon = epsilon; - setFields(u_, clov_, invclov_); + setFields(u_, clov_, invclov_odiag_, invclov_); } - EvenOddNDTMCloverReuseOperator(Geometry *geom_, + EvenOddNDTMCloverReuseOperator(double TwistedMass_, + double Epsilon_, + Geometry *geom_, double t_boundary, double aniso_coeff_s, double aniso_coeff_t, bool use_tbc_[4] = nullptr, double tbc_phases_[4][2] = nullptr) - : D(new TMClovDslash( - geom_, t_boundary, aniso_coeff_s, aniso_coeff_t, use_tbc_, tbc_phases_)), - tmp{D->getGeometry().allocCBFourSpinor(), - D->getGeometry().allocCBFourSpinor()} + : Dtmcl(new TMClovDslash( + geom_, t_boundary, aniso_coeff_s, aniso_coeff_t, use_tbc_, tbc_phases_, + TwistedMass_)), + Dwcl(new ClovDslash( + geom_, t_boundary, aniso_coeff_s, aniso_coeff_t, use_tbc_, tbc_phases_)), + tmp{Dtmcl->getGeometry().allocCBFourSpinor(), + Dtmcl->getGeometry().allocCBFourSpinor()}, + odiag_tmp{Dtmcl->getGeometry().allocCBFourSpinor(), + Dtmcl->getGeometry().allocCBFourSpinor()}, + Epsilon(Epsilon_) { } ~EvenOddNDTMCloverReuseOperator() { - Geometry &geom = D->getGeometry(); - for (int f : {0, 1}) { - geom.free(tmp[f]); + Geometry &geom = Dtmcl->getGeometry(); + for (int fl : {0, 1}) { + geom.free(tmp[fl]); + geom.free(odiag_tmp[fl]); } } void setFields(SU3MatrixBlock *u_[2], - FullCloverBlock *clov_[2][2], - FullCloverBlock *invclov_[2][2][2]) + CloverBlock *clov_, + CloverBlock *invclov_odiag_, + FullCloverBlock *invclov_[2]) { - for (int pm : {0, 1}) { - u[pm] = u_[pm]; - for (int f1 : {0, 1}) { - clov[pm][f1] = clov_[pm][f1]; - for (int f2 : {0, 1}) { - invclov[pm][f1][f2] = invclov_[pm][f1][f2]; - } - } + for (int cb : {0, 1}) { + u[cb] = u_[cb]; + } + clov = clov_; + invclov_odiag = invclov_odiag_; + for (int fl : {0, 1}) { + invclov[fl] = invclov_[fl]; } } - void operator()(FourSpinorBlock *res[2], + void operator()(FourSpinorBlock *const res[2], const FourSpinorBlock *const in[2], - int isign, int target_cb) override + int isign, + int target_cb = 1) override { double beta = 0.25; const int other_cb = 1-target_cb; - D->two_flav_dslash(tmp, in, u[other_cb], invclov, isign, other_cb); - D->two_flav_achimbdpsi(res, tmp, in, u[target_cb], clov, beta, epsilon, isign, target_cb); + + for(int fl : {0, 1}){ + Dtmcl->dslash(tmp[fl], in[fl], u[other_cb], invclov, isign, other_cb, fl); + Dwcl->dslash(odiag_tmp[fl], in[1-fl], u[other_cb], invclov_odiag, isign, other_cb); + } + axpy(1.0, odiag_tmp, tmp, getGeometry(), getGeometry().getNSIMT()); + + Dtmcl->two_flav_AChiMinusBDPsi(res, tmp, in, u[target_cb], clov, beta, Epsilon, isign, target_cb); } Geometry &getGeometry() { - return D->getGeometry(); + return Dtmcl->getGeometry(); } private: - double Mass; - double epsilon; + double Epsilon; - // XXX Use a simple member, then there is no need for a virtual function - // call within `operator()`. - std::unique_ptr> D; + std::unique_ptr> Dtmcl; + std::unique_ptr> Dwcl; SU3MatrixBlock const *u[2]; - FullCloverBlock const *clov[2][2]; - FullCloverBlock const *invclov[2][2][2]; + CloverBlock const *clov; + CloverBlock const *invclov_odiag; + FullCloverBlock const *invclov[2]; FourSpinorBlock *tmp[2]; + FourSpinorBlock *odiag_tmp[2]; }; // Class }; // Namespace diff --git a/include/qphix/tm_clov_dslash_body.h b/include/qphix/tm_clov_dslash_body.h index 8b862246..f4020937 100644 --- a/include/qphix/tm_clov_dslash_body.h +++ b/include/qphix/tm_clov_dslash_body.h @@ -342,9 +342,15 @@ void TMClovDslash::dslash( const SU3MatrixBlock *u, const FullCloverBlock *const invclov[2], int isign, - int cb) + int cb, + int fl) { - DPsi(u, invclov[isign == 1 ? 0 : 1], psi, res, isign == 1, cb); + /* for the 'up' flavour (0), the FullCloverBlock with the positive twisted mass + * is used for the unconjugated operator and the block with negative twisted + * mass for the conjugated operator. For the 'down' flavour (1), the relationship + * is reversed. */ + const int clidx = fl == 0 ? ( isign == 1 ? 0 : 1 ) : ( isign == 1 ? 1 : 0 ); + DPsi(u, invclov[clidx], psi, res, isign == 1, cb); } template @@ -353,13 +359,14 @@ void TMClovDslash::dslashAChiMinusBDPsi( const FourSpinorBlock *psi, const FourSpinorBlock *chi, const SU3MatrixBlock *u, - const FullCloverBlock *clov[2], + const CloverBlock *clov, double beta, int isign, - int cb) + int cb, + int fl) { DPsiAChiMinusBDPsi( - u, clov[isign == 1 ? 0 : 1], psi, chi, res, beta, isign == 1, cb); + u, clov, psi, chi, res, beta, isign == 1, cb, fl); } template @@ -688,10 +695,11 @@ void TMClovDslash::DyzAChiMinusBDPsi( const FourSpinorBlock *chi, FourSpinorBlock *res, const SU3MatrixBlock *u, - const FullCloverBlock *clov, + const CloverBlock *clov, double beta, bool const is_plus, - int cb) + int cb, + int fl) { auto kernel = (is_plus @@ -915,10 +923,10 @@ void TMClovDslash::DyzAChiMinusBDPsi( (cx_next - cx)) * gauge_line_in_floats; - const FullCloverBlock *clBase = + const CloverBlock *clBase = &clov[(t * Pxyz + z * Pxy + yi * nvecs) / nyg + cx]; const int clov_line_in_floats = - sizeof(FullCloverBlock) / + sizeof(CloverBlock) / sizeof(FT); // One gauge scanline, in floats int clprefdist = (((t_next - t) * Pxyz + (z_next - z) * Pxy + (yi_next - yi) * nvecs) / @@ -970,7 +978,9 @@ void TMClovDslash::DyzAChiMinusBDPsi( FT beta_s_T = rep(beta_s); FT beta_t_f_T = rep(beta_t_f); FT beta_t_b_T = rep(beta_t_b); - FT prec_mass_rho_T = rep(prec_mass_rho); + // when working on the 'down' flavour, the negative twisted mass is passed + // down + FT prec_mass_rho_T = rep( (fl==1 ? -1.0 : 1.0 ) * prec_mass_rho); kernel(xyBase + X, zbBase + X, @@ -1102,13 +1112,14 @@ void TMClovDslash::DPsi( template void TMClovDslash::DPsiAChiMinusBDPsi( const SU3MatrixBlock *u, - const FullCloverBlock *clov, + const CloverBlock *clov, const FourSpinorBlock *psi_in, const FourSpinorBlock *chi_in, FourSpinorBlock *res_out, double beta, bool const is_plus, - int cb) + int cb, + int fl) { double beta_s = beta * aniso_coeff_S; @@ -1147,7 +1158,7 @@ void TMClovDslash::DPsiAChiMinusBDPsi( #pragma omp parallel { int tid = omp_get_thread_num(); - DyzAChiMinusBDPsi(tid, psi_in, chi_in, res_out, u, clov, beta, is_plus, cb); + DyzAChiMinusBDPsi(tid, psi_in, chi_in, res_out, u, clov, beta, is_plus, cb, fl); } #ifdef QPHIX_DO_COMMS @@ -1171,55 +1182,28 @@ void TMClovDslash::DPsiAChiMinusBDPsi( } template -template -void TMClovDslash::two_flav_dslash( - FourSpinorBlock *res[2], - Spinor1 *const psi[2], - const SU3MatrixBlock *u, - const FullCloverBlock *const invclov[2][2][2], - const int isign, - const int cb) -{ - FourSpinorBlock *tmp = s->allocCBFourSpinor(); - - // Iterate over the two result flavors … - for (int f : {0, 1}) { - // Compute the two summands of the current result flavor. - dslash(res[f], psi[0], u, invclov[f][0], isign, cb); - dslash(tmp, psi[1], u, invclov[f][1], isign, cb); - - // Add the two summands into the result. - const int n_blas_simt = 1; - axpy(1.0, tmp, res[f], *s, n_blas_simt); - } - - s->free(tmp); -} - -template -template -void TMClovDslash::two_flav_achimbdpsi( - FourSpinorBlock *res[2], - Spinor1 *const chi[2], - Spinor2 *const psi[2], +void TMClovDslash::two_flav_AChiMinusBDPsi( + FourSpinorBlock *const res[2], + const FourSpinorBlock *const chi[2], + const FourSpinorBlock *const psi[2], const SU3MatrixBlock *u, - const FullCloverBlock *clov[2][2], + const CloverBlock *clov, const double beta, const double epsilon, const int isign, const int cb) { - const int n_blas_simt = 1; + const int n_blas_simt = s->getNSIMT();; // Iterate over the two result flavors … - for (int f : {0, 1}) { + for (int fl : {0, 1}) { // Compute the flavor-diagonal part. - dslashAChiMinusBDPsi(res[f], chi[f], psi[f], u, clov[f], beta, isign, cb); + dslashAChiMinusBDPsi(res[fl], chi[fl], psi[fl], u, clov, beta, isign, cb, fl); // The `res[f]` contains the flavor-diagonal part. Now the flavor // off-diagonal part has to be added. This is just the opposite - // flavor χ multiplied with ε. - axpy(epsilon, chi[1 - f], res[f], *s, n_blas_simt); + // flavor χ multiplied with -ε. + axpy(-epsilon, chi[1 - fl], res[fl], *s, n_blas_simt); } } diff --git a/include/qphix/tm_clov_dslash_def.h b/include/qphix/tm_clov_dslash_def.h index 099908f6..3d5de1e7 100644 --- a/include/qphix/tm_clov_dslash_def.h +++ b/include/qphix/tm_clov_dslash_def.h @@ -20,6 +20,8 @@ class TMClovDslash TwoSpinorBlock; typedef typename Geometry::FullCloverBlock FullCloverBlock; + typedef typename Geometry::CloverBlock + CloverBlock; // Constructor TMClovDslash(Geometry *geom_, @@ -41,137 +43,23 @@ class TMClovDslash const SU3MatrixBlock *u, const FullCloverBlock *const invclov[2], int isign, - int cb); + int cb, + int fl = 0); // Apply the A*chi-b*D*psi operator - // w/ A = alpha 1 +/- i mu gamma_5 + c_SW T = clov, + // w/ A = alpha 1 + c_SW T = clov, // passed (and suitably packed) externally + // The twisted quark mass (plus any preconditioning msss) + // is applied via prec_mass_rho void dslashAChiMinusBDPsi(FourSpinorBlock *res, const FourSpinorBlock *psi, const FourSpinorBlock *chi, const SU3MatrixBlock *u, - const FullCloverBlock *clov[2], + const CloverBlock *clov, const double beta, int isign, - int cb); - - /** - Clover %Dslash for the non-degenerate twisted mass case. - - This implementation of the non-degenerate case uses the degenerate - twisted clover code. The existing and tested code is called four - times for each element in the flavor matrix. The flavor structure is - decomposed into single-flavor %Dslash applications where the existing - code is used. This way little new code has to be written, the chance - of introducing errors is reduced. - - The existing code computes \f$ \chi := A^{-1} D \psi \f$, where \f$ - \psi \f$ is a single flavor spinor. \f$ A = 4 + m \pm \mathrm i \mu - \gamma_5 + c_\mathrm{SW} T_\mathrm{ee} \f$ is the even-even term with - mass \f$ m \f$, twisted mass \f$ \mu \f$, and clover term \f$ T \f$ - defined on the even lattice sites only. The hopping matrix \f$ D \f$ - is just the Wilson %Dslash operator. - - In the degenerate case, it should actually be \f$ \mathrm i \mu - \gamma_5 \tau^3 \f$, where this is a two-flavor expression. It is - diagonal in flavor-space, therefore the determinant of the whole - expression will eventually be just the product of the determinants of - the two blocks. Exploding the diagonal structure, one gets away with - having a single-flavor operator where the sign of the \f$ \mu \f$ - term can be changed by hermitian conjugation. - - The non-degenerate twisted mass case has an additional \f$ - \epsilon - \tau^1 \f$ in the even-even term \f$ A \f$. Therefore it becomes - densely populated in flavor-space. The decomposition of the - determinant is no longer possible, therefore an additional operator - is needed. Also the inverse of \f$ A \f$ is not the inverse of the - blocks. The interface to this function accepts \f$ A^{-1} \f$ from - outside, so it is the caller's responsibility to make the appropriate - inversion of \f$ A \f$. - - Using the existing one-flavor %Dslash, this function will compute the - two-flavor %Dslash like this: - \f[ - - \begin{pmatrix} - \chi_\mathrm u \\ \chi_\mathrm d - \end{pmatrix} - := - \begin{pmatrix} - (A^{-1})_\mathrm{uu} & - (A^{-1})_\mathrm{ud} \\ - (A^{-1})_\mathrm{du} & - (A^{-1})_\mathrm{dd} - \end{pmatrix} - D - \begin{pmatrix} - \psi_\mathrm u \\ \psi_\mathrm d - \end{pmatrix} - = - \begin{pmatrix} - (A^{-1})_\mathrm{uu} D \psi_\mathrm u + - (A^{-1})_\mathrm{ud} D \psi_\mathrm d \\ - (A^{-1})_\mathrm{du} D \psi_\mathrm u + - (A^{-1})_\mathrm{dd} D \psi_\mathrm d - \end{pmatrix} - \,. - \f] - - There will be four calls to \ref dslash with different arguments. The - two resulting up-spinors will be accumulated, same with the two - down-spinors. The result are two separate spinors. - - \test By setting the flavor-off-diagonal parts of \f$ A^{-1} \f$ to a - zero matrix, the degenerate case is recovered again. This can then be - compared to two invocations of \ref dslash. This will make sure that - the decomposition of the flavor structure is done correctly. Also it - verifies that the accumulation of the intermediate results within the - flavor matrix multiplication is done correctly. - - \test Once the real non-degenerate implementation has been finished in - \ref NDTMClovDslash, the two implementations can be compared to each - other. The results should be very close numerically, though a - different order in the summations might lead to small rounding - deviations. - - \param[out] res Resulting two flavor spinor. The slowest index is the - flavor. - - \param[in] psi Input spinors. The slowest index is the flavor. - - \param[in] u Gauge field. - - \param[in] invclov The four blocks of \f$ (A^{-1})_{ff'} \f$. The - first (slowest) index iterates through the flavor index \f$ f \f$, it - coincides with the resulting spinor flavor index. The second index is - \f$ f' \f$ which coincides with the input spinor flavor indices. The - last (fastest) index is the same as for the \ref dslash function: It - is the hermitian conjugate of the inverse clover block. - - @param[in] isign The value \f$ +1 \f$ gives the operator as-is, the - value \f$ -1 \f$ applies the hermitian conjugate operator. - - @param[in] cb The checkerbord index of the target spinor, value 0 or - 1. - - \todo Figure out whether the hermitian conjugated FullCloverBlock - should also conjugate in flavor space. - - \todo We want to have \f$ \gamma_5 D \tau^3 \f$ as the overall - operator. It does not seem obvious how this translate to this operator - here, do we want \f$ \gamma_5 A^{-1} D \tau^3 \f$ or rather \f$ A^{-1} - \gamma_5 D \tau^3 \f$? - - \author Martin Ueding - - */ - template - void two_flav_dslash(FourSpinorBlock *res[2], - Spinor1 *const psi[2], - const SU3MatrixBlock *u, - const FullCloverBlock *const invclov[2][2][2], - const int isign, - const int cb); + int cb, + int fl = 0); /** Clover \f$ A \chi - b D \psi \f$ for the non-degenerate twisted mass @@ -236,17 +124,67 @@ class TMClovDslash explained again here. \author Martin Ueding + \author Bartosz Kostrzewa */ - template - void two_flav_achimbdpsi(FourSpinorBlock *res[2], - Spinor1 *const chi[2], - Spinor2 *const psi[2], - const SU3MatrixBlock *u, - const FullCloverBlock *clov[2][2], - const double beta, - const double epsilon, - const int isign, - const int cb); + void two_flav_AChiMinusBDPsi(FourSpinorBlock *const res[2], + const FourSpinorBlock *const chi[2], + const FourSpinorBlock *const psi[2], + const SU3MatrixBlock *u, + const CloverBlock *clov, + const double beta, + const double epsilon, + const int isign, + const int cb); + +#ifdef __INTEL_COMPILER + void two_flav_AChiMinusBDPsi(FourSpinorBlock *const res[2], + FourSpinorBlock *const psi[2], + FourSpinorBlock *const chi[2], + const SU3MatrixBlock *u, + const CloverBlock *clov, + double beta, + double epsilon, + int isign, + int cb) + { + this->two_flav_AChiMinusBDPsi(res, + const_cast(psi), + const_cast(chi), + u, alpha, beta, epsilon, isign, cb); + } + + void two_flav_AChiMinusBDPsi(FourSpinorBlock * const res[2], + FourSpinorBlock * psi[2], + FourSpinorBlock * chi[2], + const SU3MatrixBlock *u, + const CloverBlock *clov, + double beta, + double epsilon, + int isign, + int cb) + { + this->two_flav_AChiMinusBDPsi(res, + const_cast(psi), + const_cast(chi), + u, alpha, beta, epsilon, isign, cb); + } + + void two_flav_AChiMinusBDPsi(FourSpinorBlock * const res[2], + FourSpinorBlock * psi[2], + const FourSpinorBlock * const chi[2], + const SU3MatrixBlock *u, + const CloverBlock *clov, + double beta, + double epsilon, + int isign, + int cb) + { + this->two_flav_AChiMinusBDPsi(res, + const_cast(psi), + const_cast(chi), + u, alpha, beta, epsilon, isign, cb); + } +#endif // __INTEL_COMPILER workaround void free(void *p); @@ -311,10 +249,11 @@ class TMClovDslash const FourSpinorBlock *chi, FourSpinorBlock *res, const SU3MatrixBlock *u, - const FullCloverBlock *clov, + const CloverBlock *clov, double beta, bool const is_plus, - int cb); + int cb, + int fl = 0); void DPsi(const SU3MatrixBlock *u, const FullCloverBlock *invclov, @@ -324,15 +263,15 @@ class TMClovDslash int cb); void DPsiAChiMinusBDPsi(const SU3MatrixBlock *u, - const FullCloverBlock *clov, + const CloverBlock *clov, const FourSpinorBlock *psi_in, const FourSpinorBlock *chi, FourSpinorBlock *res_out, double beta, bool const is_plus, - int cb); + int cb, + int fl = 0); -// DISABLE COMMS FOR NOW #ifdef QPHIX_DO_COMMS void packFaceDir(int tid, const FourSpinorBlock *psi, diff --git a/include/qphix/twisted_mass_clover.h b/include/qphix/twisted_mass_clover.h index 9c720d8f..c0be3a2b 100644 --- a/include/qphix/twisted_mass_clover.h +++ b/include/qphix/twisted_mass_clover.h @@ -14,6 +14,8 @@ class EvenOddTMCloverOperator public: typedef typename Geometry::FullCloverBlock FullCloverBlock; + typedef typename Geometry::CloverBlock + CloverBlock; typedef typename Geometry::FourSpinorBlock FourSpinorBlock; typedef typename Geometry::SU3MatrixBlock @@ -22,7 +24,7 @@ class EvenOddTMCloverOperator // Constructor // No anisotropy, all boundaries periodic for now. EvenOddTMCloverOperator(SU3MatrixBlock *u_[2], - FullCloverBlock *clov_[2], + CloverBlock *clov_, FullCloverBlock *invclov_[2], Geometry *geom_, double t_boundary, @@ -42,8 +44,7 @@ class EvenOddTMCloverOperator Geometry &geom = D->getGeometry(); u[0] = u_[0]; u[1] = u_[1]; - clov[0] = clov_[0]; - clov[1] = clov_[1]; + clov = clov_; invclov[0] = invclov_[0]; invclov[1] = invclov_[1]; tmp = (FourSpinorBlock *)geom.allocCBFourSpinor(); @@ -77,13 +78,12 @@ class EvenOddTMCloverOperator } void setFields(SU3MatrixBlock *u_[2], - FullCloverBlock *clov_[2], + CloverBlock *clov_, FullCloverBlock *invclov_[2]) { u[0] = u_[0]; u[1] = u_[1]; - clov[0] = clov_[0]; - clov[1] = clov_[1]; + clov = clov_; invclov[0] = invclov_[0]; invclov[1] = invclov_[1]; } @@ -102,7 +102,7 @@ class EvenOddTMCloverOperator tmp, in, u[target_cb], - (const FullCloverBlock **)clov, + clov, beta, isign, target_cb); @@ -117,7 +117,7 @@ class EvenOddTMCloverOperator double Mass; TMClovDslash *D; SU3MatrixBlock *u[2]; - FullCloverBlock *clov[2]; + CloverBlock *clov; FullCloverBlock *invclov[2]; FourSpinorBlock *tmp;