Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix wrong output size in SplineC2ROMPTarget::mw_evaluateVGLandDetRatioGrads #4408

Merged
Merged 7 commits on Jan 23, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Update evaluateValue
  • Loading branch information
ye-luo committed Jan 22, 2023
commit deb440d8785ccf5986b548f7d582b4cbc5767763
2 changes: 1 addition & 1 deletion src/QMCWaveFunctions/BsplineFactory/SplineC2R.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ namespace qmcplusplus
* The internal storage of complex spline coefficients uses double sized real arrays of ST type, aligned and padded.
* The first nComplexBands complex splines produce 2 real orbitals.
* The remaining complex splines each produce 1 real orbital.
* All the output orbitals are real.
* All the output orbitals are real (C2R). The maximal number of output orbitals is OrbitalSetSize.
*/
template<typename ST>
class SplineC2R : public BsplineSet
Expand Down
21 changes: 13 additions & 8 deletions src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -248,24 +248,27 @@ void SplineC2ROMPTarget<ST>::evaluateValue(const ParticleSet& P, const int iat,
const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam;

const auto spline_padded_size = myV.size();
const auto sposet_padded_size = getAlignedSize<TT>(OrbitalSetSize);
offload_scratch.resize(spline_padded_size);
results_scratch.resize(sposet_padded_size);

// Ye: need to extract sizes and pointers before entering target region
const auto requested_orb_size = psi.size();
const auto* spline_ptr = SplineInst->getSplinePtr();
auto* offload_scratch_ptr = offload_scratch.data();
auto* psi_ptr = psi.data();
const auto* spline_ptr = SplineInst->getSplinePtr();
auto* offload_scratch_ptr = offload_scratch.data();
auto* results_scratch_ptr = results_scratch.data();
auto* psi_ptr = psi.data();
const auto x = r[0], y = r[1], z = r[2];
const auto rux = ru[0], ruy = ru[1], ruz = ru[2];
const auto myKcart_padded_size = myKcart->capacity();
auto* myKcart_ptr = myKcart->data();
const size_t first_spo_local = first_spo;
const size_t nComplexBands_local = nComplexBands;
const auto requested_orb_size = psi.size();

{
ScopedTimer offload(offload_timer_);
PRAGMA_OFFLOAD("omp target teams distribute num_teams(NumTeams) \
map(always, from: psi_ptr[0:requested_orb_size])")
map(always, from: results_scratch_ptr[0:sposet_padded_size])")
for (int team_id = 0; team_id < NumTeams; team_id++)
{
const size_t first = ChunkSizePerTeam * team_id;
Expand All @@ -283,10 +286,12 @@ void SplineC2ROMPTarget<ST>::evaluateValue(const ParticleSet& P, const int iat,
const size_t last_cplx = last / 2;
PRAGMA_OFFLOAD("omp parallel for")
for (int index = first_cplx; index < last_cplx; index++)
C2R::assign_v(x, y, z, psi_ptr, offload_scratch_ptr, myKcart_ptr, myKcart_padded_size, first_spo_local,
nComplexBands_local, index);
//FIXME psi should be assigned in a separate stage with requested_orb_size
C2R::assign_v(x, y, z, results_scratch_ptr, offload_scratch_ptr, myKcart_ptr, myKcart_padded_size,
first_spo_local, nComplexBands_local, index);
}

for (size_t i = 0; i < requested_orb_size; i++)
psi[i] = results_scratch[i];
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ namespace qmcplusplus
* The internal storage of complex spline coefficients uses double sized real arrays of ST type, aligned and padded.
* The first nComplexBands complex splines produce 2 real orbitals.
* The remaining complex splines each produce 1 real orbital.
* All the output orbitals are real.
* All the output orbitals are real (C2R). The maximal number of output orbitals is OrbitalSetSize.
*/
template<typename ST>
class SplineC2ROMPTarget : public BsplineSet
Expand Down