Skip to content

Commit d06c3cf

Browse files
Use llama for particle frame and shared memory DataBox layout
1 parent c319769 commit d06c3cf

File tree

40 files changed: +572 additions, -188 deletions.

include/picongpu/algorithms/Set.hpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -33,9 +33,9 @@ namespace picongpu
3333
}
3434

3535
template<typename Dst, typename T_Worker>
36-
HDINLINE void operator()(T_Worker const&, Dst& dst) const
36+
HDINLINE void operator()(T_Worker const&, Dst&& dst) const
3737
{
38-
dst = value;
38+
std::forward<Dst>(dst) = value;
3939
}
4040

4141
private:

include/picongpu/fields/FieldJ.kernel

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -131,7 +131,7 @@ namespace picongpu
131131
// The rest uses normal weighting
132132
const float_X weighting = particle[weighting_];
133133
Velocity velocity;
134-
const float3_X vel = velocity(particle[momentum_], attribute::getMass(weighting, particle));
134+
const float3_X vel = velocity(static_cast<float3_X>(particle[momentum_]), attribute::getMass(weighting, particle));
135135
auto fieldJShiftToParticle = jBox.shift(localCell);
136136
ParticleAlgo perParticle;
137137
perParticle(worker, fieldJShiftToParticle, pos, vel, charge, m_deltaTime);

include/picongpu/fields/FieldTmp.kernel

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -94,7 +94,7 @@ namespace picongpu
9494
if(!forEachParticle.hasParticles())
9595
return;
9696

97-
auto cachedVal = CachedBox::create<0, typename T_TmpBox::ValueType>(worker, T_BlockDescription{});
97+
auto cachedVal = CachedBox::create<0, sharedDataBoxMapping, typename T_TmpBox::ValueType>(worker, T_BlockDescription{});
9898
Set<typename T_TmpBox::ValueType> set(float_X(0.0));
9999

100100
auto collective = makeThreadCollective<T_BlockDescription>();

include/picongpu/fields/MaxwellSolver/AddCurrentDensity.kernel

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,8 @@
2222

2323
#include "picongpu/simulation_defines.hpp"
2424

25+
#include "picongpu/param/memory.param"
26+
2527
#include <pmacc/dimensions/SuperCellDescription.hpp>
2628
#include <pmacc/lockstep.hpp>
2729
#include <pmacc/mappings/threads/ThreadCollective.hpp>
@@ -68,7 +70,9 @@ namespace picongpu::fields::maxwellSolver
6870

6971
constexpr uint32_t cellsPerSuperCell = pmacc::math::CT::volume<SuperCellSize>::type::value;
7072

71-
auto cachedJ = CachedBox::create<0, typename FieldJ::DataBoxType::ValueType>(worker, BlockArea());
73+
auto cachedJ = CachedBox::create<0, sharedDataBoxMapping, typename FieldJ::DataBoxType::ValueType>(
74+
worker,
75+
BlockArea());
7276

7377
pmacc::math::operation::Assign assign;
7478
DataSpace<simDim> const block(

include/picongpu/fields/MaxwellSolver/FDTD/FDTDBase.kernel

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -179,7 +179,7 @@ namespace picongpu
179179
auto srcFieldBlock = srcField.shift(beginCellIdx);
180180
auto cacheStencilArea = makeThreadCollective<StencilCfg>();
181181
auto cachedSrcField
182-
= CachedBox::create<0u, typename T_SrcBox::ValueType>(worker, StencilCfg{});
182+
= CachedBox::create<0u, sharedDataBoxMapping, typename T_SrcBox::ValueType>(worker, StencilCfg{});
183183
cacheStencilArea(worker, assign, cachedSrcField, srcFieldBlock);
184184

185185
worker.sync();

include/picongpu/fields/currentDeposition/Cache.hpp

Lines changed: 1 addition & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -48,15 +48,10 @@ namespace picongpu
4848
*/
4949
template<typename T_BlockDescription, typename T_Worker, typename T_FieldBox>
5050
DINLINE static auto create(T_Worker const& worker, T_FieldBox const& fieldBox)
51-
#if(!BOOST_COMP_CLANG)
52-
-> decltype(CachedBox::create<0u, typename T_FieldBox::ValueType>(
53-
worker,
54-
std::declval<T_BlockDescription>()))
55-
#endif
5651
{
5752
using ValueType = typename T_FieldBox::ValueType;
5853
/* this memory is used by all virtual blocks */
59-
auto cache = CachedBox::create<0u, ValueType>(worker, T_BlockDescription{});
54+
auto cache = CachedBox::create<0u, sharedDataBoxMapping, ValueType>(worker, T_BlockDescription{});
6055

6156
Set<ValueType> set(ValueType::create(0.0_X));
6257
auto collectiveFill = makeThreadCollective<T_BlockDescription>();
@@ -90,9 +85,6 @@ namespace picongpu
9085
*/
9186
template<typename T_BlockDescription, typename T_Worker, typename T_FieldBox>
9287
DINLINE static auto create([[maybe_unused]] T_Worker const& worker, T_FieldBox const& fieldBox)
93-
#if(!BOOST_COMP_CLANG)
94-
-> T_FieldBox
95-
#endif
9688
{
9789
return fieldBox;
9890
}

include/picongpu/param/memory.param

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -114,4 +114,7 @@ namespace picongpu
114114
*/
115115
constexpr bool fieldTmpSupportGatherCommunication = true;
116116

117+
inline constexpr auto particleFrameMapping = pmacc::ParticleFrameMapping::SoA;
118+
119+
inline constexpr auto sharedDataBoxMapping = pmacc::SharedDataBoxMapping::AoS;
117120
} // namespace picongpu

include/picongpu/particles/Particles.hpp

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@
2222

2323
#include "picongpu/fields/Fields.def"
2424
#include "picongpu/fields/Fields.hpp"
25+
#include "picongpu/param/memory.param"
2526
#include "picongpu/particles/boundary/Description.hpp"
2627
#include "picongpu/particles/boundary/Utility.hpp"
2728
#include "picongpu/particles/manipulators/manipulators.def"
@@ -91,7 +92,10 @@ namespace picongpu
9192
// fallback if the species has not defined the alias boundaryCondition
9293
pmacc::HandleGuardRegion<
9394
pmacc::particles::policies::ExchangeParticles,
94-
pmacc::particles::policies::DoNothing>>::type>,
95+
pmacc::particles::policies::DoNothing>>::type,
96+
bmpl::vector0<>,
97+
bmpl::vector0<>,
98+
picongpu::particleFrameMapping>,
9599
MappingDesc,
96100
DeviceHeap>
97101
, public ISimulationData
@@ -110,7 +114,10 @@ namespace picongpu
110114
// fallback if the species has not defined the alias boundaryCondition
111115
pmacc::HandleGuardRegion<
112116
pmacc::particles::policies::ExchangeParticles,
113-
pmacc::particles::policies::DoNothing>>::type>;
117+
pmacc::particles::policies::DoNothing>>::type,
118+
bmpl::vector0<>,
119+
bmpl::vector0<>,
120+
picongpu::particleFrameMapping>;
114121
using ParticlesBaseType = ParticlesBase<SpeciesParticleDescription, picongpu::MappingDesc, DeviceHeap>;
115122
using FrameType = typename ParticlesBaseType::FrameType;
116123
using FrameTypeBorder = typename ParticlesBaseType::FrameTypeBorder;

include/picongpu/particles/Particles.kernel

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -222,8 +222,8 @@ namespace picongpu
222222

223223
onlyMaster([&]() { mustShiftSupercell = 0; });
224224

225-
auto cachedB = CachedBox::create<0, typename T_BBox::ValueType>(worker, T_DataDomain());
226-
auto cachedE = CachedBox::create<1, typename T_EBox::ValueType>(worker, T_DataDomain());
225+
auto cachedB = CachedBox::create<0, sharedDataBoxMapping, typename T_BBox::ValueType>(worker, T_DataDomain());
226+
auto cachedE = CachedBox::create<1, sharedDataBoxMapping, typename T_EBox::ValueType>(worker, T_DataDomain());
227227

228228
worker.sync();
229229

include/picongpu/particles/Particles.tpp

Lines changed: 29 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -44,6 +44,7 @@
4444
#include <pmacc/traits/Resolve.hpp>
4545

4646
#include <algorithm>
47+
#include <fstream>
4748
#include <iostream>
4849
#include <limits>
4950
#include <memory>
@@ -214,6 +215,34 @@ namespace picongpu
214215

215216
size_t sizeOfExchanges = 0u;
216217

218+
// std::cout << "Frame:\n\tSize: " << sizeof(FrameType) << "\n\tLLAMA view offset: " << offsetof(FrameType,
219+
// view) << "\n\tLLAMA view size: " << sizeof(FrameType::view) << std::endl;
220+
221+
{
222+
using View = decltype(FrameType::view);
223+
using M = typename View::Mapping;
224+
auto m = M{};
225+
// View view;
226+
// std::cout << "view begin " << &view << "\n";
227+
// std::cout << "storage begin " << &view.storageBlobs[0][0] << "\n";
228+
// for(auto ai : llama::ArrayIndexRange{m.extents()})
229+
//{
230+
// llama::forEachLeafCoord<typename M::RecordDim>(
231+
// [&](auto rc)
232+
// {
233+
// auto& e = view(ai)(rc);
234+
// std::cout << "ai " << ai << " rc " << rc << " addr " << (void*) &e << "\n";
235+
// });
236+
// }
237+
// std::cout << "view end " << (&view + 1) << "\n";
238+
239+
std::ofstream{"llama_frame.html"} << llama::toHtml(m);
240+
std::ofstream{"llama_frame.svg"} << llama::toSvg(m);
241+
242+
// PMACC_VERIFY(
243+
// reinterpret_cast<std::byte*>(&view) == reinterpret_cast<std::byte*>(&view.storageBlobs[0][0]));
244+
}
245+
217246
const uint32_t commTag = pmacc::traits::GetUniqueTypeId<FrameType, uint32_t>::uid();
218247
log<picLog::MEMORY>("communication tag for species %1%: %2%") % FrameType::getName() % commTag;
219248

0 commit comments

Comments
 (0)