Skip to content

Commit 656aab9

Browse files
Use LLAMA for particle frame and shared memory DataBox layout
Also support LLAMA frames in the IO.
1 parent 5da4754 commit 656aab9

File tree

54 files changed

+852
-260
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

54 files changed

+852
-260
lines changed

.gitmodules

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[submodule "thirdParty/llama"]
2+
path = thirdParty/llama
3+
url = https://github.com/alpaka-group/llama

include/picongpu/algorithms/Set.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,9 @@ namespace picongpu
3333
}
3434

3535
template<typename Dst, typename T_Worker>
36-
HDINLINE void operator()(T_Worker const&, Dst& dst) const
36+
HDINLINE void operator()(T_Worker const&, Dst&& dst) const
3737
{
38-
dst = value;
38+
std::forward<Dst>(dst) = value;
3939
}
4040

4141
private:

include/picongpu/algorithms/Velocity.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ namespace picongpu
2828
struct Velocity
2929
{
3030
template<typename MomType, typename MassType>
31-
HDINLINE MomType operator()(const MomType mom, const MassType mass0)
31+
HDINLINE auto operator()(const MomType mom, const MassType mass0)
3232
{
3333
const float_X rc2 = MUE0_EPS0;
3434
const float_X m0_2 = mass0 * mass0;

include/picongpu/fields/FieldJ.kernel

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,8 @@ namespace picongpu
131131
// The rest uses normal weighting
132132
const float_X weighting = particle[weighting_];
133133
Velocity velocity;
134-
const float3_X vel = velocity(particle[momentum_], attribute::getMass(weighting, particle));
134+
const float3_X vel
135+
= velocity(static_cast<float3_X>(particle[momentum_]), attribute::getMass(weighting, particle));
135136
auto fieldJShiftToParticle = jBox.shift(localCell);
136137
ParticleAlgo perParticle;
137138
perParticle(worker, fieldJShiftToParticle, pos, vel, charge, m_deltaTime);

include/picongpu/fields/FieldTmp.kernel

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,9 @@ namespace picongpu
9494
if(!forEachParticle.hasParticles())
9595
return;
9696

97-
auto cachedVal = CachedBox::create<0, typename T_TmpBox::ValueType>(worker, T_BlockDescription{});
97+
auto cachedVal = CachedBox::create<0, SharedDataBoxMemoryLayout, typename T_TmpBox::ValueType>(
98+
worker,
99+
T_BlockDescription{});
98100
Set<typename T_TmpBox::ValueType> set(float_X(0.0));
99101

100102
auto collective = makeThreadCollective<T_BlockDescription>();

include/picongpu/fields/MaxwellSolver/AddCurrentDensity.kernel

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222

2323
#include "picongpu/simulation_defines.hpp"
2424

25+
#include "picongpu/param/memory.param"
26+
2527
#include <pmacc/dimensions/SuperCellDescription.hpp>
2628
#include <pmacc/lockstep.hpp>
2729
#include <pmacc/mappings/threads/ThreadCollective.hpp>
@@ -68,7 +70,9 @@ namespace picongpu::fields::maxwellSolver
6870

6971
constexpr uint32_t cellsPerSuperCell = pmacc::math::CT::volume<SuperCellSize>::type::value;
7072

71-
auto cachedJ = CachedBox::create<0, typename FieldJ::DataBoxType::ValueType>(worker, BlockArea());
73+
auto cachedJ = CachedBox::create<0, SharedDataBoxMemoryLayout, typename FieldJ::DataBoxType::ValueType>(
74+
worker,
75+
BlockArea());
7276

7377
pmacc::math::operation::Assign assign;
7478
DataSpace<simDim> const block(

include/picongpu/fields/MaxwellSolver/FDTD/FDTDBase.kernel

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,9 @@ namespace picongpu
179179
auto srcFieldBlock = srcField.shift(beginCellIdx);
180180
auto cacheStencilArea = makeThreadCollective<StencilCfg>();
181181
auto cachedSrcField
182-
= CachedBox::create<0u, typename T_SrcBox::ValueType>(worker, StencilCfg{});
182+
= CachedBox::create<0u, SharedDataBoxMemoryLayout, typename T_SrcBox::ValueType>(
183+
worker,
184+
StencilCfg{});
183185
cacheStencilArea(worker, assign, cachedSrcField, srcFieldBlock);
184186

185187
worker.sync();

include/picongpu/fields/currentDeposition/Cache.hpp

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -48,15 +48,11 @@ namespace picongpu
4848
*/
4949
template<typename T_BlockDescription, typename T_Worker, typename T_FieldBox>
5050
DINLINE static auto create(T_Worker const& worker, T_FieldBox const& fieldBox)
51-
#if(!BOOST_COMP_CLANG)
52-
-> decltype(CachedBox::create<0u, typename T_FieldBox::ValueType>(
53-
worker,
54-
std::declval<T_BlockDescription>()))
55-
#endif
5651
{
5752
using ValueType = typename T_FieldBox::ValueType;
5853
/* this memory is used by all virtual blocks */
59-
auto cache = CachedBox::create<0u, ValueType>(worker, T_BlockDescription{});
54+
auto cache
55+
= CachedBox::create<0u, SharedDataBoxMemoryLayout, ValueType>(worker, T_BlockDescription{});
6056

6157
Set<ValueType> set(ValueType::create(0.0_X));
6258
auto collectiveFill = makeThreadCollective<T_BlockDescription>();
@@ -90,9 +86,6 @@ namespace picongpu
9086
*/
9187
template<typename T_BlockDescription, typename T_Worker, typename T_FieldBox>
9288
DINLINE static auto create([[maybe_unused]] T_Worker const& worker, T_FieldBox const& fieldBox)
93-
#if(!BOOST_COMP_CLANG)
94-
-> T_FieldBox
95-
#endif
9689
{
9790
return fieldBox;
9891
}

include/picongpu/fields/incidentField/Solver.hpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -283,8 +283,9 @@ namespace picongpu
283283
using IntVector = pmacc::math::Vector<int, simDim>;
284284
auto const beginLocalUserIdx
285285
= Index{math::max(IntVector{beginUserIdx - totalCellOffset}, IntVector::create(0))};
286-
auto const endLocalUserIdx
287-
= Index{math::min(IntVector{endUserIdx - totalCellOffset}, IntVector{localDomain.size})};
286+
auto const endLocalUserIdx = Index{math::min(
287+
IntVector{endUserIdx - totalCellOffset},
288+
static_cast<const IntVector&>(localDomain.size))};
288289

289290
// Check if we have any active cells in the local domain
290291
bool areAnyCellsInLocalDomain = true;

include/picongpu/param/memory.param

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,4 +114,19 @@ namespace picongpu
114114
*/
115115
constexpr bool fieldTmpSupportGatherCommunication = true;
116116

117+
struct ParticleFrameMemoryLayout
118+
: llama::mapping::BindSoA<llama::mapping::Blobs::Single, llama::mapping::SubArrayAlignment::Align>
119+
{
120+
inline static constexpr bool splitVector = false;
121+
};
122+
123+
struct ParticleFrameMemoryLayoutOpenPMD : llama::mapping::BindSoA<llama::mapping::Blobs::OnePerField>
124+
{
125+
inline static constexpr bool splitVector = false;
126+
};
127+
128+
struct SharedDataBoxMemoryLayout : llama::mapping::BindAoS<>
129+
{
130+
inline static constexpr bool splitVector = false;
131+
};
117132
} // namespace picongpu

include/picongpu/particles/Particles.hpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222

2323
#include "picongpu/fields/Fields.def"
2424
#include "picongpu/fields/Fields.hpp"
25+
#include "picongpu/param/memory.param"
2526
#include "picongpu/particles/boundary/Description.hpp"
2627
#include "picongpu/particles/boundary/Utility.hpp"
2728
#include "picongpu/particles/manipulators/manipulators.def"
@@ -89,6 +90,7 @@ namespace picongpu
8990
pmacc::HandleGuardRegion<
9091
pmacc::particles::policies::ExchangeParticles,
9192
pmacc::particles::policies::DoNothing>>>,
93+
ParticleFrameMemoryLayout,
9294
MappingDesc,
9395
DeviceHeap>
9496
, public ISimulationData
@@ -108,7 +110,8 @@ namespace picongpu
108110
pmacc::HandleGuardRegion<
109111
pmacc::particles::policies::ExchangeParticles,
110112
pmacc::particles::policies::DoNothing>>>;
111-
using ParticlesBaseType = ParticlesBase<SpeciesParticleDescription, picongpu::MappingDesc, DeviceHeap>;
113+
using ParticlesBaseType
114+
= ParticlesBase<SpeciesParticleDescription, ParticleFrameMemoryLayout, picongpu::MappingDesc, DeviceHeap>;
112115
using FrameType = typename ParticlesBaseType::FrameType;
113116
using FrameTypeBorder = typename ParticlesBaseType::FrameTypeBorder;
114117
using ParticlesBoxType = typename ParticlesBaseType::ParticlesBoxType;

include/picongpu/particles/Particles.kernel

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -222,8 +222,10 @@ namespace picongpu
222222

223223
onlyMaster([&]() { mustShiftSupercell = 0; });
224224

225-
auto cachedB = CachedBox::create<0, typename T_BBox::ValueType>(worker, T_DataDomain());
226-
auto cachedE = CachedBox::create<1, typename T_EBox::ValueType>(worker, T_DataDomain());
225+
auto cachedB
226+
= CachedBox::create<0, SharedDataBoxMemoryLayout, typename T_BBox::ValueType>(worker, T_DataDomain());
227+
auto cachedE
228+
= CachedBox::create<1, SharedDataBoxMemoryLayout, typename T_EBox::ValueType>(worker, T_DataDomain());
227229

228230
worker.sync();
229231

include/picongpu/particles/Particles.tpp

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
#include <pmacc/traits/Resolve.hpp>
4444

4545
#include <algorithm>
46+
#include <fstream>
4647
#include <iostream>
4748
#include <limits>
4849
#include <memory>
@@ -197,7 +198,9 @@ namespace picongpu
197198
const std::shared_ptr<DeviceHeap>& heap,
198199
picongpu::MappingDesc cellDescription,
199200
SimulationDataId datasetID)
200-
: ParticlesBase<SpeciesParticleDescription, picongpu::MappingDesc, DeviceHeap>(heap, cellDescription)
201+
: ParticlesBase<SpeciesParticleDescription, ParticleFrameMemoryLayout, picongpu::MappingDesc, DeviceHeap>(
202+
heap,
203+
cellDescription)
201204
, m_datasetID(datasetID)
202205
{
203206
constexpr bool particleHasShape = pmacc::traits::HasIdentifier<FrameType, shape<>>::type::value;
@@ -212,6 +215,21 @@ namespace picongpu
212215

213216
size_t sizeOfExchanges = 0u;
214217

218+
#if __has_include(<fmt/format.h>)
219+
// dump the data layout of the particle frames
220+
if constexpr(PIConGPUVerbose::log_level & picLog::MEMORY::lvl)
221+
{
222+
log<picLog::MEMORY>(
223+
"Dumping LLAMA memory layout for frame and border into llama_frame.* and llama_border_frame.*");
224+
auto fm = typename decltype(FrameType::view)::Mapping{};
225+
std::ofstream{"llama_frame.html"} << llama::toHtml(fm);
226+
std::ofstream{"llama_frame.svg"} << llama::toSvg(fm);
227+
auto bfm = typename decltype(FrameTypeBorder::view)::Mapping{};
228+
std::ofstream{"llama_border_frame.html"} << llama::toHtml(bfm);
229+
std::ofstream{"llama_border_frame.svg"} << llama::toSvg(bfm);
230+
}
231+
#endif
232+
215233
const uint32_t commTag = pmacc::traits::getUniqueId();
216234
log<picLog::MEMORY>("communication tag for species %1%: %2%") % FrameType::getName() % commTag;
217235

include/picongpu/particles/ionization/byCollision/ThomasFermi/ThomasFermi_Impl.hpp

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323

2424
#include "picongpu/fields/CellType.hpp"
2525
#include "picongpu/fields/FieldTmp.hpp"
26+
#include "picongpu/param/memory.param"
2627
#include "picongpu/particles/atomicPhysics/SetChargeState.hpp"
2728
#include "picongpu/particles/ionization/byCollision/ThomasFermi/AlgorithmThomasFermi.hpp"
2829
#include "picongpu/particles/ionization/byCollision/ThomasFermi/ThomasFermi.def"
@@ -104,8 +105,20 @@ namespace picongpu
104105
PMACC_ALIGN(eneBox, FieldTmp::DataBoxType);
105106

106107
/* shared memory EM-field device databoxes */
107-
PMACC_ALIGN(cachedRho, DataBox<SharedBox<ValueType_Rho, typename BlockArea::FullSuperCellSize, 0>>);
108-
PMACC_ALIGN(cachedEne, DataBox<SharedBox<ValueType_Ene, typename BlockArea::FullSuperCellSize, 1>>);
108+
PMACC_ALIGN(
109+
cachedRho,
110+
DataBox<SharedBox<
111+
ValueType_Rho,
112+
typename BlockArea::FullSuperCellSize,
113+
0,
114+
SharedDataBoxMemoryLayout>>);
115+
PMACC_ALIGN(
116+
cachedEne,
117+
DataBox<SharedBox<
118+
ValueType_Ene,
119+
typename BlockArea::FullSuperCellSize,
120+
1,
121+
SharedDataBoxMemoryLayout>>);
109122

110123
public:
111124
/* host constructor initializing member : random number generator */
@@ -185,8 +198,8 @@ namespace picongpu
185198
DINLINE void collectiveInit(const T_Worker& worker, const DataSpace<simDim>& blockCell)
186199
{
187200
/* caching of density and "temperature" fields */
188-
cachedRho = CachedBox::create<0, ValueType_Rho>(worker, BlockArea());
189-
cachedEne = CachedBox::create<1, ValueType_Ene>(worker, BlockArea());
201+
cachedRho = CachedBox::create<0, SharedDataBoxMemoryLayout, ValueType_Rho>(worker, BlockArea());
202+
cachedEne = CachedBox::create<1, SharedDataBoxMemoryLayout, ValueType_Ene>(worker, BlockArea());
190203

191204
/* instance of nvidia assignment operator */
192205
pmacc::math::operation::Assign assign;

include/picongpu/particles/ionization/byField/ADK/ADK_Impl.hpp

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -102,8 +102,14 @@ namespace picongpu
102102
PMACC_ALIGN(bBox, FieldB::DataBoxType);
103103
PMACC_ALIGN(jBox, FieldJ::DataBoxType);
104104
/* shared memory EM-field device databoxes */
105-
PMACC_ALIGN(cachedE, DataBox<SharedBox<ValueType_E, typename BlockArea::FullSuperCellSize, 1>>);
106-
PMACC_ALIGN(cachedB, DataBox<SharedBox<ValueType_B, typename BlockArea::FullSuperCellSize, 0>>);
105+
PMACC_ALIGN(
106+
cachedE,
107+
DataBox<
108+
SharedBox<ValueType_E, typename BlockArea::FullSuperCellSize, 1, SharedDataBoxMemoryLayout>>);
109+
PMACC_ALIGN(
110+
cachedB,
111+
DataBox<
112+
SharedBox<ValueType_B, typename BlockArea::FullSuperCellSize, 0, SharedDataBoxMemoryLayout>>);
107113

108114
public:
109115
/* host constructor initializing member : random number generator */
@@ -137,8 +143,8 @@ namespace picongpu
137143
jBox = jBox.shift(blockCell);
138144

139145
/* caching of E and B fields */
140-
cachedB = CachedBox::create<0, ValueType_B>(worker, BlockArea());
141-
cachedE = CachedBox::create<1, ValueType_E>(worker, BlockArea());
146+
cachedB = CachedBox::create<0, SharedDataBoxMemoryLayout, ValueType_B>(worker, BlockArea());
147+
cachedE = CachedBox::create<1, SharedDataBoxMemoryLayout, ValueType_E>(worker, BlockArea());
142148

143149
/* instance of nvidia assignment operator */
144150
pmacc::math::operation::Assign assign;

include/picongpu/particles/ionization/byField/BSI/BSI_Impl.hpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include "picongpu/fields/FieldB.hpp"
2626
#include "picongpu/fields/FieldE.hpp"
2727
#include "picongpu/fields/FieldJ.hpp"
28+
#include "picongpu/param/memory.param"
2829
#include "picongpu/particles/ParticlesFunctors.hpp"
2930
#include "picongpu/particles/atomicPhysics/SetChargeState.hpp"
3031
#include "picongpu/particles/ionization/byField/BSI/AlgorithmBSI.hpp"
@@ -93,7 +94,10 @@ namespace picongpu
9394
FieldE::DataBoxType eBox;
9495
FieldJ::DataBoxType jBox;
9596
/* shared memory EM-field device databoxes */
96-
PMACC_ALIGN(cachedE, DataBox<SharedBox<ValueType_E, typename BlockArea::FullSuperCellSize, 1>>);
97+
PMACC_ALIGN(
98+
cachedE,
99+
DataBox<
100+
SharedBox<ValueType_E, typename BlockArea::FullSuperCellSize, 1, SharedDataBoxMemoryLayout>>);
97101

98102
public:
99103
/* host constructor */
@@ -125,7 +129,7 @@ namespace picongpu
125129
jBox = jBox.shift(blockCell);
126130

127131
/* caching of E field */
128-
cachedE = CachedBox::create<1, ValueType_E>(worker, BlockArea());
132+
cachedE = CachedBox::create<1, SharedDataBoxMemoryLayout, ValueType_E>(worker, BlockArea());
129133

130134
/* instance of nvidia assignment operator */
131135
pmacc::math::operation::Assign assign;

include/picongpu/particles/ionization/byField/Keldysh/Keldysh_Impl.hpp

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -102,8 +102,14 @@ namespace picongpu
102102
PMACC_ALIGN(bBox, FieldB::DataBoxType);
103103
PMACC_ALIGN(jBox, FieldJ::DataBoxType);
104104
/* shared memory EM-field device databoxes */
105-
PMACC_ALIGN(cachedE, DataBox<SharedBox<ValueType_E, typename BlockArea::FullSuperCellSize, 1>>);
106-
PMACC_ALIGN(cachedB, DataBox<SharedBox<ValueType_B, typename BlockArea::FullSuperCellSize, 0>>);
105+
PMACC_ALIGN(
106+
cachedE,
107+
DataBox<
108+
SharedBox<ValueType_E, typename BlockArea::FullSuperCellSize, 1, SharedDataBoxMemoryLayout>>);
109+
PMACC_ALIGN(
110+
cachedB,
111+
DataBox<
112+
SharedBox<ValueType_B, typename BlockArea::FullSuperCellSize, 0, SharedDataBoxMemoryLayout>>);
107113

108114
public:
109115
/* host constructor initializing member : random number generator */
@@ -137,8 +143,8 @@ namespace picongpu
137143
jBox = jBox.shift(blockCell);
138144

139145
/* caching of E and B fields */
140-
cachedB = CachedBox::create<0, ValueType_B>(worker, BlockArea());
141-
cachedE = CachedBox::create<1, ValueType_E>(worker, BlockArea());
146+
cachedB = CachedBox::create<0, SharedDataBoxMemoryLayout, ValueType_B>(worker, BlockArea());
147+
cachedE = CachedBox::create<1, SharedDataBoxMemoryLayout, ValueType_E>(worker, BlockArea());
142148

143149
/* instance of nvidia assignment operator */
144150
pmacc::math::operation::Assign assign;

include/picongpu/plugins/PhaseSpace/PhaseSpaceFunctors.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ namespace picongpu
160160
/* create shared mem */
161161
constexpr int blockCellsInDir = SuperCellSize::template at<r_dir>::type::value;
162162
using SharedMemSize = SuperCellDescription<pmacc::math::CT::Int<num_pbins, blockCellsInDir>>;
163-
auto sharedMemHist = CachedBox::create<0u, float_PS>(worker, SharedMemSize{});
163+
auto sharedMemHist = CachedBox::create<0u, SharedDataBoxMemoryLayout, float_PS>(worker, SharedMemSize{});
164164

165165
Set<float_PS> set(float_PS{0.0});
166166
auto collectiveOnSharedHistogram = makeThreadCollective<SharedMemSize>();

0 commit comments

Comments
 (0)