Skip to content

Commit b9d549b

Browse files
authored
Add option to do stable redistribute with GPUs (#4200)
Currently, the order of particles in memory is not guaranteed to be the same if you run the same simulation twice on the GPU. This usually doesn't matter, but for cases where it does, this adds a slower, stable option.

The proposed changes:
- [ ] fix a bug or incorrect behavior in AMReX
- [ ] add new capabilities to AMReX
- [ ] changes answers in the test suite to more than roundoff level
- [ ] are likely to significantly affect the results of downstream AMReX users
- [ ] include documentation in the code and/or rst files, if appropriate
1 parent 47108f0 commit b9d549b

File tree

4 files changed

+60
-20
lines changed

4 files changed

+60
-20
lines changed

Src/Particle/AMReX_ParticleBufferMap.H

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,10 @@ struct GetBucket
3636
const int* m_lev_gid_to_bucket;
3737
const int* m_lev_offsets;
3838

39-
GetBucket (const Gpu::DeviceVector<int>& lev_gid_to_bucket,
40-
const Gpu::DeviceVector<int>& lev_offsets)
41-
: m_lev_gid_to_bucket(lev_gid_to_bucket.dataPtr()),
42-
m_lev_offsets(lev_offsets.dataPtr())
39+
GetBucket (const int* lev_gid_to_bucket_ptr,
40+
const int* lev_offsets_ptr)
41+
: m_lev_gid_to_bucket(lev_gid_to_bucket_ptr),
42+
m_lev_offsets(lev_offsets_ptr)
4343
{}
4444

4545
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
@@ -154,7 +154,9 @@ public:
154154
}
155155

156156
[[nodiscard]] GetPID getPIDFunctor () const noexcept { return GetPID(d_bucket_to_pid, d_lev_gid_to_bucket, d_lev_offsets);}
157-
[[nodiscard]] GetBucket getBucketFunctor () const noexcept { return GetBucket(d_lev_gid_to_bucket, d_lev_offsets);}
157+
[[nodiscard]] GetBucket getBucketFunctor () const noexcept { return GetBucket(d_lev_gid_to_bucket.data(), d_lev_offsets.data());}
158+
[[nodiscard]] GetBucket getHostBucketFunctor () const noexcept { return GetBucket(m_lev_gid_to_bucket.data(), m_lev_offsets.data());}
159+
158160
};
159161

160162
} // namespace amrex

Src/Particle/AMReX_ParticleCommunication.H

Lines changed: 45 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,12 @@ struct ParticleCopyPlan
154154
m_box_counts_d.resize(num_buckets+1, 0);
155155
m_box_offsets.resize(num_buckets+1);
156156
auto* p_dst_box_counts = m_box_counts_d.dataPtr();
157-
auto getBucket = pc.BufferMap().getBucketFunctor();
157+
auto getBucket = pc.stableRedistribute() ? pc.BufferMap().getHostBucketFunctor() : pc.BufferMap().getBucketFunctor();
158+
159+
Gpu::HostVector<unsigned int> h_box_counts;
160+
if (pc.stableRedistribute() ) {
161+
h_box_counts.resize(m_box_counts_d.size(), 0);
162+
}
158163

159164
m_dst_indices.resize(num_levels);
160165
for (int lev = 0; lev < num_levels; ++lev)
@@ -166,24 +171,49 @@ struct ParticleCopyPlan
166171
if (num_copies == 0) { continue; }
167172
m_dst_indices[lev][gid].resize(num_copies);
168173

169-
const auto* p_boxes = op.m_boxes[lev].at(gid).dataPtr();
170-
const auto* p_levs = op.m_levels[lev].at(gid).dataPtr();
171-
auto* p_dst_indices = m_dst_indices[lev][gid].dataPtr();
172-
173-
AMREX_FOR_1D ( num_copies, i,
174-
{
175-
int dst_box = p_boxes[i];
176-
if (dst_box >= 0)
177-
{
178-
int dst_lev = p_levs[i];
179-
int index = static_cast<int>(Gpu::Atomic::Add(
180-
&p_dst_box_counts[getBucket(dst_lev, dst_box)], 1U));
181-
p_dst_indices[i] = index;
174+
if (pc.stableRedistribute()) {
175+
const Gpu::DeviceVector<int>& d_boxes = op.m_boxes[lev].at(gid);
176+
Gpu::HostVector<int> h_boxes(d_boxes.size());
177+
Gpu::copy(Gpu::deviceToHost,d_boxes.begin(),d_boxes.end(),h_boxes.begin());
178+
179+
const Gpu::DeviceVector<int>& d_levs = op.m_levels[lev].at(gid);
180+
Gpu::HostVector<int> h_levs(d_levs.size());
181+
Gpu::copy(Gpu::deviceToHost,d_levs.begin(),d_levs.end(),h_levs.begin());
182+
183+
Gpu::HostVector<int> h_dst_indices(num_copies);
184+
for (int i = 0; i < num_copies; ++i) {
185+
int dst_box = h_boxes[i];
186+
if (dst_box >= 0) {
187+
int dst_lev = h_levs[i];
188+
int index = static_cast<int>(h_box_counts[getBucket(dst_lev, dst_box)]++);
189+
h_dst_indices[i] = index;
190+
}
182191
}
183-
});
192+
Gpu::copy(Gpu::hostToDevice,h_dst_indices.begin(),h_dst_indices.end(),m_dst_indices[lev][gid].begin());
193+
}
194+
else {
195+
const auto* p_boxes = op.m_boxes[lev].at(gid).dataPtr();
196+
const auto* p_levs = op.m_levels[lev].at(gid).dataPtr();
197+
auto* p_dst_indices = m_dst_indices[lev][gid].dataPtr();
198+
AMREX_FOR_1D ( num_copies, i,
199+
{
200+
int dst_box = p_boxes[i];
201+
if (dst_box >= 0)
202+
{
203+
int dst_lev = p_levs[i];
204+
int index = static_cast<int>(Gpu::Atomic::Add(
205+
&p_dst_box_counts[getBucket(dst_lev, dst_box)], 1U));
206+
p_dst_indices[i] = index;
207+
}
208+
});
209+
}
184210
}
185211
}
186212

213+
if (pc.stableRedistribute()) {
214+
Gpu::copy(Gpu::hostToDevice,h_box_counts.begin(),h_box_counts.end(),m_box_counts_d.begin());
215+
}
216+
187217
amrex::Gpu::exclusive_scan(m_box_counts_d.begin(), m_box_counts_d.end(),
188218
m_box_offsets.begin());
189219

Src/Particle/AMReX_ParticleContainerBase.H

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,10 @@ public:
231231

232232
void SetVerbose (int verbose) { m_verbose = verbose; }
233233

234+
[[nodiscard]] int stableRedistribute () const {return m_stable_redistribute; }
235+
236+
void setStableRedistribute (int stable) { m_stable_redistribute = stable; }
237+
234238
const ParticleBufferMap& BufferMap () const {return m_buffer_map;}
235239

236240
Vector<int> NeighborProcs(int ngrow) const
@@ -260,6 +264,7 @@ protected:
260264
void defineBufferMap () const;
261265

262266
int m_verbose{0};
267+
int m_stable_redistribute = 0;
263268
std::unique_ptr<ParGDB> m_gdb_object = std::make_unique<ParGDB>();
264269
ParGDBBase* m_gdb{nullptr};
265270
Vector<std::unique_ptr<MultiFab> > m_dummy_mf;

Tests/Particles/Redistribute/main.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,7 @@ struct TestParams
328328
int do_regrid;
329329
int sort;
330330
int test_level_lost = 0;
331+
int stable_redistribute = 0;
331332
};
332333

333334
void testRedistribute();
@@ -358,6 +359,7 @@ void get_test_params(TestParams& params, const std::string& prefix)
358359
pp.query("num_runtime_real", num_runtime_real);
359360
pp.query("num_runtime_int", num_runtime_int);
360361
pp.query("remove_negative", remove_negative);
362+
pp.query("stable_redistribute", params.stable_redistribute);
361363

362364
params.sort = 0;
363365
pp.query("sort", params.sort);
@@ -410,6 +412,7 @@ void testRedistribute ()
410412
}
411413

412414
TestParticleContainer pc(geom, dm, ba, rr);
415+
pc.setStableRedistribute(params.stable_redistribute);
413416

414417
IntVect nppc(params.num_ppc);
415418

0 commit comments

Comments
 (0)