Skip to content

Commit 800b776

Browse files
committed
Flipped the nesting of the loops to improve memory locality.
This ensures that the tight inner loop only writes to the buffer of a single set. With the previous nesting, the inner loop iterated over the sets, so each iteration had to jump to a different set's buffer.
1 parent c4d2a96 commit 800b776

File tree

1 file changed

+10
-14
lines changed

1 file changed

+10
-14
lines changed

include/scran_aggregate/aggregate_across_genes.hpp

Lines changed: 10 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -211,13 +211,11 @@ void compute_aggregate_by_row(
211211

212212
for (size_t sub = 0; sub < nsubs; ++sub) {
213213
auto range = ext->fetch(vbuffer.data(), ibuffer.data());
214-
const auto& sets = remapping[sub];
215-
216-
for (Index_ c = 0; c < range.number; ++c) {
217-
auto cell = range.index[c];
218-
auto val = range.value[c];
219-
for (const auto& sw : sets) {
220-
buffers.sum[sw.first][cell] += val * sw.second;
214+
for (const auto& sw : remapping[sub]) {
215+
auto outptr = buffers.sum[sw.first];
216+
auto wt = sw.second;
217+
for (Index_ c = 0; c < range.number; ++c) {
218+
outptr[range.index[c]] += range.value[c] * wt;
221219
}
222220
}
223221
}
@@ -230,13 +228,11 @@ void compute_aggregate_by_row(
230228

231229
for (size_t sub = 0; sub < nsubs; ++sub) {
232230
auto ptr = ext->fetch(vbuffer.data());
233-
const auto& sets = remapping[sub];
234-
235-
for (Index_ cell = 0; cell < length; ++cell) {
236-
auto val = ptr[cell];
237-
size_t pos = cell + start;
238-
for (const auto& sw : sets) {
239-
buffers.sum[sw.first][pos] += val * sw.second;
231+
for (const auto& sw : remapping[sub]) {
232+
auto outptr = buffers.sum[sw.first];
233+
auto wt = sw.second;
234+
for (Index_ cell = 0; cell < length; ++cell) {
235+
outptr[cell + start] += ptr[cell] * wt;
240236
}
241237
}
242238
}

0 commit comments

Comments
 (0)