Skip to content

Commit 28b2820

Browse files
committed
[ELF] Do not use parallel scan to compute section offsets
A function passed to tbb::parallel_scan must be associative, but I don't think our function satisfied that constraint, so I rewrote the code without tbb::parallel_scan.
1 parent 021463c commit 28b2820

File tree

1 file changed

+47
-27
lines changed

1 file changed

+47
-27
lines changed

elf/passes.cc

Lines changed: 47 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
#include <optional>
66
#include <regex>
77
#include <tbb/parallel_for_each.h>
8-
#include <tbb/parallel_scan.h>
98
#include <tbb/partitioner.h>
109
#include <unordered_set>
1110

@@ -469,36 +468,57 @@ template <typename E>
469468
void compute_section_sizes(Context<E> &ctx) {
470469
Timer t(ctx, "compute_section_sizes");
471470

472-
struct T {
473-
i64 offset;
474-
i64 align;
471+
struct Group {
472+
i64 size = 0;
473+
i64 alignment = 1;
474+
i64 offset = 0;
475+
std::span<InputSection<E> *> members;
475476
};
476477

477478
tbb::parallel_for_each(ctx.output_sections,
478479
[&](std::unique_ptr<OutputSection<E>> &osec) {
479-
T sum = tbb::parallel_scan(
480-
tbb::blocked_range<i64>(0, osec->members.size(), 10000),
481-
T{0, 1},
482-
[&](const tbb::blocked_range<i64> &r, T sum, bool is_final) {
483-
for (i64 i = r.begin(); i < r.end(); i++) {
484-
InputSection<E> &isec = *osec->members[i];
485-
sum.offset = align_to(sum.offset, isec.shdr.sh_addralign);
486-
if (is_final)
487-
isec.offset = sum.offset;
488-
sum.offset += isec.shdr.sh_size;
489-
sum.align = std::max<i64>(sum.align, isec.shdr.sh_addralign);
490-
}
491-
return sum;
492-
},
493-
[](T lhs, T rhs) {
494-
i64 offset = align_to(lhs.offset, rhs.align) + rhs.offset;
495-
i64 align = std::max(lhs.align, rhs.align);
496-
return T{offset, align};
497-
},
498-
tbb::simple_partitioner());
499-
500-
osec->shdr.sh_size = sum.offset;
501-
osec->shdr.sh_addralign = sum.align;
480+
if (osec->members.empty())
481+
return;
482+
483+
// Since one output section may contain millions of input sections,
484+
// we first split input sections into groups and assign offsets to
485+
// groups.
486+
std::vector<Group> groups;
487+
constexpr i64 group_size = 10000;
488+
489+
for (std::span<InputSection<E> *> span : split(osec->members, group_size))
490+
groups.push_back(Group{.members = span});
491+
492+
tbb::parallel_for_each(groups, [](Group &group) {
493+
for (InputSection<E> *isec : group.members) {
494+
group.size = align_to(group.size, isec->shdr.sh_addralign) +
495+
isec->shdr.sh_size;
496+
group.alignment = std::max<i64>(group.alignment, isec->shdr.sh_addralign);
497+
}
498+
});
499+
500+
i64 offset = 0;
501+
i64 align = 1;
502+
503+
for (i64 i = 0; i < groups.size(); i++) {
504+
offset = align_to(offset, groups[i].alignment);
505+
groups[i].offset = offset;
506+
offset += groups[i].size;
507+
align = std::max(align, groups[i].alignment);
508+
}
509+
510+
osec->shdr.sh_size = offset;
511+
osec->shdr.sh_addralign = align;
512+
513+
// Assign offsets to input sections.
514+
tbb::parallel_for_each(groups, [](Group &group) {
515+
i64 offset = group.offset;
516+
for (InputSection<E> *isec : group.members) {
517+
offset = align_to(offset, isec->shdr.sh_addralign);
518+
isec->offset = offset;
519+
offset += isec->shdr.sh_size;
520+
}
521+
});
502522
});
503523
}
504524

0 commit comments

Comments
 (0)