Skip to content

Commit 7a8c14a

Browse files
committed Jan 16, 2022
[ELF] Do not use parallel scan to compute section offsets
A function passed to tbb::parallel_scan must be associative, but I don't think our function satisfied that constraint. So I rewrote the code without tbb::parallel_scan.
1 parent 021463c commit 7a8c14a

File tree

1 file changed

+44
-28
lines changed

1 file changed

+44
-28
lines changed
 

‎elf/passes.cc

+44-28
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,6 @@
55
#include <optional>
66
#include <regex>
77
#include <tbb/parallel_for_each.h>
8-
#include <tbb/parallel_scan.h>
98
#include <tbb/partitioner.h>
109
#include <unordered_set>
1110

@@ -203,7 +202,6 @@ void compute_merged_section_sizes(Context<E> &ctx) {
203202

204203
template <typename T>
205204
static std::vector<std::span<T>> split(std::vector<T> &input, i64 unit) {
206-
assert(input.size() > 0);
207205
std::span<T> span(input);
208206
std::vector<std::span<T>> vec;
209207

@@ -469,36 +467,54 @@ template <typename E>
469467
void compute_section_sizes(Context<E> &ctx) {
470468
Timer t(ctx, "compute_section_sizes");
471469

472-
struct T {
473-
i64 offset;
474-
i64 align;
470+
struct Group {
471+
i64 size = 0;
472+
i64 alignment = 1;
473+
i64 offset = 0;
474+
std::span<InputSection<E> *> members;
475475
};
476476

477477
tbb::parallel_for_each(ctx.output_sections,
478478
[&](std::unique_ptr<OutputSection<E>> &osec) {
479-
T sum = tbb::parallel_scan(
480-
tbb::blocked_range<i64>(0, osec->members.size(), 10000),
481-
T{0, 1},
482-
[&](const tbb::blocked_range<i64> &r, T sum, bool is_final) {
483-
for (i64 i = r.begin(); i < r.end(); i++) {
484-
InputSection<E> &isec = *osec->members[i];
485-
sum.offset = align_to(sum.offset, isec.shdr.sh_addralign);
486-
if (is_final)
487-
isec.offset = sum.offset;
488-
sum.offset += isec.shdr.sh_size;
489-
sum.align = std::max<i64>(sum.align, isec.shdr.sh_addralign);
490-
}
491-
return sum;
492-
},
493-
[](T lhs, T rhs) {
494-
i64 offset = align_to(lhs.offset, rhs.align) + rhs.offset;
495-
i64 align = std::max(lhs.align, rhs.align);
496-
return T{offset, align};
497-
},
498-
tbb::simple_partitioner());
499-
500-
osec->shdr.sh_size = sum.offset;
501-
osec->shdr.sh_addralign = sum.align;
479+
// Since one output section may contain millions of input sections,
480+
// we first split input sections into groups and assign offsets to
481+
// groups.
482+
std::vector<Group> groups;
483+
constexpr i64 group_size = 10000;
484+
485+
for (std::span<InputSection<E> *> span : split(osec->members, group_size))
486+
groups.push_back(Group{.members = span});
487+
488+
tbb::parallel_for_each(groups, [](Group &group) {
489+
for (InputSection<E> *isec : group.members) {
490+
group.size = align_to(group.size, isec->shdr.sh_addralign) +
491+
isec->shdr.sh_size;
492+
group.alignment = std::max<i64>(group.alignment, isec->shdr.sh_addralign);
493+
}
494+
});
495+
496+
i64 offset = 0;
497+
i64 align = 1;
498+
499+
for (i64 i = 0; i < groups.size(); i++) {
500+
offset = align_to(offset, groups[i].alignment);
501+
groups[i].offset = offset;
502+
offset += groups[i].size;
503+
align = std::max(align, groups[i].alignment);
504+
}
505+
506+
osec->shdr.sh_size = offset;
507+
osec->shdr.sh_addralign = align;
508+
509+
// Assign offsets to input sections.
510+
tbb::parallel_for_each(groups, [](Group &group) {
511+
i64 offset = group.offset;
512+
for (InputSection<E> *isec : group.members) {
513+
offset = align_to(offset, isec->shdr.sh_addralign);
514+
isec->offset = offset;
515+
offset += isec->shdr.sh_size;
516+
}
517+
});
502518
});
503519
}
504520

0 commit comments

Comments
 (0)
Please sign in to comment.