|
5 | 5 | #include <optional>
|
6 | 6 | #include <regex>
|
7 | 7 | #include <tbb/parallel_for_each.h>
|
8 |
| -#include <tbb/parallel_scan.h> |
9 | 8 | #include <tbb/partitioner.h>
|
10 | 9 | #include <unordered_set>
|
11 | 10 |
|
@@ -203,7 +202,6 @@ void compute_merged_section_sizes(Context<E> &ctx) {
|
203 | 202 |
|
204 | 203 | template <typename T>
|
205 | 204 | static std::vector<std::span<T>> split(std::vector<T> &input, i64 unit) {
|
206 |
| - assert(input.size() > 0); |
207 | 205 | std::span<T> span(input);
|
208 | 206 | std::vector<std::span<T>> vec;
|
209 | 207 |
|
@@ -469,36 +467,54 @@ template <typename E>
|
469 | 467 | void compute_section_sizes(Context<E> &ctx) {
|
470 | 468 | Timer t(ctx, "compute_section_sizes");
|
471 | 469 |
|
472 |
| - struct T { |
473 |
| - i64 offset; |
474 |
| - i64 align; |
| 470 | + struct Group { |
| 471 | + i64 size = 0; |
| 472 | + i64 alignment = 1; |
| 473 | + i64 offset = 0; |
| 474 | + std::span<InputSection<E> *> members; |
475 | 475 | };
|
476 | 476 |
|
477 | 477 | tbb::parallel_for_each(ctx.output_sections,
|
478 | 478 | [&](std::unique_ptr<OutputSection<E>> &osec) {
|
479 |
| - T sum = tbb::parallel_scan( |
480 |
| - tbb::blocked_range<i64>(0, osec->members.size(), 10000), |
481 |
| - T{0, 1}, |
482 |
| - [&](const tbb::blocked_range<i64> &r, T sum, bool is_final) { |
483 |
| - for (i64 i = r.begin(); i < r.end(); i++) { |
484 |
| - InputSection<E> &isec = *osec->members[i]; |
485 |
| - sum.offset = align_to(sum.offset, isec.shdr.sh_addralign); |
486 |
| - if (is_final) |
487 |
| - isec.offset = sum.offset; |
488 |
| - sum.offset += isec.shdr.sh_size; |
489 |
| - sum.align = std::max<i64>(sum.align, isec.shdr.sh_addralign); |
490 |
| - } |
491 |
| - return sum; |
492 |
| - }, |
493 |
| - [](T lhs, T rhs) { |
494 |
| - i64 offset = align_to(lhs.offset, rhs.align) + rhs.offset; |
495 |
| - i64 align = std::max(lhs.align, rhs.align); |
496 |
| - return T{offset, align}; |
497 |
| - }, |
498 |
| - tbb::simple_partitioner()); |
499 |
| - |
500 |
| - osec->shdr.sh_size = sum.offset; |
501 |
| - osec->shdr.sh_addralign = sum.align; |
| 479 | + // Since one output section may contain millions of input sections, |
| 480 | + // we first split input sections into groups and assign offsets to |
| 481 | + // groups. |
| 482 | + std::vector<Group> groups; |
| 483 | + constexpr i64 group_size = 10000; |
| 484 | + |
| 485 | + for (std::span<InputSection<E> *> span : split(osec->members, group_size)) |
| 486 | + groups.push_back(Group{.members = span}); |
| 487 | + |
| 488 | + tbb::parallel_for_each(groups, [](Group &group) { |
| 489 | + for (InputSection<E> *isec : group.members) { |
| 490 | + group.size = align_to(group.size, isec->shdr.sh_addralign) + |
| 491 | + isec->shdr.sh_size; |
| 492 | + group.alignment = std::max<i64>(group.alignment, isec->shdr.sh_addralign); |
| 493 | + } |
| 494 | + }); |
| 495 | + |
| 496 | + i64 offset = 0; |
| 497 | + i64 align = 1; |
| 498 | + |
| 499 | + for (i64 i = 0; i < groups.size(); i++) { |
| 500 | + offset = align_to(offset, groups[i].alignment); |
| 501 | + groups[i].offset = offset; |
| 502 | + offset += groups[i].size; |
| 503 | + align = std::max(align, groups[i].alignment); |
| 504 | + } |
| 505 | + |
| 506 | + osec->shdr.sh_size = offset; |
| 507 | + osec->shdr.sh_addralign = align; |
| 508 | + |
| 509 | + // Assign offsets to input sections. |
| 510 | + tbb::parallel_for_each(groups, [](Group &group) { |
| 511 | + i64 offset = group.offset; |
| 512 | + for (InputSection<E> *isec : group.members) { |
| 513 | + offset = align_to(offset, isec->shdr.sh_addralign); |
| 514 | + isec->offset = offset; |
| 515 | + offset += isec->shdr.sh_size; |
| 516 | + } |
| 517 | + }); |
502 | 518 | });
|
503 | 519 | }
|
504 | 520 |
|
|
0 commit comments