Skip to content

Commit d4ca5c2

Browse files
committed
Ignore time dependencies from different schedules
Signed-off-by: Bartłomiej Chmiel <[email protected]>
1 parent 815044c commit d4ca5c2

File tree

2 files changed

+63
-15
lines changed

2 files changed

+63
-15
lines changed

src/V3ExecGraph.cpp

Lines changed: 62 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -66,6 +66,7 @@ class ThreadSchedule final {
6666
uint32_t m_id; // Unique ID of a schedule
6767
static uint32_t s_nextId; // Next ID number to use
6868
std::unordered_set<const ExecMTask*> mtasks; // Mtasks in this schedule
69+
uint32_t m_endTime = 0; // Latest task end time in this schedule
6970

7071
public:
7172
// CONSTANTS
@@ -196,6 +197,7 @@ class ThreadSchedule final {
196197
uint32_t scheduleOn(const ExecMTask* mtaskp, uint32_t bestThreadId) {
197198
mtasks.emplace(mtaskp);
198199
const uint32_t bestEndTime = mtaskp->predictStart() + mtaskp->cost();
200+
m_endTime = std::max(m_endTime, bestEndTime);
199201
mtaskState[mtaskp].completionTime = bestEndTime;
200202
mtaskState[mtaskp].threadId = bestThreadId;
201203

@@ -208,6 +210,7 @@ class ThreadSchedule final {
208210
return bestEndTime;
209211
}
210212
// Return true if mtaskp is in this schedule's 'mtasks' set, i.e. it was
// previously added to this schedule via scheduleOn().
bool contains(const ExecMTask* mtaskp) const { return mtasks.count(mtaskp); }
213+
// Latest end time (predictStart() + cost()) among the mtasks passed to
// scheduleOn() on this schedule; 0 if nothing has been scheduled yet.
uint32_t endTime() const { return m_endTime; }
211214
};
212215

213216
uint32_t ThreadSchedule::s_nextId = 0;
@@ -256,6 +259,8 @@ class PackThreads final {
256259
// METHODS
257260
uint32_t completionTime(const ThreadSchedule& schedule, const ExecMTask* mtaskp,
258261
uint32_t threadId) {
262+
// Ignore tasks that were scheduled on a different schedule
263+
if (!schedule.contains(mtaskp)) return 0;
259264
const ThreadSchedule::MTaskState& state = schedule.mtaskState.at(mtaskp);
260265
UASSERT(state.threadId != ThreadSchedule::UNASSIGNED, "Mtask should have assigned thread");
261266
if (threadId == state.threadId) {
@@ -380,12 +385,14 @@ class PackThreads final {
380385
// If no tasks were added to the normal thread schedule, remove it.
381386
if (schedule.mtaskState.empty()) result.erase(result.begin());
382387
result.emplace_back(ThreadSchedule{size});
388+
std::fill(busyUntil.begin(), busyUntil.end(), schedule.endTime());
383389
continue;
384390
}
385391

386392
if (!bestMtaskp && mode == SchedulingMode::WIDE_TASK_SCHEDULING) {
387393
mode = SchedulingMode::SCHEDULING;
388394
if (!schedule.mtaskState.empty()) result.emplace_back(ThreadSchedule{m_nThreads});
395+
std::fill(busyUntil.begin(), busyUntil.end(), schedule.endTime());
389396
continue;
390397
}
391398

@@ -466,25 +473,36 @@ class PackThreads final {
466473
t4->cost(100);
467474
t4->priority(100);
468475
t4->threads(3);
476+
ExecMTask* const t5 = new ExecMTask{&graph, makeBody()};
477+
t5->cost(100);
478+
t5->priority(100);
479+
ExecMTask* const t6 = new ExecMTask{&graph, makeBody()};
480+
t6->cost(100);
481+
t6->priority(100);
469482

470483
/*
471484
0
472485
/ \
473486
1 2
474487
/ \
475-
3 4
488+
3 4
489+
/ \
490+
5 6
476491
*/
477492
new V3GraphEdge{&graph, t0, t1, 1};
478493
new V3GraphEdge{&graph, t0, t2, 1};
479494
new V3GraphEdge{&graph, t2, t3, 1};
480495
new V3GraphEdge{&graph, t2, t4, 1};
496+
new V3GraphEdge{&graph, t3, t5, 1};
497+
new V3GraphEdge{&graph, t4, t6, 1};
481498

482499
constexpr uint32_t threads = 6;
483500
PackThreads packer{threads,
484501
3, // Sandbag numerator
485502
10}; // Sandbag denom
486503

487504
const std::vector<ThreadSchedule> scheduled = packer.pack(graph);
505+
UASSERT_SELFTEST(size_t, scheduled.size(), 3);
488506
UASSERT_SELFTEST(size_t, scheduled[0].threads.size(), threads);
489507
UASSERT_SELFTEST(size_t, scheduled[0].threads[0].size(), 2);
490508
for (size_t i = 1; i < scheduled[0].threads.size(); ++i)
@@ -494,17 +512,23 @@ class PackThreads final {
494512
UASSERT_SELFTEST(const ExecMTask*, scheduled[0].threads[0][1], t1);
495513

496514
UASSERT_SELFTEST(size_t, scheduled[1].threads.size(), threads / 3);
497-
UASSERT_SELFTEST(const ExecMTask*, scheduled[1].threads[1][0], t2);
498-
UASSERT_SELFTEST(const ExecMTask*, scheduled[1].threads[1][1], t3);
499-
UASSERT_SELFTEST(const ExecMTask*, scheduled[1].threads[0][0], t4);
515+
UASSERT_SELFTEST(const ExecMTask*, scheduled[1].threads[0][0], t2);
516+
UASSERT_SELFTEST(const ExecMTask*, scheduled[1].threads[0][1], t3);
517+
UASSERT_SELFTEST(const ExecMTask*, scheduled[1].threads[1][0], t4);
500518

501-
UASSERT_SELFTEST(size_t, ThreadSchedule::mtaskState.size(), 5);
519+
UASSERT_SELFTEST(size_t, scheduled[2].threads.size(), threads);
520+
UASSERT_SELFTEST(const ExecMTask*, scheduled[2].threads[0][0], t5);
521+
UASSERT_SELFTEST(const ExecMTask*, scheduled[2].threads[1][0], t6);
522+
523+
UASSERT_SELFTEST(size_t, ThreadSchedule::mtaskState.size(), 7);
502524

503525
UASSERT_SELFTEST(uint32_t, ThreadSchedule::threadId(t0), 0);
504526
UASSERT_SELFTEST(uint32_t, ThreadSchedule::threadId(t1), 0);
505-
UASSERT_SELFTEST(uint32_t, ThreadSchedule::threadId(t2), 1);
506-
UASSERT_SELFTEST(uint32_t, ThreadSchedule::threadId(t3), 1);
507-
UASSERT_SELFTEST(uint32_t, ThreadSchedule::threadId(t4), 0);
527+
UASSERT_SELFTEST(uint32_t, ThreadSchedule::threadId(t2), 0);
528+
UASSERT_SELFTEST(uint32_t, ThreadSchedule::threadId(t3), 0);
529+
UASSERT_SELFTEST(uint32_t, ThreadSchedule::threadId(t4), 1);
530+
UASSERT_SELFTEST(uint32_t, ThreadSchedule::threadId(t5), 0);
531+
UASSERT_SELFTEST(uint32_t, ThreadSchedule::threadId(t6), 1);
508532

509533
// On its native thread, we see the actual end time for t0:
510534
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[0], t0, 0), 1000);
@@ -518,12 +542,36 @@ class PackThreads final {
518542
// with t0's sandbagged time; compounding caused trouble in
519543
// practice.
520544
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[0], t1, 1), 1130);
521-
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t2, 0), 1229);
522-
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t2, 1), 1199);
523-
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t3, 0), 1329);
524-
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t3, 1), 1299);
525-
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t4, 0), 1329);
526-
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t4, 1), 1359);
545+
546+
// Wide task scheduling
547+
548+
// Task does not depend on previous or future schedules
549+
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[0], t2, 0), 0);
550+
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[2], t2, 0), 0);
551+
552+
// We allow sandbagging for hierarchical children tasks, this does not affect
553+
// wide task scheduling. When the next schedule is created it doesn't matter
554+
// anyway.
555+
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t2, 0), 1200);
556+
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t2, 1), 1230);
557+
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t2, 2), 1230);
558+
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t2, 3), 1230);
559+
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t2, 4), 1230);
560+
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t2, 5), 1230);
561+
562+
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t3, 0), 1300);
563+
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t3, 1), 1330);
564+
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t3, 2), 1330);
565+
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t3, 3), 1330);
566+
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t3, 4), 1330);
567+
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t3, 5), 1330);
568+
569+
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t4, 0), 1360);
570+
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t4, 1), 1330);
571+
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t4, 2), 1360);
572+
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t4, 3), 1360);
573+
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t4, 4), 1360);
574+
UASSERT_SELFTEST(uint32_t, packer.completionTime(scheduled[1], t4, 5), 1360);
527575

528576
for (AstNode* const nodep : mTaskBodyps) nodep->deleteTree();
529577
}

test_regress/t/t_hier_block_perf.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@
3636
test.file_grep(test.obj_dir + "/V" + test.name + "__hier.dir/V" + test.name + "__stats.txt",
3737
r'Optimizations, Thread schedule count\s+(\d+)', 4)
3838
test.file_grep(test.obj_dir + "/V" + test.name + "__hier.dir/V" + test.name + "__stats.txt",
39-
r'Optimizations, Thread schedule total tasks\s+(\d+)', 10)
39+
r'Optimizations, Thread schedule total tasks\s+(\d+)', 12)
4040

4141
test.execute(all_run_flags=[
4242
"+verilator+prof+exec+start+2",

0 commit comments

Comments
 (0)