Skip to content

Commit 1ab1a6e

Browse files
committed
Profile nested mtasks
Signed-off-by: Bartłomiej Chmiel <[email protected]>
1 parent cbabd8a commit 1ab1a6e

File tree

9 files changed

+181
-148
lines changed

bin/verilator_gantt

Lines changed: 60 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,9 @@ def read_data(filename):
3737
with open(filename, "r", encoding="utf8") as fh:
3838
re_thread = re.compile(r'^VLPROFTHREAD (\d+)$')
3939
re_record = re.compile(r'^VLPROFEXEC (\S+) (\d+)(.*)$')
40-
re_payload_mtaskBegin = re.compile(r'id (\d+) predictStart (\d+) cpu (\d+)')
41-
re_payload_mtaskEnd = re.compile(r'id (\d+) predictCost (\d+)')
40+
re_payload_mtaskBegin = re.compile(
41+
r'id (\d+) predictStart (\d+) cpu (\d+) module\s*(\w+)?')
42+
re_payload_mtaskEnd = re.compile(r'predictCost (\d+)')
4243
re_payload_wait = re.compile(r'cpu (\d+)')
4344

4445
re_arg1 = re.compile(r'VLPROF arg\s+(\S+)\+([0-9.]*)\s*')
@@ -56,8 +57,8 @@ def read_data(filename):
5657

5758
ExecGraphStack = []
5859
SectionStack = []
60+
MtasksStack = []
5961
ThreadScheduleWait = collections.defaultdict(list)
60-
mTaskThread = {}
6162

6263
for line in fh:
6364
recordMatch = re_record.match(line)
@@ -74,33 +75,35 @@ def read_data(filename):
7475
SectionStack.pop()
7576
Sections[thread].append((tick, tuple(SectionStack)))
7677
elif kind == "MTASK_BEGIN":
77-
mtask, predict_start, ecpu = re_payload_mtaskBegin.match(payload).groups()
78+
mtask, predict_start, ecpu, module = re_payload_mtaskBegin.match(
79+
payload).groups()
7880
mtask = int(mtask)
7981
predict_start = int(predict_start)
8082
ecpu = int(ecpu)
81-
mTaskThread[mtask] = thread
8283
records = Threads[thread]
83-
assert not records or records[-1]['start'] <= records[-1]['end'] <= tick
8484
records.append({
8585
'start': tick,
8686
'mtask': mtask,
8787
'predict_start': predict_start,
88+
'module': module,
8889
'cpu': ecpu
8990
})
90-
Mtasks[mtask]['begin'] = tick
91-
Mtasks[mtask]['thread'] = thread
92-
Mtasks[mtask]['predict_start'] = predict_start
91+
Mtasks[(module, mtask)]['begin'] = tick
92+
Mtasks[(module, mtask)]['predict_start'] = predict_start
93+
Mtasks[(module, mtask)]['thread'] = thread
94+
MtasksStack.append((module, mtask, records[-1]))
9395
elif kind == "MTASK_END":
94-
mtask, predict_cost = re_payload_mtaskEnd.match(payload).groups()
96+
predict_cost, = re_payload_mtaskEnd.match(payload).groups()
9597
mtask = int(mtask)
98+
module, mtask, record = MtasksStack.pop()
9699
predict_cost = int(predict_cost)
97-
begin = Mtasks[mtask]['begin']
98-
record = Threads[mTaskThread[mtask]][-1]
100+
begin = Mtasks[(module, mtask)]['begin']
99101
record['end'] = tick
102+
assert record and records[-1]['start'] <= records[-1]['end'] <= tick
100103
record['predict_cost'] = predict_cost
101-
Mtasks[mtask]['elapsed'] += tick - begin
102-
Mtasks[mtask]['predict_cost'] = predict_cost
103-
Mtasks[mtask]['end'] = max(Mtasks[mtask]['end'], tick)
104+
Mtasks[(module, mtask)]['elapsed'] += tick - begin
105+
Mtasks[(module, mtask)]['predict_cost'] = predict_cost
106+
Mtasks[(module, mtask)]['end'] = max(Mtasks[(module, mtask)]['end'], tick)
104107
elif kind == "THREAD_SCHEDULE_WAIT_BEGIN":
105108
ecpu = int(re_payload_wait.match(payload).groups()[0])
106109
ThreadScheduleWait[ecpu].append(tick)
@@ -171,6 +174,7 @@ def report():
171174
for records in Threads.values():
172175
for record in records:
173176
cpu = record['cpu']
177+
#raise Exception(record)
174178
elapsed = record['end'] - record['start']
175179
Cpus[cpu]['mtask_time'] += elapsed
176180

@@ -234,8 +238,8 @@ def report_mtasks():
234238
long_mtask = None
235239
predict_mtask_time = 0
236240
predict_elapsed = 0
237-
for mtaskId in Mtasks:
238-
record = Mtasks[mtaskId]
241+
for (module, mtaskId) in Mtasks:
242+
record = Mtasks[(module, mtaskId)]
239243
predict_mtask_time += record['predict_cost']
240244
total_mtask_time += record['elapsed']
241245
thread_mtask_time[record['thread']] += record['elapsed']
@@ -244,6 +248,7 @@ def report_mtasks():
244248
if record['elapsed'] > long_mtask_time:
245249
long_mtask_time = record['elapsed']
246250
long_mtask = mtaskId
251+
long_mtask_module = module
247252
Global['predict_last_end'] = predict_elapsed
248253

249254
serialTime = ElapsedTime - ExecGraphTime
@@ -272,31 +277,51 @@ def report_mtasks():
272277
max_p2e = -1000000
273278
max_mtask = None
274279

275-
for mtask in sorted(Mtasks.keys()):
276-
if Mtasks[mtask]['elapsed'] > 0:
277-
if Mtasks[mtask]['predict_cost'] == 0:
278-
Mtasks[mtask]['predict_cost'] = 1 # don't log(0) below
279-
p2e_ratio = math.log(Mtasks[mtask]['predict_cost'] / Mtasks[mtask]['elapsed'])
280+
for (module, mtaskId) in sorted(Mtasks.keys()):
281+
mtask = Mtasks[(module, mtaskId)]
282+
if mtask['elapsed'] > 0:
283+
if mtask['predict_cost'] == 0:
284+
mtask['predict_cost'] = 1 # don't log(0) below
285+
p2e_ratio = math.log(mtask['predict_cost'] / mtask['elapsed'])
280286
p2e_ratios.append(p2e_ratio)
281287

282288
if p2e_ratio > max_p2e:
283289
max_p2e = p2e_ratio
284-
max_mtask = mtask
290+
max_mtask = mtaskId
291+
max_module = module
285292
if p2e_ratio < min_p2e:
286293
min_p2e = p2e_ratio
287-
min_mtask = mtask
294+
min_mtask = mtaskId
295+
min_module = module
288296

289297
print("\nMTask statistics:")
290-
print(" Longest mtask id = {}".format(long_mtask))
298+
if long_mtask_module:
299+
print(" Longest mtask id = {} from module '{}'".format(long_mtask, long_mtask_module))
300+
else:
301+
print(" Longest mtask id = {}".format(long_mtask))
291302
print(" Longest mtask time = {:.2%} of time elapsed in parallelized code".format(
292303
long_mtask_time / ExecGraphTime))
293304
print(" min log(p2e) = %0.3f" % min_p2e, end="")
294305

295-
print(" from mtask %d (predict %d," % (min_mtask, Mtasks[min_mtask]['predict_cost']), end="")
296-
print(" elapsed %d)" % Mtasks[min_mtask]['elapsed'])
306+
if min_module:
307+
print(" from module '%s' mtask %d (predict %d," %
308+
(min_module, min_mtask, Mtasks[(min_module, min_mtask)]['predict_cost']),
309+
end="")
310+
else:
311+
print(" from mtask %d (predict %d," %
312+
(min_mtask, Mtasks[(min_module, min_mtask)]['predict_cost']),
313+
end="")
314+
print(" elapsed %d)" % Mtasks[(min_module, min_mtask)]['elapsed'])
297315
print(" max log(p2e) = %0.3f" % max_p2e, end="")
298-
print(" from mtask %d (predict %d," % (max_mtask, Mtasks[max_mtask]['predict_cost']), end="")
299-
print(" elapsed %d)" % Mtasks[max_mtask]['elapsed'])
316+
if max_module:
317+
print(" from module '%s' mtask %d (predict %d," %
318+
(max_module, max_mtask, Mtasks[(max_module, max_mtask)]['predict_cost']),
319+
end="")
320+
else:
321+
print(" from mtask %d (predict %d," %
322+
(max_mtask, Mtasks[(max_module, max_mtask)]['predict_cost']),
323+
end="")
324+
print(" elapsed %d)" % Mtasks[(max_module, max_mtask)]['elapsed'])
300325

301326
stddev = statistics.pstdev(p2e_ratios)
302327
mean = statistics.mean(p2e_ratios)
@@ -482,17 +507,17 @@ def write_vcd(filename):
482507
# Compute scale so predicted graph is of same width as interval
483508
measured_scaling = (end - start) / Global['predict_last_end']
484509
# Predict mtasks that fill the time the execution occupied
485-
for mtask in Mtasks:
486-
thread = Mtasks[mtask]['thread']
487-
pred_scaled_start = start + int(Mtasks[mtask]['predict_start'] * measured_scaling)
510+
for (module, mtaskId) in Mtasks:
511+
mtask = Mtasks[(module, mtaskId)]
512+
thread = mtask['thread']
513+
pred_scaled_start = start + int(mtask['predict_start'] * measured_scaling)
488514
pred_scaled_end = start + int(
489-
(Mtasks[mtask]['predict_start'] + Mtasks[mtask]['predict_cost']) *
490-
measured_scaling)
515+
(mtask['predict_start'] + mtask['predict_cost']) * measured_scaling)
491516
if pred_scaled_start == pred_scaled_end:
492517
continue
493518

494519
mcode = getCode(32, 'predicted', 't%d_mtask' % thread)
495-
addValue(mcode, pred_scaled_start, mtask)
520+
addValue(mcode, pred_scaled_start, mtaskId)
496521
addValue(mcode, pred_scaled_end, None)
497522

498523
parallelism['predicted'][pred_scaled_start] += 1

include/verilated_profiler.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -190,13 +190,13 @@ void VlExecutionProfiler::dump(const char* filenamep, uint64_t tickEnd)
190190
break;
191191
case VlExecutionRecord::Type::MTASK_BEGIN: {
192192
const auto& payload = er.m_payload.mtaskBegin;
193-
fprintf(fp, " id %u predictStart %u cpu %u\n", payload.m_id,
194-
payload.m_predictStart, payload.m_cpu);
193+
fprintf(fp, " id %u predictStart %u cpu %u module %s\n", payload.m_id,
194+
payload.m_predictStart, payload.m_cpu, payload.m_module);
195195
break;
196196
}
197197
case VlExecutionRecord::Type::MTASK_END: {
198198
const auto& payload = er.m_payload.mtaskEnd;
199-
fprintf(fp, " id %u predictCost %u\n", payload.m_id, payload.m_predictCost);
199+
fprintf(fp, " predictCost %u\n", payload.m_predictCost);
200200
break;
201201
}
202202
case VlExecutionRecord::Type::THREAD_SCHEDULE_WAIT_BEGIN:

include/verilated_profiler.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -92,9 +92,9 @@ class VlExecutionRecord final {
9292
uint32_t m_id; // MTask id
9393
uint32_t m_predictStart; // Time scheduler predicted would start
9494
uint32_t m_cpu; // Executing CPU id
95+
const char* m_module; // Name of module with this mtask
9596
} mtaskBegin;
9697
struct {
97-
uint32_t m_id; // MTask id
9898
uint32_t m_predictCost; // How long scheduler predicted would take
9999
} mtaskEnd;
100100
struct {
@@ -120,14 +120,14 @@ class VlExecutionRecord final {
120120
m_type = Type::SECTION_PUSH;
121121
}
122122
void sectionPop() { m_type = Type::SECTION_POP; }
123-
void mtaskBegin(uint32_t id, uint32_t predictStart) {
123+
void mtaskBegin(uint32_t id, uint32_t predictStart, const char* moduleName) {
124124
m_payload.mtaskBegin.m_id = id;
125125
m_payload.mtaskBegin.m_predictStart = predictStart;
126126
m_payload.mtaskBegin.m_cpu = VlOs::getcpu();
127+
m_payload.mtaskBegin.m_module = moduleName;
127128
m_type = Type::MTASK_BEGIN;
128129
}
129-
void mtaskEnd(uint32_t id, uint32_t predictCost) {
130-
m_payload.mtaskEnd.m_id = id;
130+
void mtaskEnd(uint32_t predictCost) {
131131
m_payload.mtaskEnd.m_predictCost = predictCost;
132132
m_type = Type::MTASK_END;
133133
}

src/V3ExecGraph.cpp

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "V3Config.h"
2222
#include "V3EmitCBase.h"
2323
#include "V3File.h"
24+
#include "V3Global.h"
2425
#include "V3GraphStream.h"
2526
#include "V3Hasher.h"
2627
#include "V3InstrCount.h"
@@ -916,18 +917,14 @@ void wrapMTaskBodies(AstExecGraph* const execGraphp) {
916917
funcp->addStmtsp(new AstCStmt{flp, stmt});
917918
};
918919

919-
if (v3Global.opt.hierChild() || !v3Global.opt.hierBlocks().empty()) {
920-
addStrStmt(
921-
"static const unsigned taskId = vlSymsp->__Vm_threadPoolp->assignTaskIndex();\n");
922-
} else {
923-
const string& id = std::to_string(mtaskp->id());
924-
addStrStmt("static constexpr unsigned taskId = " + id + ";\n");
925-
}
920+
addStrStmt("static constexpr unsigned taskId = " + cvtToStr(mtaskp->id()) + ";\n");
926921

927-
if (v3Global.opt.profExec() && mtaskp->threads() <= 1) {
922+
if (v3Global.opt.profExec()) {
928923
const string& predictStart = std::to_string(mtaskp->predictStart());
924+
// We use top module flag as it is always set for all hierarchical phases, when it is
925+
// not set we assume that it is a non-hierarchical run.
929926
addStrStmt("VL_EXEC_TRACE_ADD_RECORD(vlSymsp).mtaskBegin(taskId, " + predictStart
930-
+ ");\n");
927+
+ ", \"" + v3Global.opt.topModule() + "\");\n");
931928
}
932929

933930
// Set mtask ID in the run-time system
@@ -939,10 +936,9 @@ void wrapMTaskBodies(AstExecGraph* const execGraphp) {
939936
// Flush message queue
940937
addStrStmt("Verilated::endOfThreadMTask(vlSymsp->__Vm_evalMsgQp);\n");
941938

942-
if (v3Global.opt.profExec() && mtaskp->threads() <= 1) {
943-
const string& predictConst = std::to_string(mtaskp->cost());
944-
addStrStmt("VL_EXEC_TRACE_ADD_RECORD(vlSymsp).mtaskEnd(taskId, " + predictConst
945-
+ ");\n");
939+
if (v3Global.opt.profExec()) {
940+
const string& predictCost = std::to_string(mtaskp->cost());
941+
addStrStmt("VL_EXEC_TRACE_ADD_RECORD(vlSymsp).mtaskEnd(" + predictCost + ");\n");
946942
}
947943

948944
// AstMTask will simply contain a call

test_regress/t/t_gantt_io.dat

Lines changed: 30 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -902,40 +902,42 @@ VLPROFPROC power management: ts ttp tm hwpstate cpb eff_freq_ro [13] [14]
902902
VLPROFPROC
903903
VLPROFTHREAD 0
904904
VLPROFEXEC EXEC_GRAPH_BEGIN 945
905-
VLPROFEXEC MTASK_BEGIN 2695 id 6 predictStart 0 cpu 19
906-
VLPROFEXEC MTASK_END 2905 id 6 predictCost 30
907-
VLPROFEXEC MTASK_BEGIN 9695 id 10 predictStart 196 cpu 19
908-
VLPROFEXEC MTASK_END 9870 id 10 predictCost 30
905+
VLPROFEXEC MTASK_BEGIN 2695 id 6 predictStart 0 cpu 19 module top
906+
VLPROFEXEC MTASK_BEGIN 3795 id 10 predictStart 196 cpu 19 module sub
907+
VLPROFEXEC MTASK_END 4850 predictCost 30
908+
VLPROFEXEC MTASK_END 5905 predictCost 30
909+
VLPROFEXEC MTASK_BEGIN 9695 id 10 predictStart 196 cpu 19 module top
910+
VLPROFEXEC MTASK_END 9870 predictCost 30
909911
VLPROFEXEC EXEC_GRAPH_END 12180
910912
VLPROFEXEC EXEC_GRAPH_BEGIN 14000
911-
VLPROFEXEC MTASK_BEGIN 15610 id 6 predictStart 0 cpu 19
912-
VLPROFEXEC MTASK_END 15820 id 6 predictCost 30
913+
VLPROFEXEC MTASK_BEGIN 15610 id 6 predictStart 0 cpu 19 module top
914+
VLPROFEXEC MTASK_END 15820 predictCost 30
913915
VLPROFEXEC THREAD_SCHEDULE_WAIT_BEGIN 20000 cpu 19
914916
VLPROFEXEC THREAD_SCHEDULE_WAIT_END 21000 cpu 19
915-
VLPROFEXEC MTASK_BEGIN 21700 id 10 predictStart 196 cpu 19
916-
VLPROFEXEC MTASK_END 21875 id 10 predictCost 30
917+
VLPROFEXEC MTASK_BEGIN 21700 id 10 predictStart 196 cpu 19 module top
918+
VLPROFEXEC MTASK_END 21875 predictCost 30
917919
VLPROFEXEC EXEC_GRAPH_END 22085
918920
VLPROFTHREAD 1
919-
VLPROFEXEC MTASK_BEGIN 5495 id 5 predictStart 0 cpu 10
920-
VLPROFEXEC MTASK_END 6090 id 5 predictCost 30
921-
VLPROFEXEC MTASK_BEGIN 6300 id 7 predictStart 30 cpu 10
922-
VLPROFEXEC MTASK_END 6895 id 7 predictCost 30
923-
VLPROFEXEC MTASK_BEGIN 7490 id 8 predictStart 60 cpu 10
924-
VLPROFEXEC MTASK_END 8540 id 8 predictCost 107
925-
VLPROFEXEC MTASK_BEGIN 9135 id 9 predictStart 167 cpu 10
926-
VLPROFEXEC MTASK_END 9730 id 9 predictCost 30
927-
VLPROFEXEC MTASK_BEGIN 10255 id 11 predictStart 197 cpu 10
928-
VLPROFEXEC MTASK_END 11060 id 11 predictCost 30
929-
VLPROFEXEC MTASK_BEGIN 18375 id 5 predictStart 0 cpu 10
930-
VLPROFEXEC MTASK_END 18970 id 5 predictCost 30
931-
VLPROFEXEC MTASK_BEGIN 19145 id 7 predictStart 30 cpu 10
932-
VLPROFEXEC MTASK_END 19320 id 7 predictCost 30
933-
VLPROFEXEC MTASK_BEGIN 19670 id 8 predictStart 60 cpu 10
934-
VLPROFEXEC MTASK_END 19810 id 8 predictCost 107
935-
VLPROFEXEC MTASK_BEGIN 20650 id 9 predictStart 167 cpu 10
936-
VLPROFEXEC MTASK_END 20720 id 9 predictCost 30
937-
VLPROFEXEC MTASK_BEGIN 21140 id 11 predictStart 197 cpu 10
938-
VLPROFEXEC MTASK_END 21245 id 11 predictCost 30
921+
VLPROFEXEC MTASK_BEGIN 5495 id 5 predictStart 0 cpu 10 module top
922+
VLPROFEXEC MTASK_END 6090 predictCost 30
923+
VLPROFEXEC MTASK_BEGIN 6300 id 7 predictStart 30 cpu 10 module top
924+
VLPROFEXEC MTASK_END 6895 predictCost 30
925+
VLPROFEXEC MTASK_BEGIN 7490 id 8 predictStart 60 cpu 10 module top
926+
VLPROFEXEC MTASK_END 8540 predictCost 107
927+
VLPROFEXEC MTASK_BEGIN 9135 id 9 predictStart 167 cpu 10 module top
928+
VLPROFEXEC MTASK_END 9730 predictCost 30
929+
VLPROFEXEC MTASK_BEGIN 10255 id 11 predictStart 197 cpu 10 module top
930+
VLPROFEXEC MTASK_END 11060 predictCost 30
931+
VLPROFEXEC MTASK_BEGIN 18375 id 5 predictStart 0 cpu 10 module top
932+
VLPROFEXEC MTASK_END 18970 predictCost 30
933+
VLPROFEXEC MTASK_BEGIN 19145 id 7 predictStart 30 cpu 10 module top
934+
VLPROFEXEC MTASK_END 19320 predictCost 30
935+
VLPROFEXEC MTASK_BEGIN 19670 id 8 predictStart 60 cpu 10 module top
936+
VLPROFEXEC MTASK_END 19810 predictCost 107
937+
VLPROFEXEC MTASK_BEGIN 20650 id 9 predictStart 167 cpu 10 module top
938+
VLPROFEXEC MTASK_END 20720 predictCost 30
939+
VLPROFEXEC MTASK_BEGIN 21140 id 11 predictStart 197 cpu 10 module top
940+
VLPROFEXEC MTASK_END 21245 predictCost 30
939941
VLPROFEXEC THREAD_SCHEDULE_WAIT_BEGIN 22000 cpu 10
940942
VLPROFEXEC THREAD_SCHEDULE_WAIT_END 23000 cpu 10
941943
VLPROF stat ticks 23415

0 commit comments

Comments (0)