forked from goldshtn/linux-tracing-workshop
-
Notifications
You must be signed in to change notification settings - Fork 0
/
lockstat-solution.py
203 lines (168 loc) · 6.17 KB
/
lockstat-solution.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
#!/usr/bin/env python
import sys
import itertools
from time import sleep
from bcc import BPF
text = """
#include <linux/ptrace.h>
struct thread_mutex_key_t {
u32 tid;
u64 mtx;
int lock_stack_id;
};
struct thread_mutex_val_t {
u64 wait_time_ns;
u64 lock_time_ns;
u64 enter_count;
};
struct mutex_timestamp_t {
u64 mtx;
u64 timestamp;
};
struct mutex_lock_time_key_t {
u32 tid;
u64 mtx;
};
struct mutex_lock_time_val_t {
u64 timestamp;
int stack_id;
};
// Mutex to the stack id which initialized that mutex
BPF_HASH(init_stacks, u64, int);
// Main info database about mutex and thread pairs
BPF_HASH(locks, struct thread_mutex_key_t, struct thread_mutex_val_t);
// Pid to the mutex address and timestamp of when the wait started
BPF_HASH(lock_start, u32, struct mutex_timestamp_t);
// Pid and mutex address to the timestamp of when the wait ended (mutex acquired) and the stack id
BPF_HASH(lock_end, struct mutex_lock_time_key_t, struct mutex_lock_time_val_t);
// Histogram of wait times
BPF_HISTOGRAM(mutex_wait_hist, u64);
// Histogram of hold times
BPF_HISTOGRAM(mutex_lock_hist, u64);
BPF_STACK_TRACE(stacks, 4096);
int probe_mutex_lock(struct pt_regs *ctx)
{
u64 now = bpf_ktime_get_ns();
u32 pid = bpf_get_current_pid_tgid();
struct mutex_timestamp_t val = {};
val.mtx = PT_REGS_PARM1(ctx);
val.timestamp = now;
lock_start.update(&pid, &val);
return 0;
}
int probe_mutex_lock_return(struct pt_regs *ctx)
{
u64 now = bpf_ktime_get_ns();
u32 pid = bpf_get_current_pid_tgid();
struct mutex_timestamp_t *entry = lock_start.lookup(&pid);
if (entry == 0)
return 0; // Missed the entry
u64 wait_time = now - entry->timestamp;
int stack_id = stacks.get_stackid(ctx, BPF_F_REUSE_STACKID|BPF_F_USER_STACK);
// If pthread_mutex_lock() returned 0, we have the lock
if (PT_REGS_RC(ctx) == 0) {
// Record the lock acquisition timestamp so that we can read it when unlocking
struct mutex_lock_time_key_t key = {};
key.mtx = entry->mtx;
key.tid = pid;
struct mutex_lock_time_val_t val = {};
val.timestamp = now;
val.stack_id = stack_id;
lock_end.update(&key, &val);
}
// Record the wait time for this mutex-tid-stack combination even if locking failed
struct thread_mutex_key_t tm_key = {};
tm_key.mtx = entry->mtx;
tm_key.tid = pid;
tm_key.lock_stack_id = stack_id;
struct thread_mutex_val_t *existing_tm_val, new_tm_val = {};
existing_tm_val = locks.lookup_or_init(&tm_key, &new_tm_val);
existing_tm_val->wait_time_ns += wait_time;
if (PT_REGS_RC(ctx) == 0) {
existing_tm_val->enter_count += 1;
}
u64 mtx_slot = bpf_log2l(wait_time / 1000);
mutex_wait_hist.increment(mtx_slot);
lock_start.delete(&pid);
return 0;
}
int probe_mutex_unlock(struct pt_regs *ctx)
{
u64 now = bpf_ktime_get_ns();
u64 mtx = PT_REGS_PARM1(ctx);
u32 pid = bpf_get_current_pid_tgid();
struct mutex_lock_time_key_t lock_key = {};
lock_key.mtx = mtx;
lock_key.tid = pid;
struct mutex_lock_time_val_t *lock_val = lock_end.lookup(&lock_key);
if (lock_val == 0)
return 0; // Missed the lock of this mutex
u64 hold_time = now - lock_val->timestamp;
struct thread_mutex_key_t tm_key = {};
tm_key.mtx = mtx;
tm_key.tid = pid;
tm_key.lock_stack_id = lock_val->stack_id;
struct thread_mutex_val_t *existing_tm_val = locks.lookup(&tm_key);
if (existing_tm_val == 0)
return 0; // Couldn't find this record
existing_tm_val->lock_time_ns += hold_time;
u64 slot = bpf_log2l(hold_time / 1000);
mutex_lock_hist.increment(slot);
lock_end.delete(&lock_key);
return 0;
}
int probe_mutex_init(struct pt_regs *ctx)
{
int stack_id = stacks.get_stackid(ctx, BPF_F_REUSE_STACKID|BPF_F_USER_STACK);
u64 mutex_addr = PT_REGS_PARM1(ctx);
init_stacks.update(&mutex_addr, &stack_id);
return 0;
}
"""
def attach(bpf, pid):
bpf.attach_uprobe(name="pthread", sym="pthread_mutex_init", fn_name="probe_mutex_init", pid=pid)
bpf.attach_uprobe(name="pthread", sym="pthread_mutex_lock", fn_name="probe_mutex_lock", pid=pid)
bpf.attach_uretprobe(name="pthread", sym="pthread_mutex_lock", fn_name="probe_mutex_lock_return", pid=pid)
bpf.attach_uprobe(name="pthread", sym="pthread_mutex_unlock", fn_name="probe_mutex_unlock", pid=pid)
def print_frame(bpf, pid, addr):
print("\t\t%16s (%x)" % (bpf.sym(addr, pid, show_module=True, show_offset=True), addr))
def print_stack(bpf, pid, stacks, stack_id):
for addr in stacks.walk(stack_id):
print_frame(bpf, pid, addr)
def run(pid):
bpf = BPF(text=text)
attach(bpf, pid)
init_stacks = bpf["init_stacks"]
stacks = bpf["stacks"]
locks = bpf["locks"]
mutex_lock_hist = bpf["mutex_lock_hist"]
mutex_wait_hist = bpf["mutex_wait_hist"]
while True:
sleep(5)
mutex_ids = {}
next_mutex_id = 1
for k, v in init_stacks.items():
mutex_id = "#%d" % next_mutex_id
next_mutex_id += 1
mutex_ids[k.value] = mutex_id
print("init stack for mutex %x (%s)" % (k.value, mutex_id))
print_stack(bpf, pid, stacks, v.value)
print("")
grouper = lambda (k, v): k.tid
sorted_by_thread = sorted(locks.items(), key=grouper)
locks_by_thread = itertools.groupby(sorted_by_thread, grouper)
for tid, items in locks_by_thread:
print("thread %d" % tid)
for k, v in sorted(items, key=lambda (k, v): -v.wait_time_ns):
mutex_descr = mutex_ids[k.mtx] if k.mtx in mutex_ids else bpf.sym(k.mtx, pid)
print("\tmutex %s ::: wait time %.2fus ::: hold time %.2fus ::: enter count %d" %
(mutex_descr, v.wait_time_ns/1000.0, v.lock_time_ns/1000.0, v.enter_count))
print_stack(bpf, pid, stacks, k.lock_stack_id)
print("")
mutex_wait_hist.print_log2_hist(val_type="wait time (us)")
mutex_lock_hist.print_log2_hist(val_type="hold time (us)")
if __name__ == "__main__":
if len(sys.argv) < 2:
print("USAGE: %s pid" % sys.argv[0])
else:
run(int(sys.argv[1]))