forked from google/centipede
-
Notifications
You must be signed in to change notification settings - Fork 5
/
runner_sancov.cc
255 lines (228 loc) · 10.1 KB
/
runner_sancov.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Instrumentation callbacks for SanitizerCoverage (sancov).
// https://clang.llvm.org/docs/SanitizerCoverage.html
#include <pthread.h>

#include <cstdint>
#include <cstdio>

#include "./feature.h"
#include "./reverse_pc_table.h"
#include "./runner.h"
namespace centipede {

// Intentionally empty function that exists only to be referenced from
// runner.cc (presumably so that this translation unit is not dropped by the
// linker -- confirm against runner.cc).
void RunnerSancov() {}

}  // namespace centipede
// Bring the runner's `state` and `tls` objects into the global scope so that
// the extern "C" callbacks below can refer to them unqualified.
// NOTE(review): both are presumably declared in ./runner.h -- confirm.
using centipede::state;
using centipede::tls;
// Tracing data flow.
// The instrumentation is provided by
// https://clang.llvm.org/docs/SanitizerCoverage.html#tracing-data-flow.
// For every load we get the address of the load. We can also get the caller PC.
// If the load address is in
// [main_object.start_address, main_object.start_address + main_object.size),
// it is likely a global.
// We form a feature from a pair of {caller_pc, address_of_load}.
// The rationale here is that loading from a global address unique for the
// given PC is an interesting enough behavior that it warrants its own feature.
//
// Downsides:
// * The instrumentation is expensive, it can easily add 2x slowdown.
// * This creates plenty of features, easily 10x compared to control flow,
// and bloats the corpus. But this is also what we want to achieve here.
// ENFORCE_INLINE: forces a function to be inlined into its caller.
// It is applied to TraceLoad() and TraceCmp() below, which read
// `__builtin_return_address(0)` and therefore must be inlined into the hook
// that calls them.
// NOTE: In addition to `always_inline`, also use `inline`, because some
// compilers require both to actually enforce inlining, e.g. GCC:
// https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html.
#define ENFORCE_INLINE __attribute__((always_inline)) inline
// NO_SANITIZE: expands to `no_sanitize("all")`.
// Use this attribute for functions that must not be instrumented even if
// the runner is built with sanitizers (asan, etc).
// In this file it is applied to the load/cmp/switch hooks and to
// __sanitizer_cov_trace_pc_guard().
#define NO_SANITIZE __attribute__((no_sanitize("all")))
// Records a data-flow feature for a load from `addr`.
//
// Does nothing unless run_time_flags.use_dataflow_features is set. Otherwise
// both the return address of the enclosing hook and `addr` are converted to
// offsets from state.main_object.start_address; the feature is recorded only
// when both offsets are below state.main_object.size, i.e. the PC belongs to
// the main object and the address is (likely) one of its globals.
//
// NOTE: Inlining is enforced so that `__builtin_return_address` yields the
// return address of the hook this function is inlined into.
ENFORCE_INLINE static void TraceLoad(void *addr) {
  if (state.run_time_flags.use_dataflow_features) {
    auto return_pc = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
    auto pc_rel = return_pc - state.main_object.start_address;
    auto data_rel =
        reinterpret_cast<uintptr_t>(addr) - state.main_object.start_address;
    // Unsigned subtraction: anything below start_address wraps around to a
    // huge value and is rejected by the same `< size` comparison.
    const bool pc_in_main_object = pc_rel < state.main_object.size;
    const bool addr_in_main_object = data_rel < state.main_object.size;
    if (pc_in_main_object && addr_in_main_object) {
      state.data_flow_feature_set.set(centipede::ConvertPcPairToNumber(
          pc_rel, data_rel, state.main_object.size));
    }
  }
}
// Records a CMP feature for the comparison of `Arg1` with `Arg2`.
//
// Does nothing unless run_time_flags.use_cmp_features is set. The feature
// context is the hash of the caller PC offset (relative to
// state.main_object.start_address) XOR-ed with the current hash of the
// thread-local path ring buffer.
//
// NOTE: Inlining is enforced so that `__builtin_return_address` yields the
// return address of the hook this function is inlined into.
ENFORCE_INLINE static void TraceCmp(uint64_t Arg1, uint64_t Arg2) {
  if (state.run_time_flags.use_cmp_features) {
    auto return_pc = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
    auto pc_rel = return_pc - state.main_object.start_address;
    auto pc_hash = centipede::Hash64Bits(pc_rel);
    auto path_hash = tls.path_ring_buffer.hash();
    uintptr_t context = pc_hash ^ path_hash;
    state.cmp_feature_set.set(
        centipede::ConvertContextAndArgPairToNumber(Arg1, Arg2, context));
  }
}
//------------------------------------------------------------------------------
// Implementations of the external sanitizer coverage hooks.
//------------------------------------------------------------------------------
extern "C" {
// Load hooks, one per access width (1, 2, 4, 8 and 16 bytes).
// Each one only forwards the loaded-from address to TraceLoad(); the pointee
// type is not used.
NO_SANITIZE
void __sanitizer_cov_load1(uint8_t *addr) {
  TraceLoad(addr);
}
NO_SANITIZE
void __sanitizer_cov_load2(uint16_t *addr) {
  TraceLoad(addr);
}
NO_SANITIZE
void __sanitizer_cov_load4(uint32_t *addr) {
  TraceLoad(addr);
}
NO_SANITIZE
void __sanitizer_cov_load8(uint64_t *addr) {
  TraceLoad(addr);
}
NO_SANITIZE
void __sanitizer_cov_load16(__uint128_t *addr) {
  TraceLoad(addr);
}
// 1-byte `const_cmp` hook: only records a CMP feature via TraceCmp().
// Unlike the 2/4/8-byte hooks, nothing is captured for the auto dictionary.
NO_SANITIZE void __sanitizer_cov_trace_const_cmp1(uint8_t Arg1, uint8_t Arg2) {
  TraceCmp(Arg1, Arg2);
}
// 2-byte `const_cmp` hook: records a CMP feature and, when the operands differ
// and run_time_flags.use_auto_dictionary is set, captures the operand pair in
// the thread-local 2-byte CMP trace.
NO_SANITIZE void __sanitizer_cov_trace_const_cmp2(uint16_t Arg1,
                                                  uint16_t Arg2) {
  // Must be called directly from the hook: TraceCmp() is force-inlined and
  // reads this function's return address.
  TraceCmp(Arg1, Arg2);
  if (Arg1 == Arg2) return;
  if (!state.run_time_flags.use_auto_dictionary) return;
  tls.cmp_trace2.Capture(Arg1, Arg2);
}
// 4-byte `const_cmp` hook: records a CMP feature and, when the operands differ
// and run_time_flags.use_auto_dictionary is set, captures the operand pair in
// the thread-local 4-byte CMP trace.
NO_SANITIZE void __sanitizer_cov_trace_const_cmp4(uint32_t Arg1,
                                                  uint32_t Arg2) {
  // Must be called directly from the hook: TraceCmp() is force-inlined and
  // reads this function's return address.
  TraceCmp(Arg1, Arg2);
  if (Arg1 == Arg2) return;
  if (!state.run_time_flags.use_auto_dictionary) return;
  tls.cmp_trace4.Capture(Arg1, Arg2);
}
// 8-byte `const_cmp` hook: records a CMP feature and, when the operands differ
// and run_time_flags.use_auto_dictionary is set, captures the operand pair in
// the thread-local 8-byte CMP trace.
NO_SANITIZE void __sanitizer_cov_trace_const_cmp8(uint64_t Arg1,
                                                  uint64_t Arg2) {
  // Must be called directly from the hook: TraceCmp() is force-inlined and
  // reads this function's return address.
  TraceCmp(Arg1, Arg2);
  if (Arg1 == Arg2) return;
  if (!state.run_time_flags.use_auto_dictionary) return;
  tls.cmp_trace8.Capture(Arg1, Arg2);
}
// 1-byte `cmp` hook: only records a CMP feature via TraceCmp().
// Unlike the 2/4/8-byte hooks, nothing is captured for the auto dictionary.
NO_SANITIZE void __sanitizer_cov_trace_cmp1(uint8_t Arg1, uint8_t Arg2) {
  TraceCmp(Arg1, Arg2);
}
// 2-byte `cmp` hook: records a CMP feature and, when the operands differ and
// run_time_flags.use_auto_dictionary is set, captures the operand pair in the
// thread-local 2-byte CMP trace.
NO_SANITIZE void __sanitizer_cov_trace_cmp2(uint16_t Arg1, uint16_t Arg2) {
  // Must be called directly from the hook: TraceCmp() is force-inlined and
  // reads this function's return address.
  TraceCmp(Arg1, Arg2);
  if (Arg1 == Arg2) return;
  if (!state.run_time_flags.use_auto_dictionary) return;
  tls.cmp_trace2.Capture(Arg1, Arg2);
}
// 4-byte `cmp` hook: records a CMP feature and, when the operands differ and
// run_time_flags.use_auto_dictionary is set, captures the operand pair in the
// thread-local 4-byte CMP trace.
NO_SANITIZE void __sanitizer_cov_trace_cmp4(uint32_t Arg1, uint32_t Arg2) {
  // Must be called directly from the hook: TraceCmp() is force-inlined and
  // reads this function's return address.
  TraceCmp(Arg1, Arg2);
  if (Arg1 == Arg2) return;
  if (!state.run_time_flags.use_auto_dictionary) return;
  tls.cmp_trace4.Capture(Arg1, Arg2);
}
// 8-byte `cmp` hook: records a CMP feature and, when the operands differ and
// run_time_flags.use_auto_dictionary is set, captures the operand pair in the
// thread-local 8-byte CMP trace.
NO_SANITIZE void __sanitizer_cov_trace_cmp8(uint64_t Arg1, uint64_t Arg2) {
  // Must be called directly from the hook: TraceCmp() is force-inlined and
  // reads this function's return address.
  TraceCmp(Arg1, Arg2);
  if (Arg1 == Arg2) return;
  if (!state.run_time_flags.use_auto_dictionary) return;
  tls.cmp_trace8.Capture(Arg1, Arg2);
}
// Hook for `switch` statements. Currently a no-op: both arguments are ignored
// and no feature is recorded.
// TODO(kcc): [impl] handle switch.
NO_SANITIZE void __sanitizer_cov_trace_switch(uint64_t Val, uint64_t *Cases) {
}
// Remembers the bounds [beg, end) of the PC table in the runner state.
// See https://clang.llvm.org/docs/SanitizerCoverage.html#pc-table
// This function is called at the DSO init time.
void __sanitizer_cov_pcs_init(const uintptr_t *beg, const uintptr_t *end) {
  state.pcs_end = end;
  state.pcs_beg = beg;
}
// Remembers the bounds [beg, end) of the control-flow table in the runner
// state.
// See https://clang.llvm.org/docs/SanitizerCoverage.html#tracing-control-flow
// This function is called at the DSO init time.
void __sanitizer_cov_cfs_init(const uintptr_t *beg, const uintptr_t *end) {
  state.cfs_end = end;
  state.cfs_beg = beg;
}
// Handles one observed PC.
// `normalized_pc` is an integer representation of the PC that is stable
// between executions:
//  * __sanitizer_cov_trace_pc_guard passes the offset of the guard from
//    state.pc_guard_start;
//  * __sanitizer_cov_trace_pc passes the index that state.reverse_pc_table
//    reports for the caller PC (normalized by subtracting the main object's
//    start address).
static inline void HandleOnePc(uintptr_t normalized_pc) {
  // PC features: set unconditionally. There is a
  // run_time_flags.use_pc_features flag, but testing it here would add a
  // compare to the fast path; it is checked when features are sent to the
  // engine instead.
  state.pc_feature_set.set(normalized_pc);

  // Counter features.
  if (state.run_time_flags.use_counter_features)
    state.counter_array.Increment(normalized_pc);

  // Path features: enabled only for a non-zero path_level.
  auto depth = state.run_time_flags.path_level;
  if (!depth) return;
  uintptr_t path_hash = tls.path_ring_buffer.push(normalized_pc, depth);
  state.path_feature_set.set(path_hash);
}
// Converts a return address into the PC of the call instruction that produced
// it.
//  * Caller PC: the PC of the call instruction.
//  * Return address: the PC where the callee will return upon completion.
// The distance between the two is fixed per architecture:
//   x86_64:  CallerPC == ReturnAddress - 5
//   AArch64: CallerPC == ReturnAddress - 4
// Any other architecture is rejected at compile time.
static uintptr_t ReturnAddressToCallerPc(uintptr_t return_address) {
#if defined(__x86_64__)
  constexpr uintptr_t kReturnToCallDelta = 5;
#elif defined(__aarch64__)
  constexpr uintptr_t kReturnToCallDelta = 4;
#else
#error "unsupported architecture"
#endif
  return return_address - kReturnToCallDelta;
}
// MainObjectLazyInit() and helpers allow us to initialize state.main_object
// lazily and thread-safely on the first call to __sanitizer_cov_trace_pc().
//
// TODO(kcc): consider removing :dl_path_suffix= since with lazy init
// we can auto-detect the instrumented DSO.
//
// TODO(kcc): this lazy init is brittle.
// It assumes that __sanitizer_cov_trace_pc is the only code that touches
// state.main_object concurrently. I.e. we can not blindly reuse this lazy init
// for other instrumentation callbacks that use state.main_object.
// This code is also considered *temporary* because
// a) __sanitizer_cov_trace_pc is obsolete and we hope to not need it in future.
// b) a better option might be to do a non-lazy init by intercepting dlopen.
//
// We do not call MainObjectLazyInit() in
// __sanitizer_cov_trace_pc_guard() because
// a) there is no use case for that currently and
// b) it will slow down the hot function.
// pthread_once() control block guarding the initialization of
// state.main_object.
static pthread_once_t main_object_lazy_init_once = PTHREAD_ONCE_INIT;

// Body of the one-time initialization: looks up the DSO selected by the
// ":dl_path_suffix=" string flag, stores the result in state.main_object and
// logs its start address to stderr.
static void MainObjectLazyInitOnceCallback() {
  auto dl_path_suffix = state.GetStringFlag(":dl_path_suffix=");
  state.main_object = centipede::GetDlInfo(dl_path_suffix);
  fprintf(stderr, "MainObjectLazyInitOnceCallback %zx\n",
          state.main_object.start_address);
}

// Runs MainObjectLazyInitOnceCallback() at most once per process, no matter
// how many threads call this function.
// NOTE(review): `noinline` presumably keeps this cold path out of the hot
// caller -- confirm.
__attribute__((noinline)) static void MainObjectLazyInit() {
  pthread_once(&main_object_lazy_init_once, MainObjectLazyInitOnceCallback);
}
// Callback for the `trace-pc` instrumentation variant.
// See https://clang.llvm.org/docs/SanitizerCoverage.html#tracing-pcs.
//
// Takes its own return address, makes it relative to the main object's start
// address (initializing state.main_object lazily on first use), converts it to
// the caller PC, and looks that PC up in state.reverse_pc_table. PCs unknown
// to the table are ignored; known ones are passed to HandleOnePc() by index.
//
// This instrumentation is redundant if other instrumentation
// (e.g. trace-pc-guard) is available, but GCC as of 2022-04 only supports
// this variant.
//
// TODO(kcc): [impl] add proper testing for this callback.
// TODO(kcc): make sure the pc_table in the engine understands the raw PCs.
// TODO(kcc): this implementation is temporary. In order for symbolization to
// work we will need to translate the PC into a PCIndex or make pc_table sparse.
void __sanitizer_cov_trace_pc() {
  const auto return_address =
      reinterpret_cast<uintptr_t>(__builtin_return_address(0));
  // A zero start address means state.main_object has not been filled in yet.
  if (state.main_object.start_address == 0) MainObjectLazyInit();
  const uintptr_t caller_pc = ReturnAddressToCallerPc(
      return_address - state.main_object.start_address);
  auto pc_index = state.reverse_pc_table.GetPCIndex(caller_pc);
  if (pc_index == centipede::ReversePCTable::kUnknownPC) return;
  HandleOnePc(pc_index);
}
// Remembers the bounds [start, stop) of the PC guard array in the runner state.
// This function is called at the DSO init time.
void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) {
  state.pc_guard_stop = stop;
  state.pc_guard_start = start;
}
// This function is called on every instrumented edge.
// `guard` is in [pc_guard_start, pc_guard_stop), so its distance from
// state.pc_guard_start (in guard elements) serves as the normalized PC that is
// handed to HandleOnePc().
NO_SANITIZE
void __sanitizer_cov_trace_pc_guard(uint32_t *guard) {
  HandleOnePc(static_cast<uintptr_t>(guard - state.pc_guard_start));
}
} // extern "C"