forked from google/centipede
-
Notifications
You must be signed in to change notification settings - Fork 5
/
analyze_corpora.cc
95 lines (83 loc) · 3.49 KB
/
analyze_corpora.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "./analyze_corpora.h"
#include <algorithm>
#include "absl/container/flat_hash_set.h"
#include "./control_flow.h"
#include "./corpus.h"
#include "./feature.h"
#include "./logging.h"
namespace centipede {
void AnalyzeCorpora(const BinaryInfo &binary_info,
const std::vector<CorpusRecord> &a,
const std::vector<CorpusRecord> &b) {
// `a_pcs` will contain all PCs covered by `a`.
absl::flat_hash_set<size_t> a_pcs;
for (const auto &record : a) {
for (const auto &feature : record.features) {
if (!feature_domains::kPCs.Contains(feature)) continue;
auto pc = ConvertPCFeatureToPcIndex(feature);
a_pcs.insert(pc);
}
}
// `b_only_pcs` will contain PCs covered by `b` but not by `a`.
// `b_unique_indices` are indices of inputs that have PCs from `b_only_pcs`.
// `b_shared_indices` are indices of all other inputs from `b`.
absl::flat_hash_set<size_t> b_only_pcs;
std::vector<size_t> b_shared_indices, b_unique_indices;
for (size_t i = 0; i < b.size(); ++i) {
const auto &record = b[i];
bool has_b_only = false;
for (const auto &feature : record.features) {
if (!feature_domains::kPCs.Contains(feature)) continue;
auto pc = ConvertPCFeatureToPcIndex(feature);
if (a_pcs.contains(pc)) continue;
b_only_pcs.insert(pc);
has_b_only = true;
}
if (has_b_only)
b_unique_indices.push_back(i);
else
b_shared_indices.push_back(i);
}
LOG(INFO) << VV(a.size()) << VV(b.size()) << VV(a_pcs.size())
<< VV(b_only_pcs.size()) << VV(b_shared_indices.size())
<< VV(b_unique_indices.size());
const auto &pc_table = binary_info.pc_table;
const auto &symbols = binary_info.symbols;
CoverageLogger coverage_logger(pc_table, symbols);
CoverageFrontier frontier_a(binary_info);
frontier_a.Compute(a);
// TODO(kcc): use frontier_a to show the most interesting b-only PCs.
// Sort b-only PCs to print them in the canonical order, as in pc_table.
std::vector<size_t> b_only_pcs_vec{b_only_pcs.begin(), b_only_pcs.end()};
std::sort(b_only_pcs_vec.begin(), b_only_pcs_vec.end());
// First, print the newly covered functions (including partially covered).
LOG(INFO) << "B-only new functions:";
absl::flat_hash_set<std::string_view> b_only_new_functions;
for (const auto pc : b_only_pcs_vec) {
if (!pc_table[pc].has_flag(PCInfo::kFuncEntry)) continue;
auto str = coverage_logger.ObserveAndDescribeIfNew(pc);
if (!str.empty()) LOG(INFO).NoPrefix() << str;
b_only_new_functions.insert(symbols.func(pc));
}
// Now, print newly covered edges in functions that were covered in `a`.
LOG(INFO) << "B-only new edges:";
for (const auto pc : b_only_pcs_vec) {
if (b_only_new_functions.contains(symbols.func(pc))) continue;
auto str = coverage_logger.ObserveAndDescribeIfNew(pc);
if (!str.empty()) LOG(INFO).NoPrefix() << str;
}
}
} // namespace centipede