Skip to content

Commit 66e9be7

Browse files
authored
Merge pull request #46 from A-dead-pixel/intel-hybrid-PR
Add support for heterogenous cpus to PerfListener
2 parents b5903c6 + 76ca532 commit 66e9be7

File tree

3 files changed

+158
-43
lines changed

3 files changed

+158
-43
lines changed

src/perf/PerfListener.cc

Lines changed: 154 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
#include "PerfListener.h"
22

3+
#include "common/Assert.h"
34
#include "common/Exception.h"
5+
#include "common/Utils.h"
46
#include "common/WithErrnoCheck.h"
57
#include "logger/Logger.h"
68

79
#include <asm/unistd.h>
10+
#include <dirent.h>
811
#include <fcntl.h>
912
#include <linux/hw_breakpoint.h>
1013
#include <linux/perf_event.h>
@@ -14,6 +17,7 @@
1417
#include <cstdint>
1518
#include <cstdlib>
1619
#include <cstring>
20+
#include <fstream>
1721

1822
namespace {
1923

@@ -37,15 +41,15 @@ PerfListener::PerfListener(
3741
uint64_t instructionCountLimit,
3842
uint64_t samplingFactor)
3943
: instructionCountLimit_(instructionCountLimit)
40-
, samplingFactor_{std::max<uint64_t>(1ULL, samplingFactor)}
41-
, perfFd_{-1} {}
44+
, samplingFactor_{std::max<uint64_t>(1ULL, samplingFactor)} {}
4245

4346
PerfListener::~PerfListener() {
44-
// TODO: handle closing perfFd in move assignement / constructor as well
45-
if (perfFd_ >= 0) {
46-
close(perfFd_);
47-
perfFd_ = -1;
48-
}
47+
// TODO: handle closing perfFds in move assignment / constructor as well
48+
for (int& fd: perfFds_)
49+
if (fd >= 0) {
50+
close(fd);
51+
fd = -1;
52+
}
4953
}
5054

5155
void PerfListener::onPreFork() {
@@ -73,15 +77,110 @@ void PerfListener::onPreFork() {
7377
pthread_barrierattr_destroy(&attr);
7478
}
7579

80+
// Reads a "simple" file: one that holds a single line with no whitespace,
// such as a sysfs attribute.
//
// Returns the first whitespace-delimited token found in the file at `path`.
// Returns an empty string when the file cannot be opened or contains no token,
// so callers cannot tell a missing file from an empty one.
std::string readSimpleFile(const std::string& path) {
    std::string token;
    std::ifstream input(path);
    if (input.is_open()) {
        // operator>> skips leading whitespace and stops at the next one,
        // which also drops the trailing newline of sysfs attributes.
        input >> token;
    }
    return token;
}
90+
91+
// Maps the name of a perf config field to its index:
// "config" -> 0, "config1" -> 1, "config2" -> 2.
// Returns -1 for every other name.
//
// The result is used to index perfEventConfig::config, which has exactly
// three slots, so any suffix above 2 (e.g. "config3") must be rejected here
// instead of being handed back as an out-of-range index.
int getConfigFieldNumber(const std::string& name) {
    constexpr std::size_t prefixLength = 6; // length of "config"
    constexpr int maxIndex = 2;             // last valid slot of config[3]

    // compare() copes with names shorter than the prefix (they compare unequal).
    if (name.compare(0, prefixLength, "config") != 0) {
        return -1;
    }
    if (name.size() == prefixLength) {
        return 0; // plain "config"
    }
    if (name.size() != prefixLength + 1) {
        return -1; // more than one trailing character
    }
    // A non-digit suffix yields a value outside [0, maxIndex] and is rejected.
    const int index = name[prefixLength] - '0';
    return (index >= 0 && index <= maxIndex) ? index : -1;
}
99+
100+
struct perfEventConfig {
101+
uint32_t type = 0;
102+
uint64_t config[3] = {0, 0, 0};
103+
void insertIntoConfig(const std::string& format, uint64_t value) {
104+
// `format` specifies the bits which need to be set to `value`.
105+
// It can look like config:0-7, config1:8 or even config:0-7,32-35.
106+
// According to https://lwn.net/Articles/611945/, the attributes
107+
// can overlap.
108+
auto spl = split(format, ":");
109+
assert_1(spl.size() == 2);
110+
int field = getConfigFieldNumber(spl[0]);
111+
assert_1(field >= 0);
112+
// Iterate over bit ranges.
113+
for (const std::string& s: split(spl[1], ",")) {
114+
auto rangespl = split(s, "-");
115+
assert_1(rangespl.size() && rangespl.size() < 3);
116+
unsigned long start = std::stoul(rangespl[0]), end = start;
117+
if (rangespl.size() == 2) {
118+
end = std::stoul(rangespl[1]);
119+
}
120+
assert_1(start <= end);
121+
unsigned long width = end - start + 1;
122+
config[field] |= (value & ((1ull << width) - 1)) << start;
123+
value >>= width;
124+
}
125+
assert_1(!value);
126+
}
127+
};
128+
76129
void PerfListener::onPostForkParent(pid_t childPid) {
77130
TRACE();
78131

79132
childPid_ = childPid;
133+
134+
std::vector<perfEventConfig> eventConfigs;
135+
const std::string sysfsPath = "/sys/devices";
136+
std::unique_ptr<DIR, int (*)(DIR*)> sysfsDir(
137+
withErrnoCheck(
138+
"open /sys/devices directory", opendir, sysfsPath.c_str()),
139+
closedir);
140+
for (struct dirent* entry = readdir(sysfsDir.get()); entry != nullptr;
141+
entry = readdir(sysfsDir.get())) {
142+
// According to linux's perf tool at tools/perf/util/pmus.c, we need
143+
// to consider folders with the "cpu" name or with a "cpus" file inside.
144+
std::string dir = sysfsPath + "/" + entry->d_name + "/",
145+
cpus = readSimpleFile(dir + "cpus"),
146+
type = readSimpleFile(dir + "type");
147+
if ((strcmp(entry->d_name, "cpu") && !cpus.size()) || !type.size()) {
148+
continue;
149+
}
150+
151+
logger::debug("Generating a raw perf event from ", dir);
152+
perfEventConfig config;
153+
config.type = std::stoul(type);
154+
std::string configStr = readSimpleFile(dir + "events/instructions");
155+
assert_1(configStr.size());
156+
157+
// This parses for example "event=0x2e,umask=0x4f"
158+
for (const std::string& s: split(configStr, ",")) {
159+
auto spl = split(s, "=0x");
160+
assert_1(spl.size() == 2);
161+
std::string& name = spl[0];
162+
uint64_t value = std::stoull(spl[1], nullptr, 16);
163+
logger::debug("Setting '", name, "' in the config to ", value);
164+
int field = getConfigFieldNumber(name);
165+
if (field >= 0) { // set an entire config field
166+
config.config[field] |= value;
167+
}
168+
else { // set only certain bits
169+
std::string format = readSimpleFile(dir + "format/" + name);
170+
assert_1(format.size());
171+
config.insertIntoConfig(format, value);
172+
}
173+
}
174+
eventConfigs.emplace_back(config);
175+
}
176+
// Inform the user rather than silently provide a possibly faulty fallback.
177+
if (eventConfigs.empty()) {
178+
throw Exception("failed to generate at least one perf event config");
179+
}
180+
80181
struct perf_event_attr attrs {};
81182
memset(&attrs, 0, sizeof(attrs));
82-
attrs.type = PERF_TYPE_HARDWARE;
83183
attrs.size = sizeof(attrs);
84-
attrs.config = PERF_COUNT_HW_INSTRUCTIONS;
85184
attrs.exclude_user = 0;
86185
attrs.exclude_kernel = 1;
87186
attrs.exclude_hv = 1;
@@ -92,25 +191,35 @@ void PerfListener::onPostForkParent(pid_t childPid) {
92191
attrs.sample_period = instructionCountLimit_ / samplingFactor_;
93192
attrs.wakeup_events = 1;
94193
}
95-
// Apparently older (3.13) kernel versions doesn't support
96-
// PERF_FLAG_FD_CLOEXEC. This fd will be closed anyway (by FilesListener) so
97-
// it isn't very bad to not use it on newer kernels (and add it with fcnlt)
98-
// until we implement some linux version discovery.
99-
perfFd_ = withErrnoCheck(
100-
"perf event open",
101-
perf_event_open,
102-
&attrs,
103-
childPid,
104-
-1,
105-
-1,
106-
PERF_FLAG_FD_NO_GROUP /* | PERF_FLAG_FD_CLOEXEC */);
107-
withErrnoCheck("set cloexec flag on perfFd", fcntl, F_SETFD, FD_CLOEXEC);
108-
if (instructionCountLimit_ != 0) {
109-
int myPid = getpid();
110-
withErrnoCheck("fcntl", fcntl, perfFd_, F_SETOWN, myPid);
111-
int oldFlags = withErrnoCheck("fcntl", fcntl, perfFd_, F_GETFL, 0);
112-
;
113-
withErrnoCheck("fcntl", fcntl, perfFd_, F_SETFL, oldFlags | O_ASYNC);
194+
195+
// Open one instruction counter per discovered PMU; on heterogeneous CPUs each
// core type has its own PMU, so several descriptors may be needed.
for (const auto& config: eventConfigs) {
    logger::debug("Opening perf event for pmu with id ", config.type);
    attrs.type = config.type;
    attrs.config = config.config[0];
    attrs.config1 = config.config[1];
    attrs.config2 = config.config[2];
    // Apparently older (3.13) kernel versions do not support
    // PERF_FLAG_FD_CLOEXEC. This fd will be closed anyway (by
    // FilesListener) so it isn't very bad to not use it on newer kernels
    // (and add it with fcntl) until we implement some linux version
    // discovery.
    int perfFd = withErrnoCheck(
            "perf event open",
            perf_event_open,
            &attrs,
            childPid,
            -1,
            -1,
            PERF_FLAG_FD_NO_GROUP /* | PERF_FLAG_FD_CLOEXEC */);
    // Record the descriptor immediately: the destructor closes every entry
    // of perfFds_, so the fd is not leaked if one of the checks below
    // reports an error (presumably by throwing; confirm in withErrnoCheck).
    perfFds_.emplace_back(perfFd);
    // The descriptor must be the first fcntl argument; without it the call
    // would act on fd number F_SETFD instead of the perf fd.
    withErrnoCheck(
            "set cloexec flag on perfFd", fcntl, perfFd, F_SETFD, FD_CLOEXEC);
    if (instructionCountLimit_ != 0) {
        // Ask the kernel to signal this process when the counter overflows.
        int myPid = getpid();
        withErrnoCheck("fcntl", fcntl, perfFd, F_SETOWN, myPid);
        int oldFlags = withErrnoCheck("fcntl", fcntl, perfFd, F_GETFL, 0);
        withErrnoCheck("fcntl", fcntl, perfFd, F_SETFL, oldFlags | O_ASYNC);
    }
}
115224

116225
pthread_barrier_wait(barrier_);
@@ -136,20 +245,24 @@ void PerfListener::onPostForkChild() {
136245
uint64_t PerfListener::getInstructionsUsed() {
137246
TRACE();
138247

139-
long long int instructionsUsed;
140-
int size = withErrnoCheck(
141-
"read perf value",
142-
read,
143-
perfFd_,
144-
&instructionsUsed,
145-
sizeof(long long));
146-
if (size != sizeof(instructionsUsed)) {
147-
throw Exception("read failed");
148-
}
149-
if (instructionsUsed < 0) {
150-
throw Exception("read negative instructions count");
248+
uint64_t instructionsUsedSum = 0;
249+
for (int fd: perfFds_) {
250+
long long int instructionsUsed;
251+
int size = withErrnoCheck(
252+
"read perf value",
253+
read,
254+
fd,
255+
&instructionsUsed,
256+
sizeof(long long));
257+
if (size != sizeof(instructionsUsed)) {
258+
throw Exception("read failed");
259+
}
260+
if (instructionsUsed < 0) {
261+
throw Exception("read negative instructions count");
262+
}
263+
instructionsUsedSum += static_cast<uint64_t>(instructionsUsed);
151264
}
152-
return static_cast<uint64_t>(instructionsUsed);
265+
return instructionsUsedSum;
153266
}
154267

155268
void PerfListener::onPostExecute() {

src/perf/PerfListener.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include "printer/OutputSource.h"
66

77
#include <cstdint>
8+
#include <vector>
89

910
namespace s2j {
1011
namespace perf {
@@ -29,7 +30,7 @@ class PerfListener
2930

3031
const uint64_t instructionCountLimit_;
3132
const uint64_t samplingFactor_;
32-
int perfFd_;
33+
std::vector<int> perfFds_;
3334
pid_t childPid_{};
3435

3536
// Barrier used for synchronization

src/tracer/TraceExecutor.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,8 @@ std::tuple<TraceAction, int> TraceExecutor::handleTraceeSignal(
205205
// Mask is shifted by one because the lowest bit of SigCgt mask
206206
// corresponds to signal 1, not 0.
207207
uint64_t caughtSignals =
208-
procfs::readProcFS(tracee.getPid(), procfs::Field::SIG_CGT) << 1;
208+
procfs::readProcFS(tracee.getPid(), procfs::Field::SIG_CGT)
209+
<< 1;
209210
caughtSignals |= IGNORED_SIGNALS;
210211
if ((caughtSignals & (1 << signal)) == 0U) {
211212
outputBuilder_->setKillSignal(signal);

0 commit comments

Comments
 (0)