1
1
#include " PerfListener.h"
2
2
3
+ #include " common/Assert.h"
3
4
#include " common/Exception.h"
5
+ #include " common/Utils.h"
4
6
#include " common/WithErrnoCheck.h"
5
7
#include " logger/Logger.h"
6
8
7
9
#include < asm/unistd.h>
10
+ #include < dirent.h>
8
11
#include < fcntl.h>
9
12
#include < linux/hw_breakpoint.h>
10
13
#include < linux/perf_event.h>
14
17
#include < cstdint>
15
18
#include < cstdlib>
16
19
#include < cstring>
20
+ #include < fstream>
17
21
18
22
namespace {
19
23
@@ -37,15 +41,15 @@ PerfListener::PerfListener(
37
41
uint64_t instructionCountLimit,
38
42
uint64_t samplingFactor)
39
43
: instructionCountLimit_(instructionCountLimit)
40
- , samplingFactor_{std::max<uint64_t >(1ULL , samplingFactor)}
41
- , perfFd_{-1 } {}
44
+ , samplingFactor_{std::max<uint64_t >(1ULL , samplingFactor)} {}
42
45
43
46
// Releases every perf event descriptor this listener still holds.
PerfListener::~PerfListener() {
    // TODO: handle closing perfFds in move assignment / constructor as well
    for (int& perfFd : perfFds_) {
        if (perfFd < 0) {
            // Slot is already marked closed; nothing to release.
            continue;
        }
        close(perfFd);
        // Mark the slot closed so the descriptor is never closed twice.
        perfFd = -1;
    }
}
50
54
51
55
void PerfListener::onPreFork () {
@@ -73,15 +77,110 @@ void PerfListener::onPreFork() {
73
77
pthread_barrierattr_destroy (&attr);
74
78
}
75
79
80
// Reads a "simple" file: one that consists of a single line and contains no
// whitespace. Only the first whitespace-delimited token is extracted.
//
// Returns the token, or an empty string when the file cannot be opened or
// holds no token. Callers test the result with `.size()` to detect absence,
// so the failure value must be truly empty.
std::string readSimpleFile(const std::string& path) {
    std::ifstream file(path);
    if (!file.is_open()) {
        return "";
    }
    std::string result;
    file >> result;
    return result;
}
90
+
91
// Maps a config field name to its index: "config" -> 0, "config1" -> 1,
// "config2" -> 2 ("config0" is accepted as an alias for index 0).
// Every other name yields -1.
//
// The result is used to index a 3-element array, so anything above 2 must be
// rejected here.
int getConfigFieldNumber(const std::string& name) {
    constexpr int kMaxConfigIndex = 2;
    if (name.size() < 6 || name.size() > 7 ||
        name.compare(0, 6, "config") != 0) {
        return -1;
    }
    if (name.size() == 6) {
        return 0; // plain "config"
    }
    // Exactly one trailing character: it must be a digit in [0, 2].
    const int index = name[6] - '0';
    return (index < 0 || index > kMaxConfigIndex) ? -1 : index;
}
99
+
100
+ struct perfEventConfig {
101
+ uint32_t type = 0 ;
102
+ uint64_t config[3 ] = {0 , 0 , 0 };
103
+ void insertIntoConfig (const std::string& format, uint64_t value) {
104
+ // `format` specifies the bits which need to be set to `value`.
105
+ // It can look like config:0-7, config1:8 or even config:0-7,32-35.
106
+ // According to https://lwn.net/Articles/611945/, the attributes
107
+ // can overlap.
108
+ auto spl = split (format, " :" );
109
+ assert_1 (spl.size () == 2 );
110
+ int field = getConfigFieldNumber (spl[0 ]);
111
+ assert_1 (field >= 0 );
112
+ // Iterate over bit ranges.
113
+ for (const std::string& s: split (spl[1 ], " ," )) {
114
+ auto rangespl = split (s, " -" );
115
+ assert_1 (rangespl.size () && rangespl.size () < 3 );
116
+ unsigned long start = std::stoul (rangespl[0 ]), end = start;
117
+ if (rangespl.size () == 2 ) {
118
+ end = std::stoul (rangespl[1 ]);
119
+ }
120
+ assert_1 (start <= end);
121
+ unsigned long width = end - start + 1 ;
122
+ config[field] |= (value & ((1ull << width) - 1 )) << start;
123
+ value >>= width;
124
+ }
125
+ assert_1 (!value);
126
+ }
127
+ };
128
+
76
129
void PerfListener::onPostForkParent (pid_t childPid) {
77
130
TRACE ();
78
131
79
132
childPid_ = childPid;
133
+
134
+ std::vector<perfEventConfig> eventConfigs;
135
+ const std::string sysfsPath = " /sys/devices" ;
136
+ std::unique_ptr<DIR, int (*)(DIR*)> sysfsDir (
137
+ withErrnoCheck (
138
+ " open /sys/devices directory" , opendir, sysfsPath.c_str ()),
139
+ closedir);
140
+ for (struct dirent * entry = readdir (sysfsDir.get ()); entry != nullptr ;
141
+ entry = readdir (sysfsDir.get ())) {
142
+ // According to linux's perf tool at tools/perf/util/pmus.c, we need
143
+ // to consider folders with the "cpu" name or with a "cpus" file inside.
144
+ std::string dir = sysfsPath + " /" + entry->d_name + " /" ,
145
+ cpus = readSimpleFile (dir + " cpus" ),
146
+ type = readSimpleFile (dir + " type" );
147
+ if ((strcmp (entry->d_name , " cpu" ) && !cpus.size ()) || !type.size ()) {
148
+ continue ;
149
+ }
150
+
151
+ logger::debug (" Generating a raw perf event from " , dir);
152
+ perfEventConfig config;
153
+ config.type = std::stoul (type);
154
+ std::string configStr = readSimpleFile (dir + " events/instructions" );
155
+ assert_1 (configStr.size ());
156
+
157
+ // This parses for example "event=0x2e,umask=0x4f"
158
+ for (const std::string& s: split (configStr, " ," )) {
159
+ auto spl = split (s, " =0x" );
160
+ assert_1 (spl.size () == 2 );
161
+ std::string& name = spl[0 ];
162
+ uint64_t value = std::stoull (spl[1 ], nullptr , 16 );
163
+ logger::debug (" Setting '" , name, " ' in the config to " , value);
164
+ int field = getConfigFieldNumber (name);
165
+ if (field >= 0 ) { // set an entire config field
166
+ config.config [field] |= value;
167
+ }
168
+ else { // set only certain bits
169
+ std::string format = readSimpleFile (dir + " format/" + name);
170
+ assert_1 (format.size ());
171
+ config.insertIntoConfig (format, value);
172
+ }
173
+ }
174
+ eventConfigs.emplace_back (config);
175
+ }
176
+ // Inform the user rather than silently provide a possibly faulty fallback.
177
+ if (eventConfigs.empty ()) {
178
+ throw Exception (" failed to generate at least one perf event config" );
179
+ }
180
+
80
181
struct perf_event_attr attrs {};
81
182
memset (&attrs, 0 , sizeof (attrs));
82
- attrs.type = PERF_TYPE_HARDWARE;
83
183
attrs.size = sizeof (attrs);
84
- attrs.config = PERF_COUNT_HW_INSTRUCTIONS;
85
184
attrs.exclude_user = 0 ;
86
185
attrs.exclude_kernel = 1 ;
87
186
attrs.exclude_hv = 1 ;
@@ -92,25 +191,35 @@ void PerfListener::onPostForkParent(pid_t childPid) {
92
191
attrs.sample_period = instructionCountLimit_ / samplingFactor_;
93
192
attrs.wakeup_events = 1 ;
94
193
}
95
- // Apparently older (3.13) kernel versions doesn't support
96
- // PERF_FLAG_FD_CLOEXEC. This fd will be closed anyway (by FilesListener) so
97
- // it isn't very bad to not use it on newer kernels (and add it with fcnlt)
98
- // until we implement some linux version discovery.
99
- perfFd_ = withErrnoCheck (
100
- " perf event open" ,
101
- perf_event_open,
102
- &attrs,
103
- childPid,
104
- -1 ,
105
- -1 ,
106
- PERF_FLAG_FD_NO_GROUP /* | PERF_FLAG_FD_CLOEXEC */ );
107
- withErrnoCheck (" set cloexec flag on perfFd" , fcntl, F_SETFD, FD_CLOEXEC);
108
- if (instructionCountLimit_ != 0 ) {
109
- int myPid = getpid ();
110
- withErrnoCheck (" fcntl" , fcntl, perfFd_, F_SETOWN, myPid);
111
- int oldFlags = withErrnoCheck (" fcntl" , fcntl, perfFd_, F_GETFL, 0 );
112
- ;
113
- withErrnoCheck (" fcntl" , fcntl, perfFd_, F_SETFL, oldFlags | O_ASYNC);
194
+
195
+ for (auto & config: eventConfigs) {
196
+ logger::debug (" Opening perf event for pmu with id " , config.type );
197
+ attrs.type = config.type ;
198
+ attrs.config = config.config [0 ];
199
+ attrs.config1 = config.config [1 ];
200
+ attrs.config2 = config.config [2 ];
201
+ // Apparently older (3.13) kernel versions doesn't support
202
+ // PERF_FLAG_FD_CLOEXEC. This fd will be closed anyway (by
203
+ // FilesListener) so it isn't very bad to not use it on newer kernels
204
+ // (and add it with fcnlt) until we implement some linux version
205
+ // discovery.
206
+ int perfFd = withErrnoCheck (
207
+ " perf event open" ,
208
+ perf_event_open,
209
+ &attrs,
210
+ childPid,
211
+ -1 ,
212
+ -1 ,
213
+ PERF_FLAG_FD_NO_GROUP /* | PERF_FLAG_FD_CLOEXEC */ );
214
+ withErrnoCheck (
215
+ " set cloexec flag on perfFd" , fcntl, F_SETFD, FD_CLOEXEC);
216
+ if (instructionCountLimit_ != 0 ) {
217
+ int myPid = getpid ();
218
+ withErrnoCheck (" fcntl" , fcntl, perfFd, F_SETOWN, myPid);
219
+ int oldFlags = withErrnoCheck (" fcntl" , fcntl, perfFd, F_GETFL, 0 );
220
+ withErrnoCheck (" fcntl" , fcntl, perfFd, F_SETFL, oldFlags | O_ASYNC);
221
+ }
222
+ perfFds_.emplace_back (perfFd);
114
223
}
115
224
116
225
pthread_barrier_wait (barrier_);
@@ -136,20 +245,24 @@ void PerfListener::onPostForkChild() {
136
245
uint64_t PerfListener::getInstructionsUsed () {
137
246
TRACE ();
138
247
139
- long long int instructionsUsed;
140
- int size = withErrnoCheck (
141
- " read perf value" ,
142
- read,
143
- perfFd_,
144
- &instructionsUsed,
145
- sizeof (long long ));
146
- if (size != sizeof (instructionsUsed)) {
147
- throw Exception (" read failed" );
148
- }
149
- if (instructionsUsed < 0 ) {
150
- throw Exception (" read negative instructions count" );
248
+ uint64_t instructionsUsedSum = 0 ;
249
+ for (int fd: perfFds_) {
250
+ long long int instructionsUsed;
251
+ int size = withErrnoCheck (
252
+ " read perf value" ,
253
+ read,
254
+ fd,
255
+ &instructionsUsed,
256
+ sizeof (long long ));
257
+ if (size != sizeof (instructionsUsed)) {
258
+ throw Exception (" read failed" );
259
+ }
260
+ if (instructionsUsed < 0 ) {
261
+ throw Exception (" read negative instructions count" );
262
+ }
263
+ instructionsUsedSum += static_cast <uint64_t >(instructionsUsed);
151
264
}
152
- return static_cast < uint64_t >(instructionsUsed) ;
265
+ return instructionsUsedSum ;
153
266
}
154
267
155
268
void PerfListener::onPostExecute () {
0 commit comments