/*
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "RecordReadThread.h"

#include <sys/resource.h>
#include <unistd.h>

#include <algorithm>
#include <unordered_map>

#include "environment.h"
#include "event_type.h"
#include "record.h"
#include "utils.h"

namespace simpleperf {

static constexpr size_t kDefaultLowBufferLevel = 10 * kMegabyte;
static constexpr size_t kDefaultCriticalBufferLevel = 5 * kMegabyte;

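// RecordBuffer is a single-producer, single-consumer ring buffer: the read thread writes records
// through AllocWriteSpace()/FinishWrite(), and the main thread consumes them through
// GetCurrentRecord()/MoveToNextRecord(). The heads are atomics with acquire/release pairing, so no
// lock is needed. One byte between write_head_ and read_head_ is always left unused, so
// read_head_ == write_head_ unambiguously means the buffer is empty.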
RecordBuffer::RecordBuffer(size_t buffer_size)
    : read_head_(0), write_head_(0), buffer_size_(buffer_size), buffer_(new char[buffer_size]) {}

size_t RecordBuffer::GetFreeSize() const {
  size_t write_head = write_head_.load(std::memory_order_relaxed);
  size_t read_head = read_head_.load(std::memory_order_relaxed);
  size_t write_tail = read_head > 0 ? read_head - 1 : buffer_size_ - 1;
  if (write_head <= write_tail) {
    return write_tail - write_head;
  }
  return buffer_size_ - write_head + write_tail;
}

char* RecordBuffer::AllocWriteSpace(size_t record_size) {
  size_t write_head = write_head_.load(std::memory_order_relaxed);
  size_t read_head = read_head_.load(std::memory_order_acquire);
  size_t write_tail = read_head > 0 ? read_head - 1 : buffer_size_ - 1;
  cur_write_record_size_ = record_size;
  if (write_head < write_tail) {
    if (write_head + record_size > write_tail) {
      return nullptr;
    }
  } else if (write_head + record_size > buffer_size_) {
    // Not enough space at the end of the buffer, need to wrap to the start of the buffer.
    if (write_tail < record_size) {
      return nullptr;
    }
    if (buffer_size_ - write_head >= sizeof(perf_event_header)) {
      // Set the size field in perf_event_header to 0, so GetCurrentRecord() can wrap to the start
      // of the buffer when it sees size == 0.
      memset(buffer_.get() + write_head, 0, sizeof(perf_event_header));
    }
    cur_write_record_size_ += buffer_size_ - write_head;
    write_head = 0;
  }
  return buffer_.get() + write_head;
}

void RecordBuffer::FinishWrite() {
  size_t write_head = write_head_.load(std::memory_order_relaxed);
  write_head = (write_head + cur_write_record_size_) % buffer_size_;
  write_head_.store(write_head, std::memory_order_release);
}

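// Returns the record at read_head_, or nullptr if the buffer is empty. A perf_event_header with
// size == 0 (or too little room left for a full header) at the end of the buffer marks the wrap
// point written by AllocWriteSpace(), in which case reading continues from the buffer start.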
char* RecordBuffer::GetCurrentRecord() {
  size_t write_head = write_head_.load(std::memory_order_acquire);
  size_t read_head = read_head_.load(std::memory_order_relaxed);
  if (read_head == write_head) {
    return nullptr;
  }
  perf_event_header header;
  if (read_head > write_head) {
    if (buffer_size_ - read_head < sizeof(header) ||
        (memcpy(&header, buffer_.get() + read_head, sizeof(header)) && header.size == 0)) {
      // Need to wrap to the start of the buffer.
      cur_read_record_size_ += buffer_size_ - read_head;
      read_head = 0;
      memcpy(&header, buffer_.get(), sizeof(header));
    }
  } else {
    memcpy(&header, buffer_.get() + read_head, sizeof(header));
  }
  cur_read_record_size_ += header.size;
  return buffer_.get() + read_head;
}

void RecordBuffer::MoveToNextRecord() {
  size_t read_head = read_head_.load(std::memory_order_relaxed);
  read_head = (read_head + cur_read_record_size_) % buffer_size_;
  read_head_.store(read_head, std::memory_order_release);
  cur_read_record_size_ = 0;
}

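// RecordParser precomputes the byte offsets of the pid, time and read fields in sample records
// from attr.sample_type, so the read thread can peek at individual fields of a raw record in the
// kernel buffer without parsing the whole record.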
RecordParser::RecordParser(const perf_event_attr& attr)
    : sample_type_(attr.sample_type),
      read_format_(attr.read_format),
      sample_regs_count_(__builtin_popcountll(attr.sample_regs_user)) {
  size_t pos = sizeof(perf_event_header);
  uint64_t mask = PERF_SAMPLE_IDENTIFIER | PERF_SAMPLE_IP;
  pos += __builtin_popcountll(sample_type_ & mask) * sizeof(uint64_t);
  if (sample_type_ & PERF_SAMPLE_TID) {
    pid_pos_in_sample_records_ = pos;
    pos += sizeof(uint64_t);
  }
  if (sample_type_ & PERF_SAMPLE_TIME) {
    time_pos_in_sample_records_ = pos;
    pos += sizeof(uint64_t);
  }
  mask = PERF_SAMPLE_ADDR | PERF_SAMPLE_ID | PERF_SAMPLE_STREAM_ID | PERF_SAMPLE_CPU |
         PERF_SAMPLE_PERIOD;
  pos += __builtin_popcountll(sample_type_ & mask) * sizeof(uint64_t);
  read_pos_in_sample_records_ = pos;
  if ((sample_type_ & PERF_SAMPLE_TIME) && attr.sample_id_all) {
    mask = PERF_SAMPLE_IDENTIFIER | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | PERF_SAMPLE_ID;
    time_rpos_in_non_sample_records_ =
        (__builtin_popcountll(sample_type_ & mask) + 1) * sizeof(uint64_t);
  }
}

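// For non-sample records the time lives in the sample_id fields appended at the end of the record
// (when attr.sample_id_all is set), so its position is computed backwards from the record size.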
size_t RecordParser::GetTimePos(const perf_event_header& header) const {
  if (header.type == PERF_RECORD_SAMPLE) {
    return time_pos_in_sample_records_;
  }
  if (time_rpos_in_non_sample_records_ != 0u &&
      time_rpos_in_non_sample_records_ < header.size - sizeof(perf_event_header)) {
    return header.size - time_rpos_in_non_sample_records_;
  }
  return 0;
}

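// Finds the position of the stack size field in a sample record by skipping the variable-length
// fields (read values, callchain, raw data, branch stack, user regs) that precede the stack data.
// read_record_fn copies `size` bytes at offset `pos` of the record into `dest`.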
size_t RecordParser::GetStackSizePos(
    const std::function<void(size_t, size_t, void*)>& read_record_fn) const {
  size_t pos = read_pos_in_sample_records_;
  if (sample_type_ & PERF_SAMPLE_READ) {
    uint64_t nr = 1;
    if (read_format_ & PERF_FORMAT_GROUP) {
      read_record_fn(pos, sizeof(nr), &nr);
      pos += sizeof(uint64_t);
    }
    size_t u64_count = nr;
    u64_count += (read_format_ & PERF_FORMAT_TOTAL_TIME_ENABLED) ? 1 : 0;
    u64_count += (read_format_ & PERF_FORMAT_TOTAL_TIME_RUNNING) ? 1 : 0;
    u64_count += (read_format_ & PERF_FORMAT_ID) ? nr : 0;
    pos += u64_count * sizeof(uint64_t);
  }
  if (sample_type_ & PERF_SAMPLE_CALLCHAIN) {
    uint64_t ip_nr;
    read_record_fn(pos, sizeof(ip_nr), &ip_nr);
    pos += (ip_nr + 1) * sizeof(uint64_t);
  }
  if (sample_type_ & PERF_SAMPLE_RAW) {
    uint32_t size;
    read_record_fn(pos, sizeof(size), &size);
    pos += size + sizeof(uint32_t);
  }
  if (sample_type_ & PERF_SAMPLE_BRANCH_STACK) {
    uint64_t stack_nr;
    read_record_fn(pos, sizeof(stack_nr), &stack_nr);
    pos += sizeof(uint64_t) + stack_nr * sizeof(BranchStackItemType);
  }
  if (sample_type_ & PERF_SAMPLE_REGS_USER) {
    uint64_t abi;
    read_record_fn(pos, sizeof(abi), &abi);
    pos += (1 + (abi == 0 ? 0 : sample_regs_count_)) * sizeof(uint64_t);
  }
  return (sample_type_ & PERF_SAMPLE_STACK_USER) ? pos : 0;
}

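// The kernel mmap data buffer is a power-of-two sized ring buffer, so positions can be wrapped
// with a simple mask instead of a modulo operation.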
KernelRecordReader::KernelRecordReader(EventFd* event_fd) : event_fd_(event_fd) {
  size_t buffer_size;
  buffer_ = event_fd_->GetMappedBuffer(buffer_size);
  buffer_mask_ = buffer_size - 1;
}

bool KernelRecordReader::GetDataFromKernelBuffer() {
  data_size_ = event_fd_->GetAvailableMmapDataSize(data_pos_);
  if (data_size_ == 0) {
    return false;
  }
  init_data_size_ = data_size_;
  record_header_.size = 0;
  return true;
}

void KernelRecordReader::ReadRecord(size_t pos, size_t size, void* dest) {
  pos = (pos + data_pos_) & buffer_mask_;
  size_t copy_size = std::min(size, buffer_mask_ + 1 - pos);
  memcpy(dest, buffer_ + pos, copy_size);
  if (copy_size < size) {
    memcpy(static_cast<char*>(dest) + copy_size, buffer_, size - copy_size);
  }
}

bool KernelRecordReader::MoveToNextRecord(const RecordParser& parser) {
  data_pos_ = (data_pos_ + record_header_.size) & buffer_mask_;
  data_size_ -= record_header_.size;
  if (data_size_ == 0) {
    event_fd_->DiscardMmapData(init_data_size_);
    init_data_size_ = 0;
    return false;
  }
  ReadRecord(0, sizeof(record_header_), &record_header_);
  size_t time_pos = parser.GetTimePos(record_header_);
  if (time_pos != 0) {
    ReadRecord(time_pos, sizeof(record_time_), &record_time_);
  }
  return true;
}

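// The low and critical levels control how PushRecordToRecordBuffer() degrades when the record
// buffer fills up: below the low level, stack data in sample records is truncated; below the
// critical level, sample records are dropped. When truncating samples isn't allowed, the low
// level is collapsed onto the critical level, so samples are only ever dropped, never truncated.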
RecordReadThread::RecordReadThread(size_t record_buffer_size, const perf_event_attr& attr,
                                   size_t min_mmap_pages, size_t max_mmap_pages,
                                   size_t aux_buffer_size, bool allow_truncating_samples,
                                   bool exclude_perf)
    : record_buffer_(record_buffer_size),
      record_parser_(attr),
      attr_(attr),
      min_mmap_pages_(min_mmap_pages),
      max_mmap_pages_(max_mmap_pages),
      aux_buffer_size_(aux_buffer_size) {
  if (attr.sample_type & PERF_SAMPLE_STACK_USER) {
    stack_size_in_sample_record_ = attr.sample_stack_user;
  }
  record_buffer_low_level_ = std::min(record_buffer_size / 4, kDefaultLowBufferLevel);
  record_buffer_critical_level_ = std::min(record_buffer_size / 6, kDefaultCriticalBufferLevel);
  LOG(VERBOSE) << "user buffer size = " << record_buffer_size
               << ", low_level size = " << record_buffer_low_level_
               << ", critical_level size = " << record_buffer_critical_level_;
  if (!allow_truncating_samples) {
    record_buffer_low_level_ = record_buffer_critical_level_;
  }
  if (exclude_perf) {
    exclude_pid_ = getpid();
  }
}

RecordReadThread::~RecordReadThread() {
  if (read_thread_) {
    StopReadThread();
  }
}

bool RecordReadThread::RegisterDataCallback(IOEventLoop& loop,
                                            const std::function<bool()>& data_callback) {
  int cmd_fd[2];
  int data_fd[2];
  if (pipe2(cmd_fd, O_CLOEXEC) != 0 || pipe2(data_fd, O_CLOEXEC) != 0) {
    PLOG(ERROR) << "pipe2";
    return false;
  }
  read_cmd_fd_.reset(cmd_fd[0]);
  write_cmd_fd_.reset(cmd_fd[1]);
  cmd_ = NO_CMD;
  read_data_fd_.reset(data_fd[0]);
  write_data_fd_.reset(data_fd[1]);
  has_data_notification_ = false;
  if (!loop.AddReadEvent(read_data_fd_, data_callback)) {
    return false;
  }
  read_thread_.reset(new std::thread([&]() { RunReadThread(); }));
  return true;
}

bool RecordReadThread::AddEventFds(const std::vector<EventFd*>& event_fds) {
  return SendCmdToReadThread(CMD_ADD_EVENT_FDS, const_cast<std::vector<EventFd*>*>(&event_fds));
}

bool RecordReadThread::RemoveEventFds(const std::vector<EventFd*>& event_fds) {
  return SendCmdToReadThread(CMD_REMOVE_EVENT_FDS, const_cast<std::vector<EventFd*>*>(&event_fds));
}

bool RecordReadThread::SyncKernelBuffer() {
  return SendCmdToReadThread(CMD_SYNC_KERNEL_BUFFER, nullptr);
}

bool RecordReadThread::StopReadThread() {
  bool result = true;
  if (read_thread_ != nullptr) {
    result = SendCmdToReadThread(CMD_STOP_THREAD, nullptr);
    if (result) {
      read_thread_->join();
      read_thread_ = nullptr;
    }
  }
  return result;
}

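// Commands are handed to the read thread through a small handshake: store the command under
// cmd_mutex_, wake the read thread by writing one byte to the command pipe, then wait on
// cmd_finish_cond_ until the read thread resets cmd_ to NO_CMD and publishes cmd_result_.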
bool RecordReadThread::SendCmdToReadThread(Cmd cmd, void* cmd_arg) {
  {
    std::lock_guard<std::mutex> lock(cmd_mutex_);
    cmd_ = cmd;
    cmd_arg_ = cmd_arg;
  }
  char unused = 0;
  if (TEMP_FAILURE_RETRY(write(write_cmd_fd_, &unused, 1)) != 1) {
    return false;
  }
  std::unique_lock<std::mutex> lock(cmd_mutex_);
  while (cmd_ != NO_CMD) {
    cmd_finish_cond_.wait(lock);
  }
  return cmd_result_;
}

std::unique_ptr<Record> RecordReadThread::GetRecord() {
  record_buffer_.MoveToNextRecord();
  char* p = record_buffer_.GetCurrentRecord();
  if (p != nullptr) {
    std::unique_ptr<Record> r = ReadRecordFromBuffer(attr_, p, record_buffer_.BufferEnd());
    CHECK(r);
    if (r->type() == PERF_RECORD_AUXTRACE) {
      auto auxtrace = static_cast<AuxTraceRecord*>(r.get());
      record_buffer_.AddCurrentRecordSize(auxtrace->data->aux_size);
      auxtrace->location.addr = r->Binary() + r->size();
    }
    return r;
  }
  if (has_data_notification_) {
    char unused;
    TEMP_FAILURE_RETRY(read(read_data_fd_, &unused, 1));
    has_data_notification_ = false;
  }
  return nullptr;
}

void RecordReadThread::RunReadThread() {
  IncreaseThreadPriority();
  IOEventLoop loop;
  CHECK(loop.AddReadEvent(read_cmd_fd_, [&]() { return HandleCmd(loop); }));
  loop.RunLoop();
}

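// Raise the read thread's priority so it can drain the kernel buffers quickly. An RLIMIT_NICE
// soft limit of 40 means the nice value may be lowered all the way to -20, the highest
// non-realtime priority.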
void RecordReadThread::IncreaseThreadPriority() {
  // TODO: use real time priority for root.
  rlimit rlim;
  int result = getrlimit(RLIMIT_NICE, &rlim);
  if (result == 0 && rlim.rlim_cur == 40) {
    result = setpriority(PRIO_PROCESS, gettid(), -20);
    if (result == 0) {
      LOG(VERBOSE) << "Priority of record read thread is increased";
    }
  }
}

RecordReadThread::Cmd RecordReadThread::GetCmd() {
  std::lock_guard<std::mutex> lock(cmd_mutex_);
  return cmd_;
}

bool RecordReadThread::HandleCmd(IOEventLoop& loop) {
  char unused;
  TEMP_FAILURE_RETRY(read(read_cmd_fd_, &unused, 1));
  bool result = true;
  switch (GetCmd()) {
    case CMD_ADD_EVENT_FDS:
      result = HandleAddEventFds(loop, *static_cast<std::vector<EventFd*>*>(cmd_arg_));
      break;
    case CMD_REMOVE_EVENT_FDS:
      result = HandleRemoveEventFds(*static_cast<std::vector<EventFd*>*>(cmd_arg_));
      break;
    case CMD_SYNC_KERNEL_BUFFER:
      result = ReadRecordsFromKernelBuffer();
      break;
    case CMD_STOP_THREAD:
      result = loop.ExitLoop();
      break;
    default:
      LOG(ERROR) << "Unknown cmd: " << GetCmd();
      result = false;
      break;
  }
  std::lock_guard<std::mutex> lock(cmd_mutex_);
  cmd_ = NO_CMD;
  cmd_result_ = result;
  cmd_finish_cond_.notify_one();
  return true;
}

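// Mmap a per-cpu kernel buffer for the event fds, starting with max_mmap_pages_ and halving the
// size until the mappings succeed (or min_mmap_pages_ fails). Event fds on the same cpu share one
// mapped buffer, and ETM events additionally get an aux buffer.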
bool RecordReadThread::HandleAddEventFds(IOEventLoop& loop,
                                         const std::vector<EventFd*>& event_fds) {
  std::unordered_map<int, EventFd*> cpu_map;
  for (size_t pages = max_mmap_pages_; pages >= min_mmap_pages_; pages >>= 1) {
    bool success = true;
    bool report_error = pages == min_mmap_pages_;
    for (EventFd* fd : event_fds) {
      auto it = cpu_map.find(fd->Cpu());
      if (it == cpu_map.end()) {
        if (!fd->CreateMappedBuffer(pages, report_error)) {
          success = false;
          break;
        }
        if (IsEtmEventType(fd->attr().type)) {
          if (!fd->CreateAuxBuffer(aux_buffer_size_, report_error)) {
            fd->DestroyMappedBuffer();
            success = false;
            break;
          }
          has_etm_events_ = true;
        }
        cpu_map[fd->Cpu()] = fd;
      } else {
        if (!fd->ShareMappedBuffer(*(it->second), pages == min_mmap_pages_)) {
          success = false;
          break;
        }
      }
    }
    if (success) {
      LOG(VERBOSE) << "Each kernel buffer is " << pages << " pages.";
      break;
    }
    for (auto& pair : cpu_map) {
      pair.second->DestroyMappedBuffer();
      pair.second->DestroyAuxBuffer();
    }
    cpu_map.clear();
  }
  if (cpu_map.empty()) {
    return false;
  }
  for (auto& pair : cpu_map) {
    if (!pair.second->StartPolling(loop, [this]() { return ReadRecordsFromKernelBuffer(); })) {
      return false;
    }
    kernel_record_readers_.emplace_back(pair.second);
  }
  return true;
}

bool RecordReadThread::HandleRemoveEventFds(const std::vector<EventFd*>& event_fds) {
  for (auto& event_fd : event_fds) {
    if (event_fd->HasMappedBuffer()) {
      auto it = std::find_if(
          kernel_record_readers_.begin(), kernel_record_readers_.end(),
          [&](const KernelRecordReader& reader) { return reader.GetEventFd() == event_fd; });
      if (it != kernel_record_readers_.end()) {
        kernel_record_readers_.erase(it);
        event_fd->StopPolling();
        event_fd->DestroyMappedBuffer();
        event_fd->DestroyAuxBuffer();
      }
    }
  }
  return true;
}

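// Order KernelRecordReaders by the time of their current record. std::make_heap/push_heap/pop_heap
// build a max-heap, so comparing with '>' keeps the reader with the earliest record on top.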
static bool CompareRecordTime(KernelRecordReader* r1, KernelRecordReader* r2) {
  return r1->RecordTime() > r2->RecordTime();
}

// When reading from mmap buffers, we prefer reading from all buffers at once rather than one
// buffer at a time, because reading all buffers at once lets us merge records from different
// buffers in memory; otherwise we would have to sort the records later with more effort.
bool RecordReadThread::ReadRecordsFromKernelBuffer() {
  do {
    std::vector<KernelRecordReader*> readers;
    for (auto& reader : kernel_record_readers_) {
      if (reader.GetDataFromKernelBuffer()) {
        readers.push_back(&reader);
      }
    }
    bool has_data = false;
    if (!readers.empty()) {
      has_data = true;
      if (readers.size() == 1u) {
        // Only one buffer has data, process it directly.
        while (readers[0]->MoveToNextRecord(record_parser_)) {
          PushRecordToRecordBuffer(readers[0]);
        }
      } else {
        // Use a binary heap to merge records from different buffers. As records from the same
        // buffer are already ordered by time, we only need to merge the first record from all
        // buffers. And each time a record is popped from the heap, we put the next record from its
        // buffer into the heap.
        for (auto& reader : readers) {
          reader->MoveToNextRecord(record_parser_);
        }
        std::make_heap(readers.begin(), readers.end(), CompareRecordTime);
        size_t size = readers.size();
        while (size > 0) {
          std::pop_heap(readers.begin(), readers.begin() + size, CompareRecordTime);
          PushRecordToRecordBuffer(readers[size - 1]);
          if (readers[size - 1]->MoveToNextRecord(record_parser_)) {
            std::push_heap(readers.begin(), readers.begin() + size, CompareRecordTime);
          } else {
            size--;
          }
        }
      }
    }
    ReadAuxDataFromKernelBuffer(&has_data);
    if (!has_data) {
      break;
    }
    // Having collected everything available, this is a good time to try to re-enable any events
    // that might have been disabled by the kernel.
    for (auto event_fd : event_fds_disabled_by_kernel_) {
      event_fd->SetEnableEvent(true);
    }
    event_fds_disabled_by_kernel_.clear();
    if (!SendDataNotificationToMainThread()) {
      return false;
    }
    // If there are no commands, we can loop until there is no more data from the kernel.
  } while (GetCmd() == NO_CMD);
  return true;
}

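// Copy one record from a kernel buffer into the record buffer. Sample records from simpleperf
// itself can be filtered out, and when the record buffer runs low on free space, sample stack
// data is truncated or whole samples are dropped, with the losses counted in stat_.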
void RecordReadThread::PushRecordToRecordBuffer(KernelRecordReader* kernel_record_reader) {
  const perf_event_header& header = kernel_record_reader->RecordHeader();
  if (header.type == PERF_RECORD_SAMPLE && exclude_pid_ != -1) {
    uint32_t pid;
    kernel_record_reader->ReadRecord(record_parser_.GetPidPosInSampleRecord(), sizeof(pid), &pid);
    if (pid == exclude_pid_) {
      return;
    }
  }
  if (header.type == PERF_RECORD_SAMPLE && stack_size_in_sample_record_ > 1024) {
    size_t free_size = record_buffer_.GetFreeSize();
    if (free_size < record_buffer_critical_level_) {
      // When the free size in the record buffer is below the critical level, drop sample records
      // to save space for more important records (like mmap or fork records).
      stat_.userspace_lost_samples++;
      return;
    }
    size_t stack_size_limit = stack_size_in_sample_record_;
    if (free_size < record_buffer_low_level_) {
      // When the free size in the record buffer is below the low level, truncate the stack data in
      // sample records to 1K. This makes the unwinder unwind only part of the callchains, but
      // hopefully the call chain joiner can complete the callchains.
      stack_size_limit = 1024;
    }
    size_t stack_size_pos =
        record_parser_.GetStackSizePos([&](size_t pos, size_t size, void* dest) {
          return kernel_record_reader->ReadRecord(pos, size, dest);
        });
    uint64_t stack_size;
    kernel_record_reader->ReadRecord(stack_size_pos, sizeof(stack_size), &stack_size);
    if (stack_size > 0) {
      size_t dyn_stack_size_pos = stack_size_pos + sizeof(stack_size) + stack_size;
      uint64_t dyn_stack_size;
      kernel_record_reader->ReadRecord(dyn_stack_size_pos, sizeof(dyn_stack_size), &dyn_stack_size);
      if (dyn_stack_size == 0) {
        // If stack_user_data.dyn_size == 0, it may be because the kernel is missing the patch to
        // update dyn_size, like in N9 (See b/22612370). So assume all stack data is valid if
        // dyn_size == 0.
        // TODO: Add cts test.
        dyn_stack_size = stack_size;
      }
      // When simpleperf requests the kernel to dump 64K stack per sample, it will allocate 64K
      // space in each sample to store stack data. However, a thread may use less stack than 64K.
      // So not all the 64K stack data in a sample is valid, and we only need to keep valid stack
      // data, whose size is dyn_stack_size.
      uint64_t new_stack_size = Align(std::min<uint64_t>(dyn_stack_size, stack_size_limit), 8);
      if (stack_size > new_stack_size) {
        // Remove part of the stack data.
        perf_event_header new_header = header;
        new_header.size -= stack_size - new_stack_size;
        char* p = record_buffer_.AllocWriteSpace(new_header.size);
        if (p != nullptr) {
          memcpy(p, &new_header, sizeof(new_header));
          size_t pos = sizeof(new_header);
          kernel_record_reader->ReadRecord(pos, stack_size_pos - pos, p + pos);
          memcpy(p + stack_size_pos, &new_stack_size, sizeof(uint64_t));
          pos = stack_size_pos + sizeof(uint64_t);
          kernel_record_reader->ReadRecord(pos, new_stack_size, p + pos);
          memcpy(p + pos + new_stack_size, &new_stack_size, sizeof(uint64_t));
          record_buffer_.FinishWrite();
          if (new_stack_size < dyn_stack_size) {
            stat_.userspace_truncated_stack_samples++;
          }
        } else {
          stat_.userspace_lost_samples++;
        }
        return;
      }
    }
  }
  char* p = record_buffer_.AllocWriteSpace(header.size);
  if (p != nullptr) {
    kernel_record_reader->ReadRecord(0, header.size, p);
    if (header.type == PERF_RECORD_AUX) {
      AuxRecord r;
      if (r.Parse(attr_, p, p + header.size) && (r.data->flags & PERF_AUX_FLAG_TRUNCATED)) {
        // When the kernel sees aux output flagged with PERF_AUX_FLAG_TRUNCATED,
        // it sets a pending disable on the event:
        // https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/kernel/events/ring_buffer.c?h=v5.13#n516
        // The truncated flag is set by the Coresight driver when some trace was lost,
        // which can be caused by a full buffer. Therefore, try to re-enable the event
        // only after we have collected the aux data.
        event_fds_disabled_by_kernel_.insert(kernel_record_reader->GetEventFd());
      }
    } else if (header.type == PERF_RECORD_LOST) {
      LostRecord r;
      if (r.Parse(attr_, p, p + header.size)) {
        stat_.kernelspace_lost_records += static_cast<size_t>(r.lost);
      }
    }
    record_buffer_.FinishWrite();
  } else {
    if (header.type == PERF_RECORD_SAMPLE) {
      stat_.userspace_lost_samples++;
    } else {
      stat_.userspace_lost_non_samples++;
    }
  }
}

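// Copy ETM aux data out of the aux buffers: each chunk is stored as an AuxTraceRecord header
// followed by the raw aux bytes, padded with zeros to an 8-byte boundary. If the record buffer is
// close to the critical level, the aux data is dropped and counted in stat_.lost_aux_data_size.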
void RecordReadThread::ReadAuxDataFromKernelBuffer(bool* has_data) {
  if (!has_etm_events_) {
    return;
  }
  for (auto& reader : kernel_record_readers_) {
    EventFd* event_fd = reader.GetEventFd();
    if (event_fd->HasAuxBuffer()) {
      char* buf[2];
      size_t size[2];
      uint64_t offset = event_fd->GetAvailableAuxData(&buf[0], &size[0], &buf[1], &size[1]);
      size_t aux_size = size[0] + size[1];
      if (aux_size == 0) {
        continue;
      }
      *has_data = true;
      AuxTraceRecord auxtrace(Align(aux_size, 8), offset, event_fd->Cpu(), 0, event_fd->Cpu());
      size_t alloc_size = auxtrace.size() + auxtrace.data->aux_size;
      char* p = nullptr;
      if ((record_buffer_.GetFreeSize() < alloc_size + record_buffer_critical_level_) ||
          (p = record_buffer_.AllocWriteSpace(alloc_size)) == nullptr) {
        stat_.lost_aux_data_size += aux_size;
      } else {
        CHECK(p != nullptr);
        MoveToBinaryFormat(auxtrace.Binary(), auxtrace.size(), p);
        MoveToBinaryFormat(buf[0], size[0], p);
        if (size[1] != 0) {
          MoveToBinaryFormat(buf[1], size[1], p);
        }
        size_t pad_size = auxtrace.data->aux_size - aux_size;
        if (pad_size != 0) {
          uint64_t pad = 0;
          memcpy(p, &pad, pad_size);
        }
        record_buffer_.FinishWrite();
        stat_.aux_data_size += aux_size;
        LOG(DEBUG) << "record aux data " << aux_size << " bytes";
      }
      event_fd->DiscardAuxData(aux_size);
    }
  }
}

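// Tell the main thread there is data to read by writing one byte to the data pipe. The
// notification is sent only when has_data_notification_ flips from false to true; GetRecord()
// clears it again after draining the record buffer.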
bool RecordReadThread::SendDataNotificationToMainThread() {
  if (has_etm_events_) {
    // For ETM recording, the default buffer size is large enough to hold ETM data for several
    // seconds. To reduce the impact of processing ETM data (especially when --decode-etm is used),
    // delay processing ETM data until the buffer is half full.
    if (record_buffer_.GetFreeSize() >= record_buffer_.size() / 2) {
      return true;
    }
  }
  if (!has_data_notification_.load(std::memory_order_relaxed)) {
    has_data_notification_ = true;
    char unused = 0;
    if (TEMP_FAILURE_RETRY(write(write_data_fd_, &unused, 1)) != 1) {
      PLOG(ERROR) << "write";
      return false;
    }
  }
  return true;
}

}  // namespace simpleperf