/*
 * Copyright (C) 2019 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "ETMDecoder.h"

#include <sstream>

#include <android-base/expected.h>
#include <android-base/logging.h>
#include <android-base/strings.h>
#include <llvm/Support/MemoryBuffer.h>
#include <opencsd.h>

#include "ETMConstants.h"

namespace simpleperf {
namespace {

class DecoderLogStr : public ocsdMsgLogStrOutI {
 public:
  void printOutStr(const std::string& out_str) override { LOG(DEBUG) << out_str; }
};

class DecodeErrorLogger : public ocsdDefaultErrorLogger {
 public:
  DecodeErrorLogger(const std::function<void(const ocsdError&)>& error_callback)
      : error_callback_(error_callback) {
    initErrorLogger(OCSD_ERR_SEV_INFO, false);
    msg_logger_.setLogOpts(ocsdMsgLogger::OUT_STR_CB);
    msg_logger_.setStrOutFn(&log_str_);
    setOutputLogger(&msg_logger_);
  }

  void LogError(const ocsd_hndl_err_log_t handle, const ocsdError* error) override {
    ocsdDefaultErrorLogger::LogError(handle, error);
    if (error != nullptr) {
      error_callback_(*error);
    }
  }

 private:
  std::function<void(const ocsdError&)> error_callback_;
  DecoderLogStr log_str_;
  ocsdMsgLogger msg_logger_;
};

static bool IsRespError(ocsd_datapath_resp_t resp) {
  return resp >= OCSD_RESP_ERR_CONT;
}

// Used instead of DecodeTree in OpenCSD to avoid linking decoders not for ETMV4 instruction tracing
// in OpenCSD.
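// Formatted trace data enters through the frame decoder, which demultiplexes it by trace id and
// feeds the matching TrcPktProcEtmV4I; unformatted data can bypass the frame decoder and be sent
// to a packet processor directly via GetUnformattedDataIn().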
class ETMV4IDecodeTree {
 public:
  ETMV4IDecodeTree()
      : error_logger_(std::bind(&ETMV4IDecodeTree::ProcessError, this, std::placeholders::_1)) {
    ocsd_err_t err = frame_decoder_.Init();
    CHECK_EQ(err, OCSD_OK);
    err = frame_decoder_.Configure(OCSD_DFRMTR_FRAME_MEM_ALIGN);
    CHECK_EQ(err, OCSD_OK);
    frame_decoder_.getErrLogAttachPt()->attach(&error_logger_);
  }

  bool CreateDecoder(const EtmV4Config* config) {
    uint8_t trace_id = config->getTraceID();
    auto packet_decoder = std::make_unique<TrcPktProcEtmV4I>(trace_id);
    packet_decoder->setProtocolConfig(config);
    packet_decoder->getErrorLogAttachPt()->replace_first(&error_logger_);
    frame_decoder_.getIDStreamAttachPt(trace_id)->attach(packet_decoder.get());
    auto result = packet_decoders_.emplace(trace_id, packet_decoder.release());
    if (!result.second) {
      LOG(ERROR) << "trace id " << trace_id << " has been used";
    }
    return result.second;
  }

  void AttachPacketSink(uint8_t trace_id, IPktDataIn<EtmV4ITrcPacket>& packet_sink) {
    auto& packet_decoder = packet_decoders_[trace_id];
    CHECK(packet_decoder);
    packet_decoder->getPacketOutAttachPt()->replace_first(&packet_sink);
  }

  void AttachPacketMonitor(uint8_t trace_id, IPktRawDataMon<EtmV4ITrcPacket>& packet_monitor) {
    auto& packet_decoder = packet_decoders_[trace_id];
    CHECK(packet_decoder);
    packet_decoder->getRawPacketMonAttachPt()->replace_first(&packet_monitor);
  }

  void AttachRawFramePrinter(RawFramePrinter& frame_printer) {
    frame_decoder_.Configure(frame_decoder_.getConfigFlags() | OCSD_DFRMTR_PACKED_RAW_OUT);
    frame_decoder_.getTrcRawFrameAttachPt()->replace_first(&frame_printer);
  }

  ITrcDataIn& GetFormattedDataIn() { return frame_decoder_; }

  ITrcDataIn& GetUnformattedDataIn(uint8_t trace_id) {
    auto& decoder = packet_decoders_[trace_id];
    CHECK(decoder);
    return *decoder;
  }

  void ProcessError(const ocsdError& error) {
    if (error.getErrorCode() == OCSD_ERR_INVALID_PCKT_HDR) {
      // Found an invalid packet header, following packets for this trace id may also be invalid.
      // So reset the decoder to find I_ASYNC packet in the data stream.
      if (auto it = packet_decoders_.find(error.getErrorChanID()); it != packet_decoders_.end()) {
        auto& packet_decoder = it->second;
        CHECK(packet_decoder);
        packet_decoder->TraceDataIn(OCSD_OP_RESET, error.getErrorIndex(), 0, nullptr, nullptr);
      }
    }
  }

  DecodeErrorLogger& ErrorLogger() { return error_logger_; }

 private:
  DecodeErrorLogger error_logger_;
  TraceFormatterFrameDecoder frame_decoder_;
  std::unordered_map<uint8_t, std::unique_ptr<TrcPktProcEtmV4I>> packet_decoders_;
};

// Similar to IPktDataIn<EtmV4ITrcPacket>, but add trace id.
struct PacketCallback {
  // packet callbacks are called in priority order.
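  // MAP_LOCATOR runs first so that later callbacks (BranchListParser, PacketToElement) can query
  // the current thread and map for a trace id while handling the same packet.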
  enum Priority {
    MAP_LOCATOR,
    BRANCH_LIST_PARSER,
    PACKET_TO_ELEMENT,
  };

  PacketCallback(Priority prio) : priority(prio) {}
  virtual ~PacketCallback() {}
  virtual ocsd_datapath_resp_t ProcessPacket(uint8_t trace_id, ocsd_datapath_op_t op,
                                             ocsd_trc_index_t index_sop,
                                             const EtmV4ITrcPacket* pkt) = 0;
  const Priority priority;
};

// Receives packets from a packet decoder in OpenCSD library.
class PacketSink : public IPktDataIn<EtmV4ITrcPacket> {
 public:
  PacketSink(uint8_t trace_id) : trace_id_(trace_id) {}

  void AddCallback(PacketCallback* callback) {
    auto it = std::lower_bound(callbacks_.begin(), callbacks_.end(), callback,
                               [](const PacketCallback* c1, const PacketCallback* c2) {
                                 return c1->priority < c2->priority;
                               });
    callbacks_.insert(it, callback);
  }

  ocsd_datapath_resp_t PacketDataIn(ocsd_datapath_op_t op, ocsd_trc_index_t index_sop,
                                    const EtmV4ITrcPacket* pkt) override {
    for (auto& callback : callbacks_) {
      auto resp = callback->ProcessPacket(trace_id_, op, index_sop, pkt);
      if (IsRespError(resp)) {
        return resp;
      }
    }
    return OCSD_RESP_CONT;
  }

 private:
  uint8_t trace_id_;
  std::vector<PacketCallback*> callbacks_;
};

// For each trace_id, when given an addr, find the thread and map it belongs to.
class MapLocator : public PacketCallback {
 public:
  MapLocator(ETMThreadTree& thread_tree)
      : PacketCallback(PacketCallback::MAP_LOCATOR), thread_tree_(thread_tree) {}

  // Return current thread id of a trace_id. If not available, return -1.
  pid_t GetTid(uint8_t trace_id) const { return trace_data_[trace_id].tid; }

  ocsd_datapath_resp_t ProcessPacket(uint8_t trace_id, ocsd_datapath_op_t op,
                                     ocsd_trc_index_t index_sop,
                                     const EtmV4ITrcPacket* pkt) override {
    TraceData& data = trace_data_[trace_id];
    if (op == OCSD_OP_DATA) {
      if (pkt != nullptr && ((!data.use_vmid && pkt->getContext().updated_c) ||
                             (data.use_vmid && pkt->getContext().updated_v))) {
        int32_t new_tid = static_cast<int32_t>(data.use_vmid ? pkt->getContext().VMID
                                                              : pkt->getContext().ctxtID);
        if (data.tid != new_tid) {
          data.tid = new_tid;
          data.thread = nullptr;
          data.userspace_map = nullptr;
        }
      }
    } else if (op == OCSD_OP_RESET) {
      data.tid = -1;
      data.thread = nullptr;
      data.userspace_map = nullptr;
    }
    return OCSD_RESP_CONT;
  }

  const MapEntry* FindMap(uint8_t trace_id, uint64_t addr) {
    TraceData& data = trace_data_[trace_id];
    if (data.userspace_map != nullptr && data.userspace_map->Contains(addr)) {
      return data.userspace_map;
    }
    if (data.tid == -1) {
      return nullptr;
    }
    if (data.thread == nullptr) {
      data.thread = thread_tree_.FindThread(data.tid);
      if (data.thread == nullptr) {
        return nullptr;
      }
    }
    data.userspace_map = data.thread->maps->FindMapByAddr(addr);
    if (data.userspace_map != nullptr) {
      return data.userspace_map;
    }
    // We don't cache kernel map. Because kernel map can start from 0 and overlap all userspace
    // maps.
    return thread_tree_.GetKernelMaps().FindMapByAddr(addr);
  }

  void SetUseVmid(uint8_t trace_id, bool value) { trace_data_[trace_id].use_vmid = value; }

 private:
  struct TraceData {
    int32_t tid = -1;  // thread id, -1 if invalid
    const ThreadEntry* thread = nullptr;
    const MapEntry* userspace_map = nullptr;
    bool use_vmid = false;  // use vmid for PID
  };

  ETMThreadTree& thread_tree_;
  TraceData trace_data_[256];
};

// Map (trace_id, ip address) to (binary_path, binary_offset), and read binary files.
class MemAccess : public ITargetMemAccess {
 public:
  MemAccess(MapLocator& map_locator) : map_locator_(map_locator) {}

  ocsd_err_t ReadTargetMemory(const ocsd_vaddr_t address, uint8_t trace_id, ocsd_mem_space_acc_t,
                              uint32_t* num_bytes, uint8_t* p_buffer) override {
    TraceData& data = trace_data_[trace_id];
    const MapEntry* map = map_locator_.FindMap(trace_id, address);
    // fast path
    if (map != nullptr && map == data.buffer_map && address >= data.buffer_start &&
        address + *num_bytes <= data.buffer_end) {
      if (data.buffer == nullptr) {
        *num_bytes = 0;
      } else {
        memcpy(p_buffer, data.buffer + (address - data.buffer_start), *num_bytes);
      }
      return OCSD_OK;
    }

    // slow path
    size_t copy_size = 0;
    if (map != nullptr) {
      llvm::MemoryBuffer* memory = GetMemoryBuffer(map->dso);
      if (memory != nullptr) {
        if (auto opt_offset = map->dso->IpToFileOffset(address, map->start_addr, map->pgoff);
            opt_offset) {
          uint64_t offset = opt_offset.value();
          size_t file_size = memory->getBufferSize();
          copy_size = file_size > offset ? std::min<size_t>(file_size - offset, *num_bytes) : 0;
          if (copy_size > 0) {
            memcpy(p_buffer, memory->getBufferStart() + offset, copy_size);
          }
        }
      }
      // Update the last buffer cache.
      // Don't cache for the kernel map. Because simpleperf doesn't record an accurate kernel end
      // addr.
      if (!map->in_kernel) {
        data.buffer_map = map;
        data.buffer_start = map->start_addr;
        data.buffer_end = map->get_end_addr();
        if (memory != nullptr && memory->getBufferSize() > map->pgoff &&
            (memory->getBufferSize() - map->pgoff >= map->len)) {
          data.buffer = memory->getBufferStart() + map->pgoff;
        } else if (memory == nullptr) {
          data.buffer = nullptr;
        } else {
          // Memory was found, but the buffer is not good enough to be
          // cached. "Invalidate" the cache by setting the map to
          // null.
          data.buffer_map = nullptr;
        }
      }
    }
    *num_bytes = copy_size;
    return OCSD_OK;
  }

  void InvalidateMemAccCache(const uint8_t cs_trace_id) override {}

 private:
  llvm::MemoryBuffer* GetMemoryBuffer(Dso* dso) {
    auto it = elf_map_.find(dso);
    if (it == elf_map_.end()) {
      ElfStatus status;
      auto res = elf_map_.emplace(dso, ElfFile::Open(dso->GetDebugFilePath(), &status));
      it = res.first;
    }
    return it->second ? it->second->GetMemoryBuffer() : nullptr;
  }

  struct TraceData {
    const MapEntry* buffer_map = nullptr;
    const char* buffer = nullptr;
    uint64_t buffer_start = 0;
    uint64_t buffer_end = 0;
  };

  MapLocator& map_locator_;
  std::unordered_map<Dso*, std::unique_ptr<ElfFile>> elf_map_;
  TraceData trace_data_[256];
};

class InstructionDecoder : public TrcIDecode {
 public:
  ocsd_err_t DecodeInstruction(ocsd_instr_info* instr_info) {
    this->instr_info = instr_info;
    return TrcIDecode::DecodeInstruction(instr_info);
  }

  ocsd_instr_info* instr_info;
};

// Similar to ITrcGenElemIn, but add next instruction info, which is needed to get branch to addr
// for an InstructionRange element.
struct ElementCallback {
 public:
  virtual ~ElementCallback(){};
  virtual ocsd_datapath_resp_t ProcessElement(ocsd_trc_index_t index_sop, uint8_t trace_id,
                                              const OcsdTraceElement& elem,
                                              const ocsd_instr_info* next_instr) = 0;
};

// Decode packets into elements.
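// One TrcPktDecodeEtmV4I element decoder is created per trace id. InstructionDecoder records the
// ocsd_instr_info it decoded last, so TraceElemIn() can pass the "next instruction" info to
// ElementCallbacks along with each generated element.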
class PacketToElement : public PacketCallback, public ITrcGenElemIn {
 public:
  PacketToElement(MapLocator& map_locator,
                  const std::unordered_map<uint8_t, std::unique_ptr<EtmV4Config>>& configs,
                  DecodeErrorLogger& error_logger)
      : PacketCallback(PacketCallback::PACKET_TO_ELEMENT), mem_access_(map_locator) {
    for (auto& p : configs) {
      uint8_t trace_id = p.first;
      const EtmV4Config* config = p.second.get();
      element_decoders_.emplace(trace_id, trace_id);
      auto& decoder = element_decoders_[trace_id];
      decoder.setProtocolConfig(config);
      decoder.getErrorLogAttachPt()->replace_first(&error_logger);
      decoder.getInstrDecodeAttachPt()->replace_first(&instruction_decoder_);
      decoder.getMemoryAccessAttachPt()->replace_first(&mem_access_);
      decoder.getTraceElemOutAttachPt()->replace_first(this);
    }
  }

  void AddCallback(ElementCallback* callback) { callbacks_.push_back(callback); }

  ocsd_datapath_resp_t ProcessPacket(uint8_t trace_id, ocsd_datapath_op_t op,
                                     ocsd_trc_index_t index_sop,
                                     const EtmV4ITrcPacket* pkt) override {
    return element_decoders_[trace_id].PacketDataIn(op, index_sop, pkt);
  }

  ocsd_datapath_resp_t TraceElemIn(const ocsd_trc_index_t index_sop, uint8_t trc_chan_id,
                                   const OcsdTraceElement& elem) override {
    for (auto& callback : callbacks_) {
      auto resp =
          callback->ProcessElement(index_sop, trc_chan_id, elem, instruction_decoder_.instr_info);
      if (IsRespError(resp)) {
        return resp;
      }
    }
    return OCSD_RESP_CONT;
  }

 private:
  // map from trace id of an etm device to its element decoder
  std::unordered_map<uint8_t, TrcPktDecodeEtmV4I> element_decoders_;
  MemAccess mem_access_;
  InstructionDecoder instruction_decoder_;
  std::vector<ElementCallback*> callbacks_;
};

// Dump etm data generated at different stages.
class DataDumper : public ElementCallback {
 public:
  DataDumper(ETMV4IDecodeTree& decode_tree) : decode_tree_(decode_tree) {}

  void DumpRawData() {
    decode_tree_.AttachRawFramePrinter(frame_printer_);
    frame_printer_.setMessageLogger(&stdout_logger_);
  }

  void DumpPackets(const std::unordered_map<uint8_t, std::unique_ptr<EtmV4Config>>& configs) {
    for (auto& p : configs) {
      uint8_t trace_id = p.first;
      auto result = packet_printers_.emplace(trace_id, trace_id);
      CHECK(result.second);
      auto& packet_printer = result.first->second;
      decode_tree_.AttachPacketMonitor(trace_id, packet_printer);
      packet_printer.setMessageLogger(&stdout_logger_);
    }
  }

  void DumpElements() { element_printer_.setMessageLogger(&stdout_logger_); }

  ocsd_datapath_resp_t ProcessElement(ocsd_trc_index_t index_sop, uint8_t trc_chan_id,
                                      const OcsdTraceElement& elem, const ocsd_instr_info*) {
    return element_printer_.TraceElemIn(index_sop, trc_chan_id, elem);
  }

 private:
  ETMV4IDecodeTree& decode_tree_;
  RawFramePrinter frame_printer_;
  std::unordered_map<uint8_t, PacketPrinter<EtmV4ITrcPacket>> packet_printers_;
  TrcGenericElementPrinter element_printer_;
  ocsdMsgLogger stdout_logger_;
};

// It decodes each ETMV4IPacket into TraceElements, and generates ETMInstrRanges from TraceElements.
// Decoding each packet is slow, but ensures correctness.
class InstrRangeParser : public ElementCallback {
 private:
  struct TraceData {
    ETMInstrRange instr_range;
    bool wait_for_branch_to_addr_fix = false;
  };

 public:
  InstrRangeParser(MapLocator& map_locator, const ETMDecoder::InstrRangeCallbackFn& callback)
      : map_locator_(map_locator), callback_(callback) {}

  ocsd_datapath_resp_t ProcessElement(const ocsd_trc_index_t, uint8_t trace_id,
                                      const OcsdTraceElement& elem,
                                      const ocsd_instr_info* next_instr) override {
    if (elem.getType() == OCSD_GEN_TRC_ELEM_INSTR_RANGE) {
      TraceData& data = trace_data_[trace_id];
      const MapEntry* map = map_locator_.FindMap(trace_id, elem.st_addr);
      if (map == nullptr) {
        FlushData(data);
        return OCSD_RESP_CONT;
      }
      uint64_t start_addr = map->GetVaddrInFile(elem.st_addr);
      auto& instr_range = data.instr_range;

      if (data.wait_for_branch_to_addr_fix) {
        // OpenCSD may cache a list of InstrRange elements, making it inaccurate to get branch to
        // address from next_instr->branch_addr. So fix it by using the start address of the next
        // InstrRange element.
        instr_range.branch_to_addr = start_addr;
      }
      FlushData(data);
      instr_range.dso = map->dso;
      instr_range.start_addr = start_addr;
      instr_range.end_addr = map->GetVaddrInFile(elem.en_addr - elem.last_instr_sz);
      bool end_with_branch =
          elem.last_i_type == OCSD_INSTR_BR || elem.last_i_type == OCSD_INSTR_BR_INDIRECT;
      bool branch_taken = end_with_branch && elem.last_instr_exec;
      if (elem.last_i_type == OCSD_INSTR_BR && branch_taken) {
        // It is based on the assumption that we only do immediate branch inside a binary,
        // which may not be true for all cases. TODO: http://b/151665001.
        instr_range.branch_to_addr = map->GetVaddrInFile(next_instr->branch_addr);
        data.wait_for_branch_to_addr_fix = true;
      } else {
        instr_range.branch_to_addr = 0;
      }
      instr_range.branch_taken_count = branch_taken ? 1 : 0;
      instr_range.branch_not_taken_count = branch_taken ? 0 : 1;

    } else if (elem.getType() == OCSD_GEN_TRC_ELEM_TRACE_ON) {
      // According to the ETM Specification, the Trace On element indicates a discontinuity in the
      // instruction trace stream. So it cuts the connection between instr ranges.
      FlushData(trace_data_[trace_id]);
    }
    return OCSD_RESP_CONT;
  }

  void FinishData() {
    for (auto& pair : trace_data_) {
      FlushData(pair.second);
    }
  }

 private:
  void FlushData(TraceData& data) {
    if (data.instr_range.dso != nullptr) {
      callback_(data.instr_range);
      data.instr_range.dso = nullptr;
    }
    data.wait_for_branch_to_addr_fix = false;
  }

  MapLocator& map_locator_;
  std::unordered_map<uint8_t, TraceData> trace_data_;
  ETMDecoder::InstrRangeCallbackFn callback_;
};

// It parses ETMBranchLists from ETMV4IPackets.
// It doesn't do element decoding and instruction decoding, thus is about 5 times faster than
// InstrRangeParser. But some data will be lost when converting ETMBranchLists to InstrRanges:
// 1. InstrRanges described by Except packets (the last instructions executed before exception,
//    about 2%?).
// 2. Branch to addresses of direct branch instructions across binaries.
class BranchListParser : public PacketCallback {
 private:
  struct TraceData {
    uint64_t addr = 0;
    uint8_t addr_valid_bits = 0;
    uint8_t isa = 0;
    bool invalid_branch = false;
    ETMBranchList branch;
  };

 public:
  BranchListParser(MapLocator& map_locator, const ETMDecoder::BranchListCallbackFn& callback)
      : PacketCallback(BRANCH_LIST_PARSER), map_locator_(map_locator), callback_(callback) {}

  void CheckConfigs(std::unordered_map<uint8_t, std::unique_ptr<EtmV4Config>>& configs) {
    // TODO: Current implementation doesn't support non-zero speculation length and return stack.
    for (auto& p : configs) {
      if (p.second->MaxSpecDepth() > 0) {
        LOG(WARNING) << "branch list collection isn't accurate with non-zero speculation length";
        break;
      }
    }
    for (auto& p : configs) {
      if (p.second->enabledRetStack()) {
        LOG(WARNING) << "branch list collection will lose some data with return stack enabled";
        break;
      }
    }
  }

  bool IsAddrPacket(const EtmV4ITrcPacket* pkt) {
    return pkt->getType() >= ETM4_PKT_I_ADDR_CTXT_L_32IS0 &&
           pkt->getType() <= ETM4_PKT_I_ADDR_L_64IS1;
  }

  bool IsAtomPacket(const EtmV4ITrcPacket* pkt) { return pkt->getAtom().num > 0; }

  ocsd_datapath_resp_t ProcessPacket(uint8_t trace_id, ocsd_datapath_op_t op,
                                     ocsd_trc_index_t /*index_sop */,
                                     const EtmV4ITrcPacket* pkt) override {
    TraceData& data = trace_data_[trace_id];
    if (op == OCSD_OP_DATA) {
      if (IsAddrPacket(pkt)) {
        // Flush branch when seeing an Addr packet. Because it isn't correct to concatenate
        // branches before and after an Addr packet.
        FlushBranch(data);
        data.addr = pkt->getAddrVal();
        data.addr_valid_bits = pkt->v_addr.valid_bits;
        data.isa = pkt->getAddrIS();
      }

      if (IsAtomPacket(pkt)) {
        // An atom packet contains a branch list. We may receive one or more atom packets in a row,
        // and need to concatenate them.
        ProcessAtomPacket(trace_id, data, pkt);
      }

    } else {
      // Flush branch when seeing a flush or reset operation.
      FlushBranch(data);
      if (op == OCSD_OP_RESET) {
        data.addr = 0;
        data.addr_valid_bits = 0;
        data.isa = 0;
        data.invalid_branch = false;
      }
    }
    return OCSD_RESP_CONT;
  }

  void FinishData() {
    for (auto& pair : trace_data_) {
      FlushBranch(pair.second);
    }
  }

 private:
  void ProcessAtomPacket(uint8_t trace_id, TraceData& data, const EtmV4ITrcPacket* pkt) {
    if (data.invalid_branch) {
      return;  // Skip atom packets when we think a branch list is invalid.
    }
    if (data.branch.branch.empty()) {
      // This is the first atom packet in a branch list. Check if we have tid and addr info to
      // parse it and the following atom packets. If not, mark the branch list as invalid.
      if (map_locator_.GetTid(trace_id) == -1 || data.addr_valid_bits == 0) {
        data.invalid_branch = true;
        return;
      }
      const MapEntry* map = map_locator_.FindMap(trace_id, data.addr);
      if (map == nullptr) {
        data.invalid_branch = true;
        return;
      }
      data.branch.dso = map->dso;
      data.branch.addr = map->GetVaddrInFile(data.addr);
      if (data.isa == 1) {  // thumb instruction, mark it in bit 0.
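        // Instruction addresses are at least 2-byte aligned, so bit 0 is free to carry the
        // Thumb/ARM state; ConvertETMBranchMapToInstrRanges() recovers it via (addr & 1).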
        data.branch.addr |= 1;
      }
    }
    uint32_t bits = pkt->atom.En_bits;
    for (size_t i = 0; i < pkt->atom.num; i++) {
      data.branch.branch.push_back((bits & 1) == 1);
      bits >>= 1;
    }
  }

  void FlushBranch(TraceData& data) {
    if (!data.branch.branch.empty()) {
      callback_(data.branch);
      data.branch.branch.clear();
    }
    data.invalid_branch = false;
  }

  MapLocator& map_locator_;
  ETMDecoder::BranchListCallbackFn callback_;
  std::unordered_map<uint8_t, TraceData> trace_data_;
};

// Etm data decoding in OpenCSD library has two steps:
// 1. From byte stream to etm packets. Each packet shows an event happened. For example,
//    an Address packet shows the cpu is running the instruction at that address, an Atom
//    packet shows whether the cpu decides to branch or not.
// 2. From etm packets to trace elements. To generate elements, the decoder needs both etm
//    packets and executed binaries. For example, an InstructionRange element needs the decoder
//    to find the next branch instruction starting from an address.
//
// ETMDecoderImpl uses OpenCSD library to decode etm data. It has the following properties:
// 1. Supports flexible decoding strategy. It allows installing packet callbacks and element
//    callbacks, and decodes to either packets or elements based on requirements.
// 2. Supports dumping data at different stages.
class ETMDecoderImpl : public ETMDecoder {
 public:
  ETMDecoderImpl(ETMThreadTree& thread_tree) : thread_tree_(thread_tree) {
    // If the aux record for a thread is processed after its thread exit record, we can't find
    // the thread's maps when processing ETM data. To handle this, disable thread exit records.
    thread_tree.DisableThreadExitRecords();
  }

  void CreateDecodeTree(const AuxTraceInfoRecord& auxtrace_info) {
    uint8_t trace_id = 0;
    uint64_t* info = auxtrace_info.data->info;
    for (int i = 0; i < auxtrace_info.data->nr_cpu; i++) {
      if (info[0] == AuxTraceInfoRecord::MAGIC_ETM4) {
        auto& etm4 = *reinterpret_cast<AuxTraceInfoRecord::ETM4Info*>(info);
        ocsd_etmv4_cfg cfg;
        memset(&cfg, 0, sizeof(cfg));
        cfg.reg_idr0 = etm4.trcidr0;
        cfg.reg_idr1 = etm4.trcidr1;
        cfg.reg_idr2 = etm4.trcidr2;
        cfg.reg_idr8 = etm4.trcidr8;
        cfg.reg_configr = etm4.trcconfigr;
        cfg.reg_traceidr = etm4.trctraceidr;
        cfg.arch_ver = ARCH_V8;
        cfg.core_prof = profile_CortexA;
        trace_id = cfg.reg_traceidr & 0x7f;
        trace_ids_.emplace(etm4.cpu, trace_id);
        configs_.emplace(trace_id, new EtmV4Config(&cfg));
        info = reinterpret_cast<uint64_t*>(&etm4 + 1);
      } else {
        CHECK_EQ(info[0], AuxTraceInfoRecord::MAGIC_ETE);
        auto& ete = *reinterpret_cast<AuxTraceInfoRecord::ETEInfo*>(info);
        ocsd_ete_cfg cfg;
        memset(&cfg, 0, sizeof(cfg));
        cfg.reg_idr0 = ete.trcidr0;
        cfg.reg_idr1 = ete.trcidr1;
        cfg.reg_idr2 = ete.trcidr2;
        cfg.reg_idr8 = ete.trcidr8;
        cfg.reg_devarch = ete.trcdevarch;
        cfg.reg_configr = ete.trcconfigr;
        cfg.reg_traceidr = ete.trctraceidr;
        cfg.arch_ver = ARCH_AA64;
        cfg.core_prof = profile_CortexA;
        trace_id = cfg.reg_traceidr & 0x7f;
        trace_ids_.emplace(ete.cpu, trace_id);
        configs_.emplace(trace_id, new ETEConfig(&cfg));
        info = reinterpret_cast<uint64_t*>(&ete + 1);
      }
      decode_tree_.CreateDecoder(configs_[trace_id].get());
      auto result = packet_sinks_.emplace(trace_id, trace_id);
      CHECK(result.second);
      decode_tree_.AttachPacketSink(trace_id, result.first->second);
    }
  }

  void EnableDump(const ETMDumpOption& option) override {
    dumper_.reset(new DataDumper(decode_tree_));
    if (option.dump_raw_data) {
      dumper_->DumpRawData();
    }
    if (option.dump_packets) {
      dumper_->DumpPackets(configs_);
    }
    if (option.dump_elements) {
      dumper_->DumpElements();
      InstallElementCallback(dumper_.get());
    }
  }

  void RegisterCallback(const InstrRangeCallbackFn& callback) {
    InstallMapLocator();
    instr_range_parser_.reset(new InstrRangeParser(*map_locator_, callback));
    InstallElementCallback(instr_range_parser_.get());
  }

  void RegisterCallback(const BranchListCallbackFn& callback) {
    InstallMapLocator();
    branch_list_parser_.reset(new BranchListParser(*map_locator_, callback));
    branch_list_parser_->CheckConfigs(configs_);
    InstallPacketCallback(branch_list_parser_.get());
  }

  bool ProcessData(const uint8_t* data, size_t size, bool formatted, uint32_t cpu) override {
    // Reset decoders before processing each data block. Because:
    // 1. Data blocks are not continuous. So decoders shouldn't keep previous states when
    //    processing a new block.
    // 2. The beginning part of a data block may be truncated if kernel buffer is temporarily full.
    //    So we may see garbage data, which can cause decoding errors if we don't reset decoders.
    LOG(DEBUG) << "Processing " << (!formatted ? "un" : "") << "formatted data with size " << size;
    auto& decoder = formatted ? decode_tree_.GetFormattedDataIn()
                              : decode_tree_.GetUnformattedDataIn(trace_ids_[cpu]);

    auto resp = decoder.TraceDataIn(OCSD_OP_RESET, data_index_, 0, nullptr, nullptr);
    if (IsRespError(resp)) {
      LOG(ERROR) << "failed to reset decoder, resp " << resp;
      return false;
    }
    size_t left_size = size;
    const size_t MAX_RESET_RETRY_COUNT = 3;
    size_t reset_retry_count = 0;
    while (left_size > 0) {
      uint32_t processed;
      auto resp = decoder.TraceDataIn(OCSD_OP_DATA, data_index_, left_size, data, &processed);
      if (IsRespError(resp)) {
        // A decoding error shouldn't ruin all data. Reset decoders to recover from it.
        // But some errors may not be recoverable by resetting decoders. So use a max retry limit.
        if (++reset_retry_count > MAX_RESET_RETRY_COUNT) {
          break;
        }
        LOG(DEBUG) << "reset etm decoders for seeing a decode failure, resp " << resp
                   << ", reset_retry_count is " << reset_retry_count;
        decoder.TraceDataIn(OCSD_OP_RESET, data_index_ + processed, 0, nullptr, nullptr);
      }
      data += processed;
      left_size -= processed;
      data_index_ += processed;
    }
    return true;
  }

  bool FinishData() override {
    if (instr_range_parser_) {
      instr_range_parser_->FinishData();
    }
    if (branch_list_parser_) {
      branch_list_parser_->FinishData();
    }
    return true;
  }

 private:
  void InstallMapLocator() {
    if (!map_locator_) {
      map_locator_.reset(new MapLocator(thread_tree_));
      for (auto& cfg : configs_) {
        int64_t configr = (*(const ocsd_etmv4_cfg*)*cfg.second).reg_configr;
        map_locator_->SetUseVmid(cfg.first,
                                 configr & (1U << ETM4_CFG_BIT_VMID | 1U << ETM4_CFG_BIT_VMID_OPT));
      }

      InstallPacketCallback(map_locator_.get());
    }
  }

  void InstallPacketCallback(PacketCallback* callback) {
    for (auto& p : packet_sinks_) {
      p.second.AddCallback(callback);
    }
  }

  void InstallElementCallback(ElementCallback* callback) {
    if (!packet_to_element_) {
      InstallMapLocator();
      packet_to_element_.reset(
          new PacketToElement(*map_locator_, configs_, decode_tree_.ErrorLogger()));
      InstallPacketCallback(packet_to_element_.get());
    }
    packet_to_element_->AddCallback(callback);
  }

  // map ip address to binary path and binary offset
  ETMThreadTree& thread_tree_;
  // handle to build OpenCSD decoder
  ETMV4IDecodeTree decode_tree_;
  // map from cpu to trace id
  std::unordered_map<uint64_t, uint8_t> trace_ids_;
  // map from the trace id of an etm device to its config
  std::unordered_map<uint8_t, std::unique_ptr<EtmV4Config>> configs_;
  // map from the trace id of an etm device to its PacketSink
  std::unordered_map<uint8_t, PacketSink> packet_sinks_;
  std::unique_ptr<PacketToElement> packet_to_element_;
  std::unique_ptr<DataDumper> dumper_;
  // an index keeping processed etm data size
  size_t data_index_ = 0;
  std::unique_ptr<InstrRangeParser> instr_range_parser_;
  std::unique_ptr<MapLocator> map_locator_;
  std::unique_ptr<BranchListParser> branch_list_parser_;
};

}  // namespace

bool ParseEtmDumpOption(const std::string& s, ETMDumpOption* option) {
  for (auto& value : android::base::Split(s, ",")) {
    if (value == "raw") {
      option->dump_raw_data = true;
    } else if (value == "packet") {
      option->dump_packets = true;
    } else if (value == "element") {
      option->dump_elements = true;
    } else {
      LOG(ERROR) << "unknown etm dump option: " << value;
      return false;
    }
  }
  return true;
}

std::unique_ptr<ETMDecoder> ETMDecoder::Create(const AuxTraceInfoRecord& auxtrace_info,
                                               ETMThreadTree& thread_tree) {
  auto decoder = std::make_unique<ETMDecoderImpl>(thread_tree);
  decoder->CreateDecodeTree(auxtrace_info);
  return std::unique_ptr<ETMDecoder>(decoder.release());
}

// Use OpenCSD instruction decoder to convert branches to instruction addresses.
class BranchDecoder {
 public:
  android::base::expected<void, std::string> Init(Dso* dso) {
    ElfStatus status;
    elf_ = ElfFile::Open(dso->GetDebugFilePath(), &status);
    if (!elf_) {
      std::stringstream ss;
      ss << status;
      return android::base::unexpected(ss.str());
    }
    if (dso->type() == DSO_KERNEL_MODULE) {
      // Kernel module doesn't have program header. So create a fake one mapping to .text section.
      for (const auto& section : elf_->GetSectionHeader()) {
        if (section.name == ".text") {
          segments_.resize(1);
          segments_[0].is_executable = true;
          segments_[0].is_load = true;
          segments_[0].file_offset = section.file_offset;
          segments_[0].file_size = section.size;
          segments_[0].vaddr = section.vaddr;
          break;
        }
      }
    } else {
      segments_ = elf_->GetProgramHeader();
      auto it = std::remove_if(segments_.begin(), segments_.end(),
                               [](const ElfSegment& s) { return !s.is_executable; });
      segments_.resize(it - segments_.begin());
    }
    if (segments_.empty()) {
      return android::base::unexpected("no segments");
    }
    buffer_ = elf_->GetMemoryBuffer();
    return {};
  }

  void SetAddr(uint64_t addr, bool is_thumb) {
    memset(&instr_info_, 0, sizeof(instr_info_));
    instr_info_.pe_type.arch = ARCH_V8;
    instr_info_.pe_type.profile = profile_CortexA;
    instr_info_.isa =
        elf_->Is64Bit() ? ocsd_isa_aarch64 : (is_thumb ? ocsd_isa_thumb2 : ocsd_isa_arm);
    instr_info_.instr_addr = addr;
  }

  bool FindNextBranch() {
    // Loop until we find a branch instruction.
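    // Read the opcode at the current address, let OpenCSD classify it, and stop at the first
    // instruction that isn't OCSD_INSTR_OTHER (a branch or other waypoint instruction);
    // otherwise advance by the decoded instruction size.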
    while (ReadMem(instr_info_.instr_addr, 4, &instr_info_.opcode)) {
      ocsd_err_t err = instruction_decoder_.DecodeInstruction(&instr_info_);
      if (err != OCSD_OK) {
        break;
      }
      if (instr_info_.type != OCSD_INSTR_OTHER) {
        return true;
      }
      instr_info_.instr_addr += instr_info_.instr_size;
    }
    return false;
  };

  ocsd_instr_info& InstrInfo() { return instr_info_; }

 private:
  bool ReadMem(uint64_t vaddr, size_t size, void* data) {
    for (auto& segment : segments_) {
      if (vaddr >= segment.vaddr && vaddr + size <= segment.vaddr + segment.file_size) {
        uint64_t offset = vaddr - segment.vaddr + segment.file_offset;
        memcpy(data, buffer_->getBufferStart() + offset, size);
        return true;
      }
    }
    return false;
  }

  std::unique_ptr<ElfFile> elf_;
  std::vector<ElfSegment> segments_;
  llvm::MemoryBuffer* buffer_ = nullptr;
  ocsd_instr_info instr_info_;
  InstructionDecoder instruction_decoder_;
};

android::base::expected<void, std::string> ConvertETMBranchMapToInstrRanges(
    Dso* dso, const ETMBranchMap& branch_map, const ETMDecoder::InstrRangeCallbackFn& callback) {
  ETMInstrRange instr_range;
  instr_range.dso = dso;

  BranchDecoder decoder;
  if (auto result = decoder.Init(dso); !result.ok()) {
    return result;
  }

  for (const auto& addr_p : branch_map) {
    uint64_t start_addr = addr_p.first & ~1ULL;
    bool is_thumb = addr_p.first & 1;
    for (const auto& branch_p : addr_p.second) {
      const std::vector<bool>& branch = branch_p.first;
      uint64_t count = branch_p.second;
      decoder.SetAddr(start_addr, is_thumb);

      for (bool b : branch) {
        ocsd_instr_info& instr = decoder.InstrInfo();
        uint64_t from_addr = instr.instr_addr;
        if (!decoder.FindNextBranch()) {
          break;
        }
        bool end_with_branch = instr.type == OCSD_INSTR_BR || instr.type == OCSD_INSTR_BR_INDIRECT;
        bool branch_taken = end_with_branch && b;
        instr_range.start_addr = from_addr;
        instr_range.end_addr = instr.instr_addr;
        if (instr.type == OCSD_INSTR_BR) {
          instr_range.branch_to_addr = instr.branch_addr;
        } else {
          instr_range.branch_to_addr = 0;
        }
        instr_range.branch_taken_count = branch_taken ? count : 0;
        instr_range.branch_not_taken_count = branch_taken ? 0 : count;

        callback(instr_range);

        if (b) {
          instr.instr_addr = instr.branch_addr;
        } else {
          instr.instr_addr += instr.instr_size;
        }
      }
    }
  }
  return {};
}

}  // namespace simpleperf