• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  /*
2   * Copyright (C) 2019 The Android Open Source Project
3   *
4   * Licensed under the Apache License, Version 2.0 (the "License");
5   * you may not use this file except in compliance with the License.
6   * You may obtain a copy of the License at
7   *
8   *      http://www.apache.org/licenses/LICENSE-2.0
9   *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  
17  #include "ETMDecoder.h"
18  
19  #include <sstream>
20  
21  #include <android-base/expected.h>
22  #include <android-base/logging.h>
23  #include <android-base/strings.h>
24  #include <llvm/Support/MemoryBuffer.h>
25  #include <opencsd.h>
26  
27  #include "ETMConstants.h"
28  
29  namespace simpleperf {
30  namespace {
31  
32  class DecoderLogStr : public ocsdMsgLogStrOutI {
33   public:
printOutStr(const std::string & out_str)34    void printOutStr(const std::string& out_str) override { LOG(DEBUG) << out_str; }
35  };
36  
37  class DecodeErrorLogger : public ocsdDefaultErrorLogger {
38   public:
DecodeErrorLogger(const std::function<void (const ocsdError &)> & error_callback)39    DecodeErrorLogger(const std::function<void(const ocsdError&)>& error_callback)
40        : error_callback_(error_callback) {
41      initErrorLogger(OCSD_ERR_SEV_INFO, false);
42      msg_logger_.setLogOpts(ocsdMsgLogger::OUT_STR_CB);
43      msg_logger_.setStrOutFn(&log_str_);
44      setOutputLogger(&msg_logger_);
45    }
46  
LogError(const ocsd_hndl_err_log_t handle,const ocsdError * error)47    void LogError(const ocsd_hndl_err_log_t handle, const ocsdError* error) override {
48      ocsdDefaultErrorLogger::LogError(handle, error);
49      if (error != nullptr) {
50        error_callback_(*error);
51      }
52    }
53  
54   private:
55    std::function<void(const ocsdError&)> error_callback_;
56    DecoderLogStr log_str_;
57    ocsdMsgLogger msg_logger_;
58  };
59  
IsRespError(ocsd_datapath_resp_t resp)60  static bool IsRespError(ocsd_datapath_resp_t resp) {
61    return resp >= OCSD_RESP_ERR_CONT;
62  }
63  
64  // Used instead of DecodeTree in OpenCSD to avoid linking decoders not for ETMV4 instruction tracing
65  // in OpenCSD.
66  class ETMV4IDecodeTree {
67   public:
ETMV4IDecodeTree()68    ETMV4IDecodeTree()
69        : error_logger_(std::bind(&ETMV4IDecodeTree::ProcessError, this, std::placeholders::_1)) {
70      ocsd_err_t err = frame_decoder_.Init();
71      CHECK_EQ(err, OCSD_OK);
72      err = frame_decoder_.Configure(OCSD_DFRMTR_FRAME_MEM_ALIGN);
73      CHECK_EQ(err, OCSD_OK);
74      frame_decoder_.getErrLogAttachPt()->attach(&error_logger_);
75    }
76  
CreateDecoder(const EtmV4Config * config)77    bool CreateDecoder(const EtmV4Config* config) {
78      uint8_t trace_id = config->getTraceID();
79      auto packet_decoder = std::make_unique<TrcPktProcEtmV4I>(trace_id);
80      packet_decoder->setProtocolConfig(config);
81      packet_decoder->getErrorLogAttachPt()->replace_first(&error_logger_);
82      frame_decoder_.getIDStreamAttachPt(trace_id)->attach(packet_decoder.get());
83      auto result = packet_decoders_.emplace(trace_id, packet_decoder.release());
84      if (!result.second) {
85        LOG(ERROR) << "trace id " << trace_id << " has been used";
86      }
87      return result.second;
88    }
89  
AttachPacketSink(uint8_t trace_id,IPktDataIn<EtmV4ITrcPacket> & packet_sink)90    void AttachPacketSink(uint8_t trace_id, IPktDataIn<EtmV4ITrcPacket>& packet_sink) {
91      auto& packet_decoder = packet_decoders_[trace_id];
92      CHECK(packet_decoder);
93      packet_decoder->getPacketOutAttachPt()->replace_first(&packet_sink);
94    }
95  
AttachPacketMonitor(uint8_t trace_id,IPktRawDataMon<EtmV4ITrcPacket> & packet_monitor)96    void AttachPacketMonitor(uint8_t trace_id, IPktRawDataMon<EtmV4ITrcPacket>& packet_monitor) {
97      auto& packet_decoder = packet_decoders_[trace_id];
98      CHECK(packet_decoder);
99      packet_decoder->getRawPacketMonAttachPt()->replace_first(&packet_monitor);
100    }
101  
AttachRawFramePrinter(RawFramePrinter & frame_printer)102    void AttachRawFramePrinter(RawFramePrinter& frame_printer) {
103      frame_decoder_.Configure(frame_decoder_.getConfigFlags() | OCSD_DFRMTR_PACKED_RAW_OUT);
104      frame_decoder_.getTrcRawFrameAttachPt()->replace_first(&frame_printer);
105    }
106  
GetFormattedDataIn()107    ITrcDataIn& GetFormattedDataIn() { return frame_decoder_; }
108  
GetUnformattedDataIn(uint8_t trace_id)109    ITrcDataIn& GetUnformattedDataIn(uint8_t trace_id) {
110      auto& decoder = packet_decoders_[trace_id];
111      CHECK(decoder);
112      return *decoder;
113    }
114  
ProcessError(const ocsdError & error)115    void ProcessError(const ocsdError& error) {
116      if (error.getErrorCode() == OCSD_ERR_INVALID_PCKT_HDR) {
117        // Found an invalid packet header, following packets for this trace id may also be invalid.
118        // So reset the decoder to find I_ASYNC packet in the data stream.
119        if (auto it = packet_decoders_.find(error.getErrorChanID()); it != packet_decoders_.end()) {
120          auto& packet_decoder = it->second;
121          CHECK(packet_decoder);
122          packet_decoder->TraceDataIn(OCSD_OP_RESET, error.getErrorIndex(), 0, nullptr, nullptr);
123        }
124      }
125    }
126  
ErrorLogger()127    DecodeErrorLogger& ErrorLogger() { return error_logger_; }
128  
129   private:
130    DecodeErrorLogger error_logger_;
131    TraceFormatterFrameDecoder frame_decoder_;
132    std::unordered_map<uint8_t, std::unique_ptr<TrcPktProcEtmV4I>> packet_decoders_;
133  };
134  
135  // Similar to IPktDataIn<EtmV4ITrcPacket>, but add trace id.
136  struct PacketCallback {
137    // packet callbacks are called in priority order.
138    enum Priority {
139      MAP_LOCATOR,
140      BRANCH_LIST_PARSER,
141      PACKET_TO_ELEMENT,
142    };
143  
PacketCallbacksimpleperf::__anon83ecdb8c0111::PacketCallback144    PacketCallback(Priority prio) : priority(prio) {}
~PacketCallbacksimpleperf::__anon83ecdb8c0111::PacketCallback145    virtual ~PacketCallback() {}
146    virtual ocsd_datapath_resp_t ProcessPacket(uint8_t trace_id, ocsd_datapath_op_t op,
147                                               ocsd_trc_index_t index_sop,
148                                               const EtmV4ITrcPacket* pkt) = 0;
149    const Priority priority;
150  };
151  
152  // Receives packets from a packet decoder in OpenCSD library.
153  class PacketSink : public IPktDataIn<EtmV4ITrcPacket> {
154   public:
PacketSink(uint8_t trace_id)155    PacketSink(uint8_t trace_id) : trace_id_(trace_id) {}
156  
AddCallback(PacketCallback * callback)157    void AddCallback(PacketCallback* callback) {
158      auto it = std::lower_bound(callbacks_.begin(), callbacks_.end(), callback,
159                                 [](const PacketCallback* c1, const PacketCallback* c2) {
160                                   return c1->priority < c2->priority;
161                                 });
162      callbacks_.insert(it, callback);
163    }
164  
PacketDataIn(ocsd_datapath_op_t op,ocsd_trc_index_t index_sop,const EtmV4ITrcPacket * pkt)165    ocsd_datapath_resp_t PacketDataIn(ocsd_datapath_op_t op, ocsd_trc_index_t index_sop,
166                                      const EtmV4ITrcPacket* pkt) override {
167      for (auto& callback : callbacks_) {
168        auto resp = callback->ProcessPacket(trace_id_, op, index_sop, pkt);
169        if (IsRespError(resp)) {
170          return resp;
171        }
172      }
173      return OCSD_RESP_CONT;
174    }
175  
176   private:
177    uint8_t trace_id_;
178    std::vector<PacketCallback*> callbacks_;
179  };
180  
181  // For each trace_id, when given an addr, find the thread and map it belongs to.
182  class MapLocator : public PacketCallback {
183   public:
MapLocator(ETMThreadTree & thread_tree)184    MapLocator(ETMThreadTree& thread_tree)
185        : PacketCallback(PacketCallback::MAP_LOCATOR), thread_tree_(thread_tree) {}
186  
187    // Return current thread id of a trace_id. If not available, return -1.
GetTid(uint8_t trace_id) const188    pid_t GetTid(uint8_t trace_id) const { return trace_data_[trace_id].tid; }
189  
ProcessPacket(uint8_t trace_id,ocsd_datapath_op_t op,ocsd_trc_index_t index_sop,const EtmV4ITrcPacket * pkt)190    ocsd_datapath_resp_t ProcessPacket(uint8_t trace_id, ocsd_datapath_op_t op,
191                                       ocsd_trc_index_t index_sop,
192                                       const EtmV4ITrcPacket* pkt) override {
193      TraceData& data = trace_data_[trace_id];
194      if (op == OCSD_OP_DATA) {
195        if (pkt != nullptr && ((!data.use_vmid && pkt->getContext().updated_c) ||
196                               (data.use_vmid && pkt->getContext().updated_v))) {
197          int32_t new_tid =
198              static_cast<int32_t>(data.use_vmid ? pkt->getContext().VMID : pkt->getContext().ctxtID);
199          if (data.tid != new_tid) {
200            data.tid = new_tid;
201            data.thread = nullptr;
202            data.userspace_map = nullptr;
203          }
204        }
205      } else if (op == OCSD_OP_RESET) {
206        data.tid = -1;
207        data.thread = nullptr;
208        data.userspace_map = nullptr;
209      }
210      return OCSD_RESP_CONT;
211    }
212  
FindMap(uint8_t trace_id,uint64_t addr)213    const MapEntry* FindMap(uint8_t trace_id, uint64_t addr) {
214      TraceData& data = trace_data_[trace_id];
215      if (data.userspace_map != nullptr && data.userspace_map->Contains(addr)) {
216        return data.userspace_map;
217      }
218      if (data.tid == -1) {
219        return nullptr;
220      }
221      if (data.thread == nullptr) {
222        data.thread = thread_tree_.FindThread(data.tid);
223        if (data.thread == nullptr) {
224          return nullptr;
225        }
226      }
227      data.userspace_map = data.thread->maps->FindMapByAddr(addr);
228      if (data.userspace_map != nullptr) {
229        return data.userspace_map;
230      }
231      // We don't cache kernel map. Because kernel map can start from 0 and overlap all userspace
232      // maps.
233      return thread_tree_.GetKernelMaps().FindMapByAddr(addr);
234    }
235  
SetUseVmid(uint8_t trace_id,bool value)236    void SetUseVmid(uint8_t trace_id, bool value) { trace_data_[trace_id].use_vmid = value; }
237  
238   private:
239    struct TraceData {
240      int32_t tid = -1;  // thread id, -1 if invalid
241      const ThreadEntry* thread = nullptr;
242      const MapEntry* userspace_map = nullptr;
243      bool use_vmid = false;  // use vmid for PID
244    };
245  
246    ETMThreadTree& thread_tree_;
247    TraceData trace_data_[256];
248  };
249  
250  // Map (trace_id, ip address) to (binary_path, binary_offset), and read binary files.
251  class MemAccess : public ITargetMemAccess {
252   public:
MemAccess(MapLocator & map_locator)253    MemAccess(MapLocator& map_locator) : map_locator_(map_locator) {}
254  
ReadTargetMemory(const ocsd_vaddr_t address,uint8_t trace_id,ocsd_mem_space_acc_t,uint32_t * num_bytes,uint8_t * p_buffer)255    ocsd_err_t ReadTargetMemory(const ocsd_vaddr_t address, uint8_t trace_id, ocsd_mem_space_acc_t,
256                                uint32_t* num_bytes, uint8_t* p_buffer) override {
257      TraceData& data = trace_data_[trace_id];
258      const MapEntry* map = map_locator_.FindMap(trace_id, address);
259      // fast path
260      if (map != nullptr && map == data.buffer_map && address >= data.buffer_start &&
261          address + *num_bytes <= data.buffer_end) {
262        if (data.buffer == nullptr) {
263          *num_bytes = 0;
264        } else {
265          memcpy(p_buffer, data.buffer + (address - data.buffer_start), *num_bytes);
266        }
267        return OCSD_OK;
268      }
269  
270      // slow path
271      size_t copy_size = 0;
272      if (map != nullptr) {
273        llvm::MemoryBuffer* memory = GetMemoryBuffer(map->dso);
274        if (memory != nullptr) {
275          if (auto opt_offset = map->dso->IpToFileOffset(address, map->start_addr, map->pgoff);
276              opt_offset) {
277            uint64_t offset = opt_offset.value();
278            size_t file_size = memory->getBufferSize();
279            copy_size = file_size > offset ? std::min<size_t>(file_size - offset, *num_bytes) : 0;
280            if (copy_size > 0) {
281              memcpy(p_buffer, memory->getBufferStart() + offset, copy_size);
282            }
283          }
284        }
285        // Update the last buffer cache.
286        // Don't cache for the kernel map. Because simpleperf doesn't record an accurate kernel end
287        // addr.
288        if (!map->in_kernel) {
289          data.buffer_map = map;
290          data.buffer_start = map->start_addr;
291          data.buffer_end = map->get_end_addr();
292          if (memory != nullptr && memory->getBufferSize() > map->pgoff &&
293              (memory->getBufferSize() - map->pgoff >= map->len)) {
294            data.buffer = memory->getBufferStart() + map->pgoff;
295          } else if (memory == nullptr) {
296            data.buffer = nullptr;
297          } else {
298            // Memory was found, but the buffer is not good enough to be
299            // cached. "Invalidate" the cache by setting the map to
300            // null.
301            data.buffer_map = nullptr;
302          }
303        }
304      }
305      *num_bytes = copy_size;
306      return OCSD_OK;
307    }
308  
InvalidateMemAccCache(const uint8_t cs_trace_id)309    void InvalidateMemAccCache(const uint8_t cs_trace_id) override {}
310  
311   private:
GetMemoryBuffer(Dso * dso)312    llvm::MemoryBuffer* GetMemoryBuffer(Dso* dso) {
313      auto it = elf_map_.find(dso);
314      if (it == elf_map_.end()) {
315        ElfStatus status;
316        auto res = elf_map_.emplace(dso, ElfFile::Open(dso->GetDebugFilePath(), &status));
317        it = res.first;
318      }
319      return it->second ? it->second->GetMemoryBuffer() : nullptr;
320    }
321  
322    struct TraceData {
323      const MapEntry* buffer_map = nullptr;
324      const char* buffer = nullptr;
325      uint64_t buffer_start = 0;
326      uint64_t buffer_end = 0;
327    };
328  
329    MapLocator& map_locator_;
330    std::unordered_map<Dso*, std::unique_ptr<ElfFile>> elf_map_;
331    TraceData trace_data_[256];
332  };
333  
334  class InstructionDecoder : public TrcIDecode {
335   public:
DecodeInstruction(ocsd_instr_info * instr_info)336    ocsd_err_t DecodeInstruction(ocsd_instr_info* instr_info) {
337      this->instr_info = instr_info;
338      return TrcIDecode::DecodeInstruction(instr_info);
339    }
340  
341    ocsd_instr_info* instr_info;
342  };
343  
344  // Similar to ITrcGenElemIn, but add next instruction info, which is needed to get branch to addr
345  // for an InstructionRange element.
346  struct ElementCallback {
347   public:
~ElementCallbacksimpleperf::__anon83ecdb8c0111::ElementCallback348    virtual ~ElementCallback(){};
349    virtual ocsd_datapath_resp_t ProcessElement(ocsd_trc_index_t index_sop, uint8_t trace_id,
350                                                const OcsdTraceElement& elem,
351                                                const ocsd_instr_info* next_instr) = 0;
352  };
353  
354  // Decode packets into elements.
355  class PacketToElement : public PacketCallback, public ITrcGenElemIn {
356   public:
PacketToElement(MapLocator & map_locator,const std::unordered_map<uint8_t,std::unique_ptr<EtmV4Config>> & configs,DecodeErrorLogger & error_logger)357    PacketToElement(MapLocator& map_locator,
358                    const std::unordered_map<uint8_t, std::unique_ptr<EtmV4Config>>& configs,
359                    DecodeErrorLogger& error_logger)
360        : PacketCallback(PacketCallback::PACKET_TO_ELEMENT), mem_access_(map_locator) {
361      for (auto& p : configs) {
362        uint8_t trace_id = p.first;
363        const EtmV4Config* config = p.second.get();
364        element_decoders_.emplace(trace_id, trace_id);
365        auto& decoder = element_decoders_[trace_id];
366        decoder.setProtocolConfig(config);
367        decoder.getErrorLogAttachPt()->replace_first(&error_logger);
368        decoder.getInstrDecodeAttachPt()->replace_first(&instruction_decoder_);
369        decoder.getMemoryAccessAttachPt()->replace_first(&mem_access_);
370        decoder.getTraceElemOutAttachPt()->replace_first(this);
371      }
372    }
373  
AddCallback(ElementCallback * callback)374    void AddCallback(ElementCallback* callback) { callbacks_.push_back(callback); }
375  
ProcessPacket(uint8_t trace_id,ocsd_datapath_op_t op,ocsd_trc_index_t index_sop,const EtmV4ITrcPacket * pkt)376    ocsd_datapath_resp_t ProcessPacket(uint8_t trace_id, ocsd_datapath_op_t op,
377                                       ocsd_trc_index_t index_sop,
378                                       const EtmV4ITrcPacket* pkt) override {
379      return element_decoders_[trace_id].PacketDataIn(op, index_sop, pkt);
380    }
381  
TraceElemIn(const ocsd_trc_index_t index_sop,uint8_t trc_chan_id,const OcsdTraceElement & elem)382    ocsd_datapath_resp_t TraceElemIn(const ocsd_trc_index_t index_sop, uint8_t trc_chan_id,
383                                     const OcsdTraceElement& elem) override {
384      for (auto& callback : callbacks_) {
385        auto resp =
386            callback->ProcessElement(index_sop, trc_chan_id, elem, instruction_decoder_.instr_info);
387        if (IsRespError(resp)) {
388          return resp;
389        }
390      }
391      return OCSD_RESP_CONT;
392    }
393  
394   private:
395    // map from trace id of an etm device to its element decoder
396    std::unordered_map<uint8_t, TrcPktDecodeEtmV4I> element_decoders_;
397    MemAccess mem_access_;
398    InstructionDecoder instruction_decoder_;
399    std::vector<ElementCallback*> callbacks_;
400  };
401  
402  // Dump etm data generated at different stages.
403  class DataDumper : public ElementCallback {
404   public:
DataDumper(ETMV4IDecodeTree & decode_tree)405    DataDumper(ETMV4IDecodeTree& decode_tree) : decode_tree_(decode_tree) {}
406  
DumpRawData()407    void DumpRawData() {
408      decode_tree_.AttachRawFramePrinter(frame_printer_);
409      frame_printer_.setMessageLogger(&stdout_logger_);
410    }
411  
DumpPackets(const std::unordered_map<uint8_t,std::unique_ptr<EtmV4Config>> & configs)412    void DumpPackets(const std::unordered_map<uint8_t, std::unique_ptr<EtmV4Config>>& configs) {
413      for (auto& p : configs) {
414        uint8_t trace_id = p.first;
415        auto result = packet_printers_.emplace(trace_id, trace_id);
416        CHECK(result.second);
417        auto& packet_printer = result.first->second;
418        decode_tree_.AttachPacketMonitor(trace_id, packet_printer);
419        packet_printer.setMessageLogger(&stdout_logger_);
420      }
421    }
422  
DumpElements()423    void DumpElements() { element_printer_.setMessageLogger(&stdout_logger_); }
424  
ProcessElement(ocsd_trc_index_t index_sop,uint8_t trc_chan_id,const OcsdTraceElement & elem,const ocsd_instr_info *)425    ocsd_datapath_resp_t ProcessElement(ocsd_trc_index_t index_sop, uint8_t trc_chan_id,
426                                        const OcsdTraceElement& elem, const ocsd_instr_info*) {
427      return element_printer_.TraceElemIn(index_sop, trc_chan_id, elem);
428    }
429  
430   private:
431    ETMV4IDecodeTree& decode_tree_;
432    RawFramePrinter frame_printer_;
433    std::unordered_map<uint8_t, PacketPrinter<EtmV4ITrcPacket>> packet_printers_;
434    TrcGenericElementPrinter element_printer_;
435    ocsdMsgLogger stdout_logger_;
436  };
437  
438  // It decodes each ETMV4IPacket into TraceElements, and generates ETMInstrRanges from TraceElements.
439  // Decoding each packet is slow, but ensures correctness.
440  class InstrRangeParser : public ElementCallback {
441   private:
442    struct TraceData {
443      ETMInstrRange instr_range;
444      bool wait_for_branch_to_addr_fix = false;
445    };
446  
447   public:
InstrRangeParser(MapLocator & map_locator,const ETMDecoder::InstrRangeCallbackFn & callback)448    InstrRangeParser(MapLocator& map_locator, const ETMDecoder::InstrRangeCallbackFn& callback)
449        : map_locator_(map_locator), callback_(callback) {}
450  
ProcessElement(const ocsd_trc_index_t,uint8_t trace_id,const OcsdTraceElement & elem,const ocsd_instr_info * next_instr)451    ocsd_datapath_resp_t ProcessElement(const ocsd_trc_index_t, uint8_t trace_id,
452                                        const OcsdTraceElement& elem,
453                                        const ocsd_instr_info* next_instr) override {
454      if (elem.getType() == OCSD_GEN_TRC_ELEM_INSTR_RANGE) {
455        TraceData& data = trace_data_[trace_id];
456        const MapEntry* map = map_locator_.FindMap(trace_id, elem.st_addr);
457        if (map == nullptr) {
458          FlushData(data);
459          return OCSD_RESP_CONT;
460        }
461        uint64_t start_addr = map->GetVaddrInFile(elem.st_addr);
462        auto& instr_range = data.instr_range;
463  
464        if (data.wait_for_branch_to_addr_fix) {
465          // OpenCSD may cache a list of InstrRange elements, making it inaccurate to get branch to
466          // address from next_instr->branch_addr. So fix it by using the start address of the next
467          // InstrRange element.
468          instr_range.branch_to_addr = start_addr;
469        }
470        FlushData(data);
471        instr_range.dso = map->dso;
472        instr_range.start_addr = start_addr;
473        instr_range.end_addr = map->GetVaddrInFile(elem.en_addr - elem.last_instr_sz);
474        bool end_with_branch =
475            elem.last_i_type == OCSD_INSTR_BR || elem.last_i_type == OCSD_INSTR_BR_INDIRECT;
476        bool branch_taken = end_with_branch && elem.last_instr_exec;
477        if (elem.last_i_type == OCSD_INSTR_BR && branch_taken) {
478          // It is based on the assumption that we only do immediate branch inside a binary,
479          // which may not be true for all cases. TODO: http://b/151665001.
480          instr_range.branch_to_addr = map->GetVaddrInFile(next_instr->branch_addr);
481          data.wait_for_branch_to_addr_fix = true;
482        } else {
483          instr_range.branch_to_addr = 0;
484        }
485        instr_range.branch_taken_count = branch_taken ? 1 : 0;
486        instr_range.branch_not_taken_count = branch_taken ? 0 : 1;
487  
488      } else if (elem.getType() == OCSD_GEN_TRC_ELEM_TRACE_ON) {
489        // According to the ETM Specification, the Trace On element indicates a discontinuity in the
490        // instruction trace stream. So it cuts the connection between instr ranges.
491        FlushData(trace_data_[trace_id]);
492      }
493      return OCSD_RESP_CONT;
494    }
495  
FinishData()496    void FinishData() {
497      for (auto& pair : trace_data_) {
498        FlushData(pair.second);
499      }
500    }
501  
502   private:
FlushData(TraceData & data)503    void FlushData(TraceData& data) {
504      if (data.instr_range.dso != nullptr) {
505        callback_(data.instr_range);
506        data.instr_range.dso = nullptr;
507      }
508      data.wait_for_branch_to_addr_fix = false;
509    }
510  
511    MapLocator& map_locator_;
512    std::unordered_map<uint8_t, TraceData> trace_data_;
513    ETMDecoder::InstrRangeCallbackFn callback_;
514  };
515  
516  // It parses ETMBranchLists from ETMV4IPackets.
517  // It doesn't do element decoding and instruction decoding, thus is about 5 timers faster than
518  // InstrRangeParser. But some data will be lost when converting ETMBranchLists to InstrRanges:
519  //   1. InstrRanges described by Except packets (the last instructions executed before exeception,
520  //      about 2%?).
521  //   2. Branch to addresses of direct branch instructions across binaries.
522  class BranchListParser : public PacketCallback {
523   private:
524    struct TraceData {
525      uint64_t addr = 0;
526      uint8_t addr_valid_bits = 0;
527      uint8_t isa = 0;
528      bool invalid_branch = false;
529      ETMBranchList branch;
530    };
531  
532   public:
BranchListParser(MapLocator & map_locator,const ETMDecoder::BranchListCallbackFn & callback)533    BranchListParser(MapLocator& map_locator, const ETMDecoder::BranchListCallbackFn& callback)
534        : PacketCallback(BRANCH_LIST_PARSER), map_locator_(map_locator), callback_(callback) {}
535  
CheckConfigs(std::unordered_map<uint8_t,std::unique_ptr<EtmV4Config>> & configs)536    void CheckConfigs(std::unordered_map<uint8_t, std::unique_ptr<EtmV4Config>>& configs) {
537      // TODO: Current implementation doesn't support non-zero speculation length and return stack.
538      for (auto& p : configs) {
539        if (p.second->MaxSpecDepth() > 0) {
540          LOG(WARNING) << "branch list collection isn't accurate with non-zero speculation length";
541          break;
542        }
543      }
544      for (auto& p : configs) {
545        if (p.second->enabledRetStack()) {
546          LOG(WARNING) << "branch list collection will lose some data with return stack enabled";
547          break;
548        }
549      }
550    }
551  
IsAddrPacket(const EtmV4ITrcPacket * pkt)552    bool IsAddrPacket(const EtmV4ITrcPacket* pkt) {
553      return pkt->getType() >= ETM4_PKT_I_ADDR_CTXT_L_32IS0 &&
554             pkt->getType() <= ETM4_PKT_I_ADDR_L_64IS1;
555    }
556  
IsAtomPacket(const EtmV4ITrcPacket * pkt)557    bool IsAtomPacket(const EtmV4ITrcPacket* pkt) { return pkt->getAtom().num > 0; }
558  
ProcessPacket(uint8_t trace_id,ocsd_datapath_op_t op,ocsd_trc_index_t,const EtmV4ITrcPacket * pkt)559    ocsd_datapath_resp_t ProcessPacket(uint8_t trace_id, ocsd_datapath_op_t op,
560                                       ocsd_trc_index_t /*index_sop */,
561                                       const EtmV4ITrcPacket* pkt) override {
562      TraceData& data = trace_data_[trace_id];
563      if (op == OCSD_OP_DATA) {
564        if (IsAddrPacket(pkt)) {
565          // Flush branch when seeing an Addr packet. Because it isn't correct to concatenate
566          // branches before and after an Addr packet.
567          FlushBranch(data);
568          data.addr = pkt->getAddrVal();
569          data.addr_valid_bits = pkt->v_addr.valid_bits;
570          data.isa = pkt->getAddrIS();
571        }
572  
573        if (IsAtomPacket(pkt)) {
574          // An atom packet contains a branch list. We may receive one or more atom packets in a row,
575          // and need to concatenate them.
576          ProcessAtomPacket(trace_id, data, pkt);
577        }
578  
579      } else {
580        // Flush branch when seeing a flush or reset operation.
581        FlushBranch(data);
582        if (op == OCSD_OP_RESET) {
583          data.addr = 0;
584          data.addr_valid_bits = 0;
585          data.isa = 0;
586          data.invalid_branch = false;
587        }
588      }
589      return OCSD_RESP_CONT;
590    }
591  
FinishData()592    void FinishData() {
593      for (auto& pair : trace_data_) {
594        FlushBranch(pair.second);
595      }
596    }
597  
598   private:
ProcessAtomPacket(uint8_t trace_id,TraceData & data,const EtmV4ITrcPacket * pkt)599    void ProcessAtomPacket(uint8_t trace_id, TraceData& data, const EtmV4ITrcPacket* pkt) {
600      if (data.invalid_branch) {
601        return;  // Skip atom packets when we think a branch list is invalid.
602      }
603      if (data.branch.branch.empty()) {
604        // This is the first atom packet in a branch list. Check if we have tid and addr info to
605        // parse it and the following atom packets. If not, mark the branch list as invalid.
606        if (map_locator_.GetTid(trace_id) == -1 || data.addr_valid_bits == 0) {
607          data.invalid_branch = true;
608          return;
609        }
610        const MapEntry* map = map_locator_.FindMap(trace_id, data.addr);
611        if (map == nullptr) {
612          data.invalid_branch = true;
613          return;
614        }
615        data.branch.dso = map->dso;
616        data.branch.addr = map->GetVaddrInFile(data.addr);
617        if (data.isa == 1) {  // thumb instruction, mark it in bit 0.
618          data.branch.addr |= 1;
619        }
620      }
621      uint32_t bits = pkt->atom.En_bits;
622      for (size_t i = 0; i < pkt->atom.num; i++) {
623        data.branch.branch.push_back((bits & 1) == 1);
624        bits >>= 1;
625      }
626    }
627  
FlushBranch(TraceData & data)628    void FlushBranch(TraceData& data) {
629      if (!data.branch.branch.empty()) {
630        callback_(data.branch);
631        data.branch.branch.clear();
632      }
633      data.invalid_branch = false;
634    }
635  
636    MapLocator& map_locator_;
637    ETMDecoder::BranchListCallbackFn callback_;
638    std::unordered_map<uint8_t, TraceData> trace_data_;
639  };
640  
641  // Etm data decoding in OpenCSD library has two steps:
642  // 1. From byte stream to etm packets. Each packet shows an event happened. For example,
643  // an Address packet shows the cpu is running the instruction at that address, an Atom
644  // packet shows whether the cpu decides to branch or not.
// 2. From etm packets to trace elements. To generate elements, the decoder needs both etm
646  // packets and executed binaries. For example, an InstructionRange element needs the decoder
647  // to find the next branch instruction starting from an address.
648  //
649  // ETMDecoderImpl uses OpenCSD library to decode etm data. It has the following properties:
650  // 1. Supports flexible decoding strategy. It allows installing packet callbacks and element
651  // callbacks, and decodes to either packets or elements based on requirements.
652  // 2. Supports dumping data at different stages.
653  class ETMDecoderImpl : public ETMDecoder {
654   public:
ETMDecoderImpl(ETMThreadTree & thread_tree)655    ETMDecoderImpl(ETMThreadTree& thread_tree) : thread_tree_(thread_tree) {
656      // If the aux record for a thread is processed after it's thread exit record, we can't find
657      // the thread's maps when processing ETM data. To handle this, disable thread exit records.
658      thread_tree.DisableThreadExitRecords();
659    }
660  
CreateDecodeTree(const AuxTraceInfoRecord & auxtrace_info)661    void CreateDecodeTree(const AuxTraceInfoRecord& auxtrace_info) {
662      uint8_t trace_id = 0;
663      uint64_t* info = auxtrace_info.data->info;
664      for (int i = 0; i < auxtrace_info.data->nr_cpu; i++) {
665        if (info[0] == AuxTraceInfoRecord::MAGIC_ETM4) {
666          auto& etm4 = *reinterpret_cast<AuxTraceInfoRecord::ETM4Info*>(info);
667          ocsd_etmv4_cfg cfg;
668          memset(&cfg, 0, sizeof(cfg));
669          cfg.reg_idr0 = etm4.trcidr0;
670          cfg.reg_idr1 = etm4.trcidr1;
671          cfg.reg_idr2 = etm4.trcidr2;
672          cfg.reg_idr8 = etm4.trcidr8;
673          cfg.reg_configr = etm4.trcconfigr;
674          cfg.reg_traceidr = etm4.trctraceidr;
675          cfg.arch_ver = ARCH_V8;
676          cfg.core_prof = profile_CortexA;
677          trace_id = cfg.reg_traceidr & 0x7f;
678          trace_ids_.emplace(etm4.cpu, trace_id);
679          configs_.emplace(trace_id, new EtmV4Config(&cfg));
680          info = reinterpret_cast<uint64_t*>(&etm4 + 1);
681        } else {
682          CHECK_EQ(info[0], AuxTraceInfoRecord::MAGIC_ETE);
683          auto& ete = *reinterpret_cast<AuxTraceInfoRecord::ETEInfo*>(info);
684          ocsd_ete_cfg cfg;
685          memset(&cfg, 0, sizeof(cfg));
686          cfg.reg_idr0 = ete.trcidr0;
687          cfg.reg_idr1 = ete.trcidr1;
688          cfg.reg_idr2 = ete.trcidr2;
689          cfg.reg_idr8 = ete.trcidr8;
690          cfg.reg_devarch = ete.trcdevarch;
691          cfg.reg_configr = ete.trcconfigr;
692          cfg.reg_traceidr = ete.trctraceidr;
693          cfg.arch_ver = ARCH_AA64;
694          cfg.core_prof = profile_CortexA;
695          trace_id = cfg.reg_traceidr & 0x7f;
696          trace_ids_.emplace(ete.cpu, trace_id);
697          configs_.emplace(trace_id, new ETEConfig(&cfg));
698          info = reinterpret_cast<uint64_t*>(&ete + 1);
699        }
700        decode_tree_.CreateDecoder(configs_[trace_id].get());
701        auto result = packet_sinks_.emplace(trace_id, trace_id);
702        CHECK(result.second);
703        decode_tree_.AttachPacketSink(trace_id, result.first->second);
704      }
705    }
706  
EnableDump(const ETMDumpOption & option)707    void EnableDump(const ETMDumpOption& option) override {
708      dumper_.reset(new DataDumper(decode_tree_));
709      if (option.dump_raw_data) {
710        dumper_->DumpRawData();
711      }
712      if (option.dump_packets) {
713        dumper_->DumpPackets(configs_);
714      }
715      if (option.dump_elements) {
716        dumper_->DumpElements();
717        InstallElementCallback(dumper_.get());
718      }
719    }
720  
RegisterCallback(const InstrRangeCallbackFn & callback)721    void RegisterCallback(const InstrRangeCallbackFn& callback) {
722      InstallMapLocator();
723      instr_range_parser_.reset(new InstrRangeParser(*map_locator_, callback));
724      InstallElementCallback(instr_range_parser_.get());
725    }
726  
RegisterCallback(const BranchListCallbackFn & callback)727    void RegisterCallback(const BranchListCallbackFn& callback) {
728      InstallMapLocator();
729      branch_list_parser_.reset(new BranchListParser(*map_locator_, callback));
730      branch_list_parser_->CheckConfigs(configs_);
731      InstallPacketCallback(branch_list_parser_.get());
732    }
733  
ProcessData(const uint8_t * data,size_t size,bool formatted,uint32_t cpu)734    bool ProcessData(const uint8_t* data, size_t size, bool formatted, uint32_t cpu) override {
735      // Reset decoders before processing each data block. Because:
736      // 1. Data blocks are not continuous. So decoders shouldn't keep previous states when
737      //    processing a new block.
738      // 2. The beginning part of a data block may be truncated if kernel buffer is temporarily full.
739      //    So we may see garbage data, which can cause decoding errors if we don't reset decoders.
740      LOG(DEBUG) << "Processing " << (!formatted ? "un" : "") << "formatted data with size " << size;
741      auto& decoder = formatted ? decode_tree_.GetFormattedDataIn()
742                                : decode_tree_.GetUnformattedDataIn(trace_ids_[cpu]);
743  
744      auto resp = decoder.TraceDataIn(OCSD_OP_RESET, data_index_, 0, nullptr, nullptr);
745      if (IsRespError(resp)) {
746        LOG(ERROR) << "failed to reset decoder, resp " << resp;
747        return false;
748      }
749      size_t left_size = size;
750      const size_t MAX_RESET_RETRY_COUNT = 3;
751      size_t reset_retry_count = 0;
752      while (left_size > 0) {
753        uint32_t processed;
754        auto resp = decoder.TraceDataIn(OCSD_OP_DATA, data_index_, left_size, data, &processed);
755        if (IsRespError(resp)) {
756          // A decoding error shouldn't ruin all data. Reset decoders to recover from it.
757          // But some errors may not be recoverable by resetting decoders. So use a max retry limit.
758          if (++reset_retry_count > MAX_RESET_RETRY_COUNT) {
759            break;
760          }
761          LOG(DEBUG) << "reset etm decoders for seeing a decode failure, resp " << resp
762                     << ", reset_retry_count is " << reset_retry_count;
763          decoder.TraceDataIn(OCSD_OP_RESET, data_index_ + processed, 0, nullptr, nullptr);
764        }
765        data += processed;
766        left_size -= processed;
767        data_index_ += processed;
768      }
769      return true;
770    }
771  
FinishData()772    bool FinishData() override {
773      if (instr_range_parser_) {
774        instr_range_parser_->FinishData();
775      }
776      if (branch_list_parser_) {
777        branch_list_parser_->FinishData();
778      }
779      return true;
780    }
781  
782   private:
InstallMapLocator()783    void InstallMapLocator() {
784      if (!map_locator_) {
785        map_locator_.reset(new MapLocator(thread_tree_));
786        for (auto& cfg : configs_) {
787          int64_t configr = (*(const ocsd_etmv4_cfg*)*cfg.second).reg_configr;
788          map_locator_->SetUseVmid(cfg.first,
789                                   configr & (1U << ETM4_CFG_BIT_VMID | 1U << ETM4_CFG_BIT_VMID_OPT));
790        }
791  
792        InstallPacketCallback(map_locator_.get());
793      }
794    }
795  
InstallPacketCallback(PacketCallback * callback)796    void InstallPacketCallback(PacketCallback* callback) {
797      for (auto& p : packet_sinks_) {
798        p.second.AddCallback(callback);
799      }
800    }
801  
InstallElementCallback(ElementCallback * callback)802    void InstallElementCallback(ElementCallback* callback) {
803      if (!packet_to_element_) {
804        InstallMapLocator();
805        packet_to_element_.reset(
806            new PacketToElement(*map_locator_, configs_, decode_tree_.ErrorLogger()));
807        InstallPacketCallback(packet_to_element_.get());
808      }
809      packet_to_element_->AddCallback(callback);
810    }
811  
812    // map ip address to binary path and binary offset
813    ETMThreadTree& thread_tree_;
814    // handle to build OpenCSD decoder
815    ETMV4IDecodeTree decode_tree_;
816    // map from cpu to trace id
817    std::unordered_map<uint64_t, uint8_t> trace_ids_;
818    // map from the trace id of an etm device to its config
819    std::unordered_map<uint8_t, std::unique_ptr<EtmV4Config>> configs_;
820    // map from the trace id of an etm device to its PacketSink
821    std::unordered_map<uint8_t, PacketSink> packet_sinks_;
822    std::unique_ptr<PacketToElement> packet_to_element_;
823    std::unique_ptr<DataDumper> dumper_;
824    // an index keeping processed etm data size
825    size_t data_index_ = 0;
826    std::unique_ptr<InstrRangeParser> instr_range_parser_;
827    std::unique_ptr<MapLocator> map_locator_;
828    std::unique_ptr<BranchListParser> branch_list_parser_;
829  };
830  
831  }  // namespace
832  
ParseEtmDumpOption(const std::string & s,ETMDumpOption * option)833  bool ParseEtmDumpOption(const std::string& s, ETMDumpOption* option) {
834    for (auto& value : android::base::Split(s, ",")) {
835      if (value == "raw") {
836        option->dump_raw_data = true;
837      } else if (value == "packet") {
838        option->dump_packets = true;
839      } else if (value == "element") {
840        option->dump_elements = true;
841      } else {
842        LOG(ERROR) << "unknown etm dump option: " << value;
843        return false;
844      }
845    }
846    return true;
847  }
848  
Create(const AuxTraceInfoRecord & auxtrace_info,ETMThreadTree & thread_tree)849  std::unique_ptr<ETMDecoder> ETMDecoder::Create(const AuxTraceInfoRecord& auxtrace_info,
850                                                 ETMThreadTree& thread_tree) {
851    auto decoder = std::make_unique<ETMDecoderImpl>(thread_tree);
852    decoder->CreateDecodeTree(auxtrace_info);
853    return std::unique_ptr<ETMDecoder>(decoder.release());
854  }
855  
856  // Use OpenCSD instruction decoder to convert branches to instruction addresses.
857  class BranchDecoder {
858   public:
Init(Dso * dso)859    android::base::expected<void, std::string> Init(Dso* dso) {
860      ElfStatus status;
861      elf_ = ElfFile::Open(dso->GetDebugFilePath(), &status);
862      if (!elf_) {
863        std::stringstream ss;
864        ss << status;
865        return android::base::unexpected(ss.str());
866      }
867      if (dso->type() == DSO_KERNEL_MODULE) {
868        // Kernel module doesn't have program header. So create a fake one mapping to .text section.
869        for (const auto& section : elf_->GetSectionHeader()) {
870          if (section.name == ".text") {
871            segments_.resize(1);
872            segments_[0].is_executable = true;
873            segments_[0].is_load = true;
874            segments_[0].file_offset = section.file_offset;
875            segments_[0].file_size = section.size;
876            segments_[0].vaddr = section.vaddr;
877            break;
878          }
879        }
880      } else {
881        segments_ = elf_->GetProgramHeader();
882        auto it = std::remove_if(segments_.begin(), segments_.end(),
883                                 [](const ElfSegment& s) { return !s.is_executable; });
884        segments_.resize(it - segments_.begin());
885      }
886      if (segments_.empty()) {
887        return android::base::unexpected("no segments");
888      }
889      buffer_ = elf_->GetMemoryBuffer();
890      return {};
891    }
892  
SetAddr(uint64_t addr,bool is_thumb)893    void SetAddr(uint64_t addr, bool is_thumb) {
894      memset(&instr_info_, 0, sizeof(instr_info_));
895      instr_info_.pe_type.arch = ARCH_V8;
896      instr_info_.pe_type.profile = profile_CortexA;
897      instr_info_.isa =
898          elf_->Is64Bit() ? ocsd_isa_aarch64 : (is_thumb ? ocsd_isa_thumb2 : ocsd_isa_arm);
899      instr_info_.instr_addr = addr;
900    }
901  
FindNextBranch()902    bool FindNextBranch() {
903      // Loop until we find a branch instruction.
904      while (ReadMem(instr_info_.instr_addr, 4, &instr_info_.opcode)) {
905        ocsd_err_t err = instruction_decoder_.DecodeInstruction(&instr_info_);
906        if (err != OCSD_OK) {
907          break;
908        }
909        if (instr_info_.type != OCSD_INSTR_OTHER) {
910          return true;
911        }
912        instr_info_.instr_addr += instr_info_.instr_size;
913      }
914      return false;
915    };
916  
InstrInfo()917    ocsd_instr_info& InstrInfo() { return instr_info_; }
918  
919   private:
ReadMem(uint64_t vaddr,size_t size,void * data)920    bool ReadMem(uint64_t vaddr, size_t size, void* data) {
921      for (auto& segment : segments_) {
922        if (vaddr >= segment.vaddr && vaddr + size <= segment.vaddr + segment.file_size) {
923          uint64_t offset = vaddr - segment.vaddr + segment.file_offset;
924          memcpy(data, buffer_->getBufferStart() + offset, size);
925          return true;
926        }
927      }
928      return false;
929    }
930  
931    std::unique_ptr<ElfFile> elf_;
932    std::vector<ElfSegment> segments_;
933    llvm::MemoryBuffer* buffer_ = nullptr;
934    ocsd_instr_info instr_info_;
935    InstructionDecoder instruction_decoder_;
936  };
937  
ConvertETMBranchMapToInstrRanges(Dso * dso,const ETMBranchMap & branch_map,const ETMDecoder::InstrRangeCallbackFn & callback)938  android::base::expected<void, std::string> ConvertETMBranchMapToInstrRanges(
939      Dso* dso, const ETMBranchMap& branch_map, const ETMDecoder::InstrRangeCallbackFn& callback) {
940    ETMInstrRange instr_range;
941    instr_range.dso = dso;
942  
943    BranchDecoder decoder;
944    if (auto result = decoder.Init(dso); !result.ok()) {
945      return result;
946    }
947  
948    for (const auto& addr_p : branch_map) {
949      uint64_t start_addr = addr_p.first & ~1ULL;
950      bool is_thumb = addr_p.first & 1;
951      for (const auto& branch_p : addr_p.second) {
952        const std::vector<bool>& branch = branch_p.first;
953        uint64_t count = branch_p.second;
954        decoder.SetAddr(start_addr, is_thumb);
955  
956        for (bool b : branch) {
957          ocsd_instr_info& instr = decoder.InstrInfo();
958          uint64_t from_addr = instr.instr_addr;
959          if (!decoder.FindNextBranch()) {
960            break;
961          }
962          bool end_with_branch = instr.type == OCSD_INSTR_BR || instr.type == OCSD_INSTR_BR_INDIRECT;
963          bool branch_taken = end_with_branch && b;
964          instr_range.start_addr = from_addr;
965          instr_range.end_addr = instr.instr_addr;
966          if (instr.type == OCSD_INSTR_BR) {
967            instr_range.branch_to_addr = instr.branch_addr;
968          } else {
969            instr_range.branch_to_addr = 0;
970          }
971          instr_range.branch_taken_count = branch_taken ? count : 0;
972          instr_range.branch_not_taken_count = branch_taken ? 0 : count;
973  
974          callback(instr_range);
975  
976          if (b) {
977            instr.instr_addr = instr.branch_addr;
978          } else {
979            instr.instr_addr += instr.instr_size;
980          }
981        }
982      }
983    }
984    return {};
985  }
986  
987  }  // namespace simpleperf
988