1 /*
2  * Copyright (C) 2020 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "report_utils.h"
18 
19 #include <stdlib.h>
20 
21 #include <android-base/parsebool.h>
22 #include <android-base/scopeguard.h>
23 #include <android-base/strings.h>
24 
25 #include "JITDebugReader.h"
26 #include "RegEx.h"
27 #include "utils.h"
28 
29 namespace simpleperf {
30 
AddProguardMappingFile(std::string_view mapping_file)31 bool ProguardMappingRetrace::AddProguardMappingFile(std::string_view mapping_file) {
32   // The mapping file format is described in
33   // https://www.guardsquare.com/en/products/proguard/manual/retrace.
34   // Additional info provided by R8 is described in
35   // https://r8.googlesource.com/r8/+/refs/heads/main/doc/retrace.md.
36   line_reader_.reset(new LineReader(mapping_file));
37   android::base::ScopeGuard g([&]() { line_reader_ = nullptr; });
38 
39   if (!line_reader_->Ok()) {
40     PLOG(ERROR) << "failed to read " << mapping_file;
41     return false;
42   }
43 
44   MoveToNextLine();
45   while (cur_line_.type != LineType::LINE_EOF) {
46     if (cur_line_.type == LineType::CLASS_LINE) {
47       // Match line "original_classname -> obfuscated_classname:".
48       std::string_view s = cur_line_.data;
49       auto arrow_pos = s.find(" -> ");
50       auto arrow_end_pos = arrow_pos + strlen(" -> ");
51       if (auto colon_pos = s.find(':', arrow_end_pos); colon_pos != s.npos) {
52         std::string_view original_classname = s.substr(0, arrow_pos);
53         std::string obfuscated_classname(s.substr(arrow_end_pos, colon_pos - arrow_end_pos));
54         MappingClass& cur_class = class_map_[obfuscated_classname];
55         cur_class.original_classname = original_classname;
56         MoveToNextLine();
57         if (cur_line_.type == LineType::SYNTHESIZED_COMMENT) {
58           cur_class.synthesized = true;
59           MoveToNextLine();
60         }
61 
62         while (cur_line_.type == LineType::METHOD_LINE) {
63           ParseMethod(cur_class);
64         }
65         continue;
66       }
67     }
68 
69     // Skip unparsed line.
70     MoveToNextLine();
71   }
72   return true;
73 }
74 
ParseMethod(MappingClass & mapping_class)75 void ProguardMappingRetrace::ParseMethod(MappingClass& mapping_class) {
76   // Match line "... [original_classname.]original_methodname(...)... -> obfuscated_methodname".
77   std::string_view s = cur_line_.data;
78   auto arrow_pos = s.find(" -> ");
79   auto arrow_end_pos = arrow_pos + strlen(" -> ");
80   if (auto left_brace_pos = s.rfind('(', arrow_pos); left_brace_pos != s.npos) {
81     if (auto space_pos = s.rfind(' ', left_brace_pos); space_pos != s.npos) {
82       std::string_view name = s.substr(space_pos + 1, left_brace_pos - space_pos - 1);
83       bool contains_classname = name.find('.') != name.npos;
84       if (contains_classname && android::base::StartsWith(name, mapping_class.original_classname)) {
85         name.remove_prefix(mapping_class.original_classname.size() + 1);
86         contains_classname = false;
87       }
88       std::string original_methodname(name);
89       std::string obfuscated_methodname(s.substr(arrow_end_pos));
90       bool synthesized = false;
91 
92       MoveToNextLine();
93       if (cur_line_.type == LineType::SYNTHESIZED_COMMENT) {
94         synthesized = true;
95         MoveToNextLine();
96       }
97 
98       auto& method_map = mapping_class.method_map;
99       if (auto it = method_map.find(obfuscated_methodname); it != method_map.end()) {
100         // The obfuscated method name already exists. We don't know which one to choose.
101         // So just prefer the latter one unless it's synthesized.
102         if (!synthesized) {
103           it->second.original_name = original_methodname;
104           it->second.contains_classname = contains_classname;
105           it->second.synthesized = synthesized;
106         }
107       } else {
108         auto& method = method_map[obfuscated_methodname];
109         method.original_name = original_methodname;
110         method.contains_classname = contains_classname;
111         method.synthesized = synthesized;
112       }
113       return;
114     }
115   }
116 
117   // Skip unparsed line.
118   MoveToNextLine();
119 }
120 
MoveToNextLine()121 void ProguardMappingRetrace::MoveToNextLine() {
122   std::string* line;
123   while ((line = line_reader_->ReadLine()) != nullptr) {
124     std::string_view s = *line;
125     if (s.empty()) {
126       continue;
127     }
128     size_t non_space_pos = s.find_first_not_of(' ');
129     if (non_space_pos != s.npos && s[non_space_pos] == '#') {
130       // Skip all comments unless it's synthesized comment.
131       if (s.find("com.android.tools.r8.synthesized") != s.npos) {
132         cur_line_.type = SYNTHESIZED_COMMENT;
133         cur_line_.data = s;
134         return;
135       }
136       continue;
137     }
138     if (s.find(" -> ") == s.npos) {
139       // Skip unknown lines.
140       continue;
141     }
142     cur_line_.data = s;
143     if (s[0] == ' ') {
144       cur_line_.type = METHOD_LINE;
145     } else {
146       cur_line_.type = CLASS_LINE;
147     }
148     return;
149   }
150   cur_line_.type = LINE_EOF;
151 }
152 
DeObfuscateJavaMethods(std::string_view obfuscated_name,std::string * original_name,bool * synthesized)153 bool ProguardMappingRetrace::DeObfuscateJavaMethods(std::string_view obfuscated_name,
154                                                     std::string* original_name, bool* synthesized) {
155   if (auto split_pos = obfuscated_name.rfind('.'); split_pos != obfuscated_name.npos) {
156     std::string obfuscated_classname(obfuscated_name.substr(0, split_pos));
157 
158     if (auto it = class_map_.find(obfuscated_classname); it != class_map_.end()) {
159       const MappingClass& mapping_class = it->second;
160       const auto& method_map = mapping_class.method_map;
161       std::string obfuscated_methodname(obfuscated_name.substr(split_pos + 1));
162 
163       if (auto method_it = method_map.find(obfuscated_methodname); method_it != method_map.end()) {
164         const auto& method = method_it->second;
165         if (method.contains_classname) {
166           *original_name = method.original_name;
167         } else {
168           *original_name = mapping_class.original_classname + "." + method.original_name;
169         }
170         *synthesized = method.synthesized;
171       } else {
172         // Only the classname is obfuscated.
173         *original_name = mapping_class.original_classname + "." + obfuscated_methodname;
174         *synthesized = mapping_class.synthesized;
175       }
176       return true;
177     }
178   }
179   return false;
180 }
181 
IsArtEntry(const CallChainReportEntry & entry,bool * is_jni_trampoline)182 static bool IsArtEntry(const CallChainReportEntry& entry, bool* is_jni_trampoline) {
183   if (entry.execution_type == CallChainExecutionType::NATIVE_METHOD) {
184     // art_jni_trampoline/art_quick_generic_jni_trampoline are trampolines used to call jni
185     // methods in art runtime. We want to hide them when hiding art frames.
186     *is_jni_trampoline = android::base::EndsWith(entry.symbol->Name(), "jni_trampoline");
187     return *is_jni_trampoline || android::base::EndsWith(entry.dso->Path(), "/libart.so") ||
188            android::base::EndsWith(entry.dso->Path(), "/libartd.so");
189   }
190   return false;
191 };
192 
~CallChainReportModifier()193 CallChainReportModifier::~CallChainReportModifier() {}
194 
195 // Remove art frames.
196 class ArtFrameRemover : public CallChainReportModifier {
197  public:
Modify(std::vector<CallChainReportEntry> & callchain)198   void Modify(std::vector<CallChainReportEntry>& callchain) override {
199     auto it =
200         std::remove_if(callchain.begin(), callchain.end(), [](const CallChainReportEntry& entry) {
201           return entry.execution_type == CallChainExecutionType::ART_METHOD;
202         });
203     callchain.erase(it, callchain.end());
204   }
205 };
206 
207 // Convert JIT methods to their corresponding interpreted Java methods.
208 class JITFrameConverter : public CallChainReportModifier {
209  public:
JITFrameConverter(const ThreadTree & thread_tree)210   JITFrameConverter(const ThreadTree& thread_tree) : thread_tree_(thread_tree) {}
211 
Modify(std::vector<CallChainReportEntry> & callchain)212   void Modify(std::vector<CallChainReportEntry>& callchain) override {
213     CollectJavaMethods();
214     for (size_t i = 0; i < callchain.size();) {
215       auto& entry = callchain[i];
216       if (entry.execution_type == CallChainExecutionType::JIT_JVM_METHOD) {
217         // This is a JIT java method, merge it with the interpreted java method having the same
218         // name if possible. Otherwise, merge it with other JIT java methods having the same name
219         // by assigning a common dso_name.
220         if (auto it = java_method_map_.find(std::string(entry.symbol->FunctionName()));
221             it != java_method_map_.end()) {
222           entry.dso = it->second.dso;
223           entry.symbol = it->second.symbol;
224           // Not enough info to map an offset in a JIT method to an offset in a dex file. So just
225           // use the symbol_addr.
226           entry.vaddr_in_file = entry.symbol->addr;
227 
228           // ART may call from an interpreted Java method into its corresponding JIT method. To
229           // avoid showing the method calling itself, remove the JIT frame.
230           if (i + 1 < callchain.size() && callchain[i + 1].dso == entry.dso &&
231               callchain[i + 1].symbol == entry.symbol) {
232             callchain.erase(callchain.begin() + i);
233             continue;
234           }
235 
236         } else if (!JITDebugReader::IsPathInJITSymFile(entry.dso->Path())) {
237           // Old JITSymFiles use names like "TemporaryFile-XXXXXX". So give them a better name.
238           entry.dso_name = "[JIT cache]";
239         }
240       }
241       i++;
242     }
243   }
244 
245  private:
246   struct JavaMethod {
247     Dso* dso;
248     const Symbol* symbol;
JavaMethodsimpleperf::JITFrameConverter::JavaMethod249     JavaMethod(Dso* dso, const Symbol* symbol) : dso(dso), symbol(symbol) {}
250   };
251 
CollectJavaMethods()252   void CollectJavaMethods() {
253     if (!java_method_initialized_) {
254       java_method_initialized_ = true;
255       for (Dso* dso : thread_tree_.GetAllDsos()) {
256         if (dso->type() == DSO_DEX_FILE) {
257           dso->LoadSymbols();
258           for (auto& symbol : dso->GetSymbols()) {
259             java_method_map_.emplace(symbol.Name(), JavaMethod(dso, &symbol));
260           }
261         }
262       }
263     }
264   }
265 
266   const ThreadTree& thread_tree_;
267   bool java_method_initialized_ = false;
268   std::unordered_map<std::string, JavaMethod> java_method_map_;
269 };
270 
271 // Use proguard mapping.txt to de-obfuscate minified symbols.
272 class JavaMethodDeobfuscater : public CallChainReportModifier {
273  public:
JavaMethodDeobfuscater(bool remove_r8_synthesized_frame)274   JavaMethodDeobfuscater(bool remove_r8_synthesized_frame)
275       : remove_r8_synthesized_frame_(remove_r8_synthesized_frame) {}
276 
AddProguardMappingFile(std::string_view mapping_file)277   bool AddProguardMappingFile(std::string_view mapping_file) {
278     return retrace_.AddProguardMappingFile(mapping_file);
279   }
280 
Modify(std::vector<CallChainReportEntry> & callchain)281   void Modify(std::vector<CallChainReportEntry>& callchain) override {
282     for (size_t i = 0; i < callchain.size();) {
283       auto& entry = callchain[i];
284       if (!IsJavaEntry(entry)) {
285         i++;
286         continue;
287       }
288       std::string_view name = entry.symbol->FunctionName();
289       std::string original_name;
290       bool synthesized;
291       if (retrace_.DeObfuscateJavaMethods(name, &original_name, &synthesized)) {
292         if (synthesized && remove_r8_synthesized_frame_) {
293           callchain.erase(callchain.begin() + i);
294           continue;
295         }
296         entry.symbol->SetDemangledName(original_name);
297       }
298       i++;
299     }
300   }
301 
302  private:
IsJavaEntry(const CallChainReportEntry & entry)303   bool IsJavaEntry(const CallChainReportEntry& entry) {
304     static const char* COMPILED_JAVA_FILE_SUFFIXES[] = {".odex", ".oat", ".dex"};
305     if (entry.execution_type == CallChainExecutionType::JIT_JVM_METHOD ||
306         entry.execution_type == CallChainExecutionType::INTERPRETED_JVM_METHOD) {
307       return true;
308     }
309     if (entry.execution_type == CallChainExecutionType::NATIVE_METHOD) {
310       const std::string& path = entry.dso->Path();
311       for (const char* suffix : COMPILED_JAVA_FILE_SUFFIXES) {
312         if (android::base::EndsWith(path, suffix)) {
313           return true;
314         }
315       }
316     }
317     return false;
318   }
319 
320   const bool remove_r8_synthesized_frame_;
321   ProguardMappingRetrace retrace_;
322 };
323 
324 // Use regex to filter method names.
325 class MethodNameFilter : public CallChainReportModifier {
326  public:
RemoveMethod(std::string_view method_name_regex)327   bool RemoveMethod(std::string_view method_name_regex) {
328     if (auto regex = RegEx::Create(method_name_regex); regex != nullptr) {
329       exclude_names_.emplace_back(std::move(regex));
330       return true;
331     }
332     return false;
333   }
334 
Modify(std::vector<CallChainReportEntry> & callchain)335   void Modify(std::vector<CallChainReportEntry>& callchain) override {
336     auto it = std::remove_if(callchain.begin(), callchain.end(),
337                              [this](const CallChainReportEntry& entry) {
338                                return SearchInRegs(entry.symbol->DemangledName(), exclude_names_);
339                              });
340     callchain.erase(it, callchain.end());
341   }
342 
343  private:
344   std::vector<std::unique_ptr<RegEx>> exclude_names_;
345 };
346 
CallChainReportBuilder(ThreadTree & thread_tree)347 CallChainReportBuilder::CallChainReportBuilder(ThreadTree& thread_tree)
348     : thread_tree_(thread_tree) {
349   const char* env_name = "REMOVE_R8_SYNTHESIZED_FRAME";
350   const char* s = getenv(env_name);
351   if (s != nullptr) {
352     auto result = android::base::ParseBool(s);
353     if (result == android::base::ParseBoolResult::kError) {
354       LOG(WARNING) << "invalid value in env variable " << env_name;
355     } else if (result == android::base::ParseBoolResult::kTrue) {
356       LOG(INFO) << "R8 synthesized frames will be removed.";
357       remove_r8_synthesized_frame_ = true;
358     }
359   }
360   SetRemoveArtFrame(true);
361   SetConvertJITFrame(true);
362 }
363 
SetRemoveArtFrame(bool enable)364 void CallChainReportBuilder::SetRemoveArtFrame(bool enable) {
365   if (enable) {
366     art_frame_remover_.reset(new ArtFrameRemover);
367   } else {
368     art_frame_remover_.reset(nullptr);
369   }
370 }
371 
SetConvertJITFrame(bool enable)372 void CallChainReportBuilder::SetConvertJITFrame(bool enable) {
373   if (enable) {
374     jit_frame_converter_.reset(new JITFrameConverter(thread_tree_));
375   } else {
376     jit_frame_converter_.reset(nullptr);
377   }
378 }
379 
AddProguardMappingFile(std::string_view mapping_file)380 bool CallChainReportBuilder::AddProguardMappingFile(std::string_view mapping_file) {
381   if (!java_method_deobfuscater_) {
382     java_method_deobfuscater_.reset(new JavaMethodDeobfuscater(remove_r8_synthesized_frame_));
383   }
384   return static_cast<JavaMethodDeobfuscater&>(*java_method_deobfuscater_)
385       .AddProguardMappingFile(mapping_file);
386 }
387 
RemoveMethod(std::string_view method_name_regex)388 bool CallChainReportBuilder::RemoveMethod(std::string_view method_name_regex) {
389   if (!method_name_filter_) {
390     method_name_filter_.reset(new MethodNameFilter);
391   }
392   return static_cast<MethodNameFilter&>(*method_name_filter_).RemoveMethod(method_name_regex);
393 }
394 
Build(const ThreadEntry * thread,const std::vector<uint64_t> & ips,size_t kernel_ip_count)395 std::vector<CallChainReportEntry> CallChainReportBuilder::Build(const ThreadEntry* thread,
396                                                                 const std::vector<uint64_t>& ips,
397                                                                 size_t kernel_ip_count) {
398   std::vector<CallChainReportEntry> result;
399   result.reserve(ips.size());
400   for (size_t i = 0; i < ips.size(); i++) {
401     const MapEntry* map = thread_tree_.FindMap(thread, ips[i], i < kernel_ip_count);
402     Dso* dso = map->dso;
403     uint64_t vaddr_in_file;
404     const Symbol* symbol = thread_tree_.FindSymbol(map, ips[i], &vaddr_in_file, &dso);
405     CallChainExecutionType execution_type = CallChainExecutionType::NATIVE_METHOD;
406     if (dso->IsForJavaMethod()) {
407       if (dso->type() == DSO_DEX_FILE) {
408         execution_type = CallChainExecutionType::INTERPRETED_JVM_METHOD;
409       } else {
410         execution_type = CallChainExecutionType::JIT_JVM_METHOD;
411       }
412     }
413     result.resize(result.size() + 1);
414     auto& entry = result.back();
415     entry.ip = ips[i];
416     entry.symbol = symbol;
417     entry.dso = dso;
418     entry.vaddr_in_file = vaddr_in_file;
419     entry.map = map;
420     entry.execution_type = execution_type;
421   }
422   MarkArtFrame(result);
423   if (art_frame_remover_) {
424     art_frame_remover_->Modify(result);
425   }
426   if (jit_frame_converter_) {
427     jit_frame_converter_->Modify(result);
428   }
429   if (java_method_deobfuscater_) {
430     java_method_deobfuscater_->Modify(result);
431   }
432   if (method_name_filter_) {
433     method_name_filter_->Modify(result);
434   }
435   return result;
436 }
437 
MarkArtFrame(std::vector<CallChainReportEntry> & callchain)438 void CallChainReportBuilder::MarkArtFrame(std::vector<CallChainReportEntry>& callchain) {
439   // Mark art methods before or after a JVM method.
440   bool near_java_method = false;
441   bool is_jni_trampoline = false;
442   std::vector<size_t> jni_trampoline_positions;
443   for (size_t i = 0; i < callchain.size(); ++i) {
444     auto& entry = callchain[i];
445     if (entry.execution_type == CallChainExecutionType::INTERPRETED_JVM_METHOD ||
446         entry.execution_type == CallChainExecutionType::JIT_JVM_METHOD) {
447       near_java_method = true;
448 
449       // Mark art frames before this entry.
450       for (int j = static_cast<int>(i) - 1; j >= 0; j--) {
451         if (!IsArtEntry(callchain[j], &is_jni_trampoline)) {
452           break;
453         }
454         callchain[j].execution_type = CallChainExecutionType::ART_METHOD;
455         if (is_jni_trampoline) {
456           jni_trampoline_positions.push_back(j);
457         }
458       }
459     } else if (near_java_method && IsArtEntry(entry, &is_jni_trampoline)) {
460       entry.execution_type = CallChainExecutionType::ART_METHOD;
461       if (is_jni_trampoline) {
462         jni_trampoline_positions.push_back(i);
463       }
464     } else {
465       near_java_method = false;
466     }
467   }
468   // Functions called by art_jni_trampoline are jni methods. And we don't want to hide them.
469   for (auto i : jni_trampoline_positions) {
470     if (i > 0 && callchain[i - 1].execution_type == CallChainExecutionType::ART_METHOD) {
471       callchain[i - 1].execution_type = CallChainExecutionType::NATIVE_METHOD;
472     }
473   }
474 }
475 
AggregateThreads(const std::vector<std::string> & thread_name_regex)476 bool ThreadReportBuilder::AggregateThreads(const std::vector<std::string>& thread_name_regex) {
477   size_t i = thread_regs_.size();
478   thread_regs_.resize(i + thread_name_regex.size());
479   for (const auto& reg_str : thread_name_regex) {
480     std::unique_ptr<RegEx> re = RegEx::Create(reg_str);
481     if (!re) {
482       return false;
483     }
484     thread_regs_[i++].re = std::move(re);
485   }
486   return true;
487 }
488 
Build(const ThreadEntry & thread)489 ThreadReport ThreadReportBuilder::Build(const ThreadEntry& thread) {
490   ThreadReport report(thread.pid, thread.tid, thread.comm);
491   ModifyReportToAggregateThreads(report);
492   return report;
493 }
494 
ModifyReportToAggregateThreads(ThreadReport & report)495 void ThreadReportBuilder::ModifyReportToAggregateThreads(ThreadReport& report) {
496   if (thread_regs_.empty()) {
497     // No modification when there are no regular expressions.
498     return;
499   }
500   const std::string thread_name = report.thread_name;
501   if (auto it = thread_map_.find(thread_name); it != thread_map_.end()) {
502     // Found cached result in thread_map_.
503     if (it->second != -1) {
504       report = thread_regs_[it->second].report;
505     }
506     return;
507   }
508   // Run the slow path to walk through every regular expression.
509   size_t index;
510   for (index = 0; index < thread_regs_.size(); ++index) {
511     if (thread_regs_[index].re->Match(thread_name)) {
512       break;
513     }
514   }
515   if (index == thread_regs_.size()) {
516     thread_map_[thread_name] = -1;
517   } else {
518     thread_map_[thread_name] = static_cast<int>(index);
519     // Modify thread report.
520     auto& aggregated_report = thread_regs_[index].report;
521     if (aggregated_report.thread_name == nullptr) {
522       // Use regular expression as the name of the aggregated thread. So users know it's an
523       // aggregated thread.
524       aggregated_report =
525           ThreadReport(report.pid, report.tid, thread_regs_[index].re->GetPattern().c_str());
526     }
527     report = aggregated_report;
528   }
529 }
530 
531 }  // namespace simpleperf
532