1 /*
2 * Copyright (C) 2020 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "report_utils.h"
18
19 #include <stdlib.h>
20
21 #include <android-base/parsebool.h>
22 #include <android-base/scopeguard.h>
23 #include <android-base/strings.h>
24
25 #include "JITDebugReader.h"
26 #include "RegEx.h"
27 #include "utils.h"
28
29 namespace simpleperf {
30
AddProguardMappingFile(std::string_view mapping_file)31 bool ProguardMappingRetrace::AddProguardMappingFile(std::string_view mapping_file) {
32 // The mapping file format is described in
33 // https://www.guardsquare.com/en/products/proguard/manual/retrace.
34 // Additional info provided by R8 is described in
35 // https://r8.googlesource.com/r8/+/refs/heads/main/doc/retrace.md.
36 line_reader_.reset(new LineReader(mapping_file));
37 android::base::ScopeGuard g([&]() { line_reader_ = nullptr; });
38
39 if (!line_reader_->Ok()) {
40 PLOG(ERROR) << "failed to read " << mapping_file;
41 return false;
42 }
43
44 MoveToNextLine();
45 while (cur_line_.type != LineType::LINE_EOF) {
46 if (cur_line_.type == LineType::CLASS_LINE) {
47 // Match line "original_classname -> obfuscated_classname:".
48 std::string_view s = cur_line_.data;
49 auto arrow_pos = s.find(" -> ");
50 auto arrow_end_pos = arrow_pos + strlen(" -> ");
51 if (auto colon_pos = s.find(':', arrow_end_pos); colon_pos != s.npos) {
52 std::string_view original_classname = s.substr(0, arrow_pos);
53 std::string obfuscated_classname(s.substr(arrow_end_pos, colon_pos - arrow_end_pos));
54 MappingClass& cur_class = class_map_[obfuscated_classname];
55 cur_class.original_classname = original_classname;
56 MoveToNextLine();
57 if (cur_line_.type == LineType::SYNTHESIZED_COMMENT) {
58 cur_class.synthesized = true;
59 MoveToNextLine();
60 }
61
62 while (cur_line_.type == LineType::METHOD_LINE) {
63 ParseMethod(cur_class);
64 }
65 continue;
66 }
67 }
68
69 // Skip unparsed line.
70 MoveToNextLine();
71 }
72 return true;
73 }
74
ParseMethod(MappingClass & mapping_class)75 void ProguardMappingRetrace::ParseMethod(MappingClass& mapping_class) {
76 // Match line "... [original_classname.]original_methodname(...)... -> obfuscated_methodname".
77 std::string_view s = cur_line_.data;
78 auto arrow_pos = s.find(" -> ");
79 auto arrow_end_pos = arrow_pos + strlen(" -> ");
80 if (auto left_brace_pos = s.rfind('(', arrow_pos); left_brace_pos != s.npos) {
81 if (auto space_pos = s.rfind(' ', left_brace_pos); space_pos != s.npos) {
82 std::string_view name = s.substr(space_pos + 1, left_brace_pos - space_pos - 1);
83 bool contains_classname = name.find('.') != name.npos;
84 if (contains_classname && android::base::StartsWith(name, mapping_class.original_classname)) {
85 name.remove_prefix(mapping_class.original_classname.size() + 1);
86 contains_classname = false;
87 }
88 std::string original_methodname(name);
89 std::string obfuscated_methodname(s.substr(arrow_end_pos));
90 bool synthesized = false;
91
92 MoveToNextLine();
93 if (cur_line_.type == LineType::SYNTHESIZED_COMMENT) {
94 synthesized = true;
95 MoveToNextLine();
96 }
97
98 auto& method_map = mapping_class.method_map;
99 if (auto it = method_map.find(obfuscated_methodname); it != method_map.end()) {
100 // The obfuscated method name already exists. We don't know which one to choose.
101 // So just prefer the latter one unless it's synthesized.
102 if (!synthesized) {
103 it->second.original_name = original_methodname;
104 it->second.contains_classname = contains_classname;
105 it->second.synthesized = synthesized;
106 }
107 } else {
108 auto& method = method_map[obfuscated_methodname];
109 method.original_name = original_methodname;
110 method.contains_classname = contains_classname;
111 method.synthesized = synthesized;
112 }
113 return;
114 }
115 }
116
117 // Skip unparsed line.
118 MoveToNextLine();
119 }
120
MoveToNextLine()121 void ProguardMappingRetrace::MoveToNextLine() {
122 std::string* line;
123 while ((line = line_reader_->ReadLine()) != nullptr) {
124 std::string_view s = *line;
125 if (s.empty()) {
126 continue;
127 }
128 size_t non_space_pos = s.find_first_not_of(' ');
129 if (non_space_pos != s.npos && s[non_space_pos] == '#') {
130 // Skip all comments unless it's synthesized comment.
131 if (s.find("com.android.tools.r8.synthesized") != s.npos) {
132 cur_line_.type = SYNTHESIZED_COMMENT;
133 cur_line_.data = s;
134 return;
135 }
136 continue;
137 }
138 if (s.find(" -> ") == s.npos) {
139 // Skip unknown lines.
140 continue;
141 }
142 cur_line_.data = s;
143 if (s[0] == ' ') {
144 cur_line_.type = METHOD_LINE;
145 } else {
146 cur_line_.type = CLASS_LINE;
147 }
148 return;
149 }
150 cur_line_.type = LINE_EOF;
151 }
152
DeObfuscateJavaMethods(std::string_view obfuscated_name,std::string * original_name,bool * synthesized)153 bool ProguardMappingRetrace::DeObfuscateJavaMethods(std::string_view obfuscated_name,
154 std::string* original_name, bool* synthesized) {
155 if (auto split_pos = obfuscated_name.rfind('.'); split_pos != obfuscated_name.npos) {
156 std::string obfuscated_classname(obfuscated_name.substr(0, split_pos));
157
158 if (auto it = class_map_.find(obfuscated_classname); it != class_map_.end()) {
159 const MappingClass& mapping_class = it->second;
160 const auto& method_map = mapping_class.method_map;
161 std::string obfuscated_methodname(obfuscated_name.substr(split_pos + 1));
162
163 if (auto method_it = method_map.find(obfuscated_methodname); method_it != method_map.end()) {
164 const auto& method = method_it->second;
165 if (method.contains_classname) {
166 *original_name = method.original_name;
167 } else {
168 *original_name = mapping_class.original_classname + "." + method.original_name;
169 }
170 *synthesized = method.synthesized;
171 } else {
172 // Only the classname is obfuscated.
173 *original_name = mapping_class.original_classname + "." + obfuscated_methodname;
174 *synthesized = mapping_class.synthesized;
175 }
176 return true;
177 }
178 }
179 return false;
180 }
181
IsArtEntry(const CallChainReportEntry & entry,bool * is_jni_trampoline)182 static bool IsArtEntry(const CallChainReportEntry& entry, bool* is_jni_trampoline) {
183 if (entry.execution_type == CallChainExecutionType::NATIVE_METHOD) {
184 // art_jni_trampoline/art_quick_generic_jni_trampoline are trampolines used to call jni
185 // methods in art runtime. We want to hide them when hiding art frames.
186 *is_jni_trampoline = android::base::EndsWith(entry.symbol->Name(), "jni_trampoline");
187 return *is_jni_trampoline || android::base::EndsWith(entry.dso->Path(), "/libart.so") ||
188 android::base::EndsWith(entry.dso->Path(), "/libartd.so");
189 }
190 return false;
191 };
192
~CallChainReportModifier()193 CallChainReportModifier::~CallChainReportModifier() {}
194
195 // Remove art frames.
196 class ArtFrameRemover : public CallChainReportModifier {
197 public:
Modify(std::vector<CallChainReportEntry> & callchain)198 void Modify(std::vector<CallChainReportEntry>& callchain) override {
199 auto it =
200 std::remove_if(callchain.begin(), callchain.end(), [](const CallChainReportEntry& entry) {
201 return entry.execution_type == CallChainExecutionType::ART_METHOD;
202 });
203 callchain.erase(it, callchain.end());
204 }
205 };
206
207 // Convert JIT methods to their corresponding interpreted Java methods.
208 class JITFrameConverter : public CallChainReportModifier {
209 public:
JITFrameConverter(const ThreadTree & thread_tree)210 JITFrameConverter(const ThreadTree& thread_tree) : thread_tree_(thread_tree) {}
211
Modify(std::vector<CallChainReportEntry> & callchain)212 void Modify(std::vector<CallChainReportEntry>& callchain) override {
213 CollectJavaMethods();
214 for (size_t i = 0; i < callchain.size();) {
215 auto& entry = callchain[i];
216 if (entry.execution_type == CallChainExecutionType::JIT_JVM_METHOD) {
217 // This is a JIT java method, merge it with the interpreted java method having the same
218 // name if possible. Otherwise, merge it with other JIT java methods having the same name
219 // by assigning a common dso_name.
220 if (auto it = java_method_map_.find(std::string(entry.symbol->FunctionName()));
221 it != java_method_map_.end()) {
222 entry.dso = it->second.dso;
223 entry.symbol = it->second.symbol;
224 // Not enough info to map an offset in a JIT method to an offset in a dex file. So just
225 // use the symbol_addr.
226 entry.vaddr_in_file = entry.symbol->addr;
227
228 // ART may call from an interpreted Java method into its corresponding JIT method. To
229 // avoid showing the method calling itself, remove the JIT frame.
230 if (i + 1 < callchain.size() && callchain[i + 1].dso == entry.dso &&
231 callchain[i + 1].symbol == entry.symbol) {
232 callchain.erase(callchain.begin() + i);
233 continue;
234 }
235
236 } else if (!JITDebugReader::IsPathInJITSymFile(entry.dso->Path())) {
237 // Old JITSymFiles use names like "TemporaryFile-XXXXXX". So give them a better name.
238 entry.dso_name = "[JIT cache]";
239 }
240 }
241 i++;
242 }
243 }
244
245 private:
246 struct JavaMethod {
247 Dso* dso;
248 const Symbol* symbol;
JavaMethodsimpleperf::JITFrameConverter::JavaMethod249 JavaMethod(Dso* dso, const Symbol* symbol) : dso(dso), symbol(symbol) {}
250 };
251
CollectJavaMethods()252 void CollectJavaMethods() {
253 if (!java_method_initialized_) {
254 java_method_initialized_ = true;
255 for (Dso* dso : thread_tree_.GetAllDsos()) {
256 if (dso->type() == DSO_DEX_FILE) {
257 dso->LoadSymbols();
258 for (auto& symbol : dso->GetSymbols()) {
259 java_method_map_.emplace(symbol.Name(), JavaMethod(dso, &symbol));
260 }
261 }
262 }
263 }
264 }
265
266 const ThreadTree& thread_tree_;
267 bool java_method_initialized_ = false;
268 std::unordered_map<std::string, JavaMethod> java_method_map_;
269 };
270
271 // Use proguard mapping.txt to de-obfuscate minified symbols.
272 class JavaMethodDeobfuscater : public CallChainReportModifier {
273 public:
JavaMethodDeobfuscater(bool remove_r8_synthesized_frame)274 JavaMethodDeobfuscater(bool remove_r8_synthesized_frame)
275 : remove_r8_synthesized_frame_(remove_r8_synthesized_frame) {}
276
AddProguardMappingFile(std::string_view mapping_file)277 bool AddProguardMappingFile(std::string_view mapping_file) {
278 return retrace_.AddProguardMappingFile(mapping_file);
279 }
280
Modify(std::vector<CallChainReportEntry> & callchain)281 void Modify(std::vector<CallChainReportEntry>& callchain) override {
282 for (size_t i = 0; i < callchain.size();) {
283 auto& entry = callchain[i];
284 if (!IsJavaEntry(entry)) {
285 i++;
286 continue;
287 }
288 std::string_view name = entry.symbol->FunctionName();
289 std::string original_name;
290 bool synthesized;
291 if (retrace_.DeObfuscateJavaMethods(name, &original_name, &synthesized)) {
292 if (synthesized && remove_r8_synthesized_frame_) {
293 callchain.erase(callchain.begin() + i);
294 continue;
295 }
296 entry.symbol->SetDemangledName(original_name);
297 }
298 i++;
299 }
300 }
301
302 private:
IsJavaEntry(const CallChainReportEntry & entry)303 bool IsJavaEntry(const CallChainReportEntry& entry) {
304 static const char* COMPILED_JAVA_FILE_SUFFIXES[] = {".odex", ".oat", ".dex"};
305 if (entry.execution_type == CallChainExecutionType::JIT_JVM_METHOD ||
306 entry.execution_type == CallChainExecutionType::INTERPRETED_JVM_METHOD) {
307 return true;
308 }
309 if (entry.execution_type == CallChainExecutionType::NATIVE_METHOD) {
310 const std::string& path = entry.dso->Path();
311 for (const char* suffix : COMPILED_JAVA_FILE_SUFFIXES) {
312 if (android::base::EndsWith(path, suffix)) {
313 return true;
314 }
315 }
316 }
317 return false;
318 }
319
320 const bool remove_r8_synthesized_frame_;
321 ProguardMappingRetrace retrace_;
322 };
323
324 // Use regex to filter method names.
325 class MethodNameFilter : public CallChainReportModifier {
326 public:
RemoveMethod(std::string_view method_name_regex)327 bool RemoveMethod(std::string_view method_name_regex) {
328 if (auto regex = RegEx::Create(method_name_regex); regex != nullptr) {
329 exclude_names_.emplace_back(std::move(regex));
330 return true;
331 }
332 return false;
333 }
334
Modify(std::vector<CallChainReportEntry> & callchain)335 void Modify(std::vector<CallChainReportEntry>& callchain) override {
336 auto it = std::remove_if(callchain.begin(), callchain.end(),
337 [this](const CallChainReportEntry& entry) {
338 return SearchInRegs(entry.symbol->DemangledName(), exclude_names_);
339 });
340 callchain.erase(it, callchain.end());
341 }
342
343 private:
344 std::vector<std::unique_ptr<RegEx>> exclude_names_;
345 };
346
CallChainReportBuilder(ThreadTree & thread_tree)347 CallChainReportBuilder::CallChainReportBuilder(ThreadTree& thread_tree)
348 : thread_tree_(thread_tree) {
349 const char* env_name = "REMOVE_R8_SYNTHESIZED_FRAME";
350 const char* s = getenv(env_name);
351 if (s != nullptr) {
352 auto result = android::base::ParseBool(s);
353 if (result == android::base::ParseBoolResult::kError) {
354 LOG(WARNING) << "invalid value in env variable " << env_name;
355 } else if (result == android::base::ParseBoolResult::kTrue) {
356 LOG(INFO) << "R8 synthesized frames will be removed.";
357 remove_r8_synthesized_frame_ = true;
358 }
359 }
360 SetRemoveArtFrame(true);
361 SetConvertJITFrame(true);
362 }
363
SetRemoveArtFrame(bool enable)364 void CallChainReportBuilder::SetRemoveArtFrame(bool enable) {
365 if (enable) {
366 art_frame_remover_.reset(new ArtFrameRemover);
367 } else {
368 art_frame_remover_.reset(nullptr);
369 }
370 }
371
SetConvertJITFrame(bool enable)372 void CallChainReportBuilder::SetConvertJITFrame(bool enable) {
373 if (enable) {
374 jit_frame_converter_.reset(new JITFrameConverter(thread_tree_));
375 } else {
376 jit_frame_converter_.reset(nullptr);
377 }
378 }
379
AddProguardMappingFile(std::string_view mapping_file)380 bool CallChainReportBuilder::AddProguardMappingFile(std::string_view mapping_file) {
381 if (!java_method_deobfuscater_) {
382 java_method_deobfuscater_.reset(new JavaMethodDeobfuscater(remove_r8_synthesized_frame_));
383 }
384 return static_cast<JavaMethodDeobfuscater&>(*java_method_deobfuscater_)
385 .AddProguardMappingFile(mapping_file);
386 }
387
RemoveMethod(std::string_view method_name_regex)388 bool CallChainReportBuilder::RemoveMethod(std::string_view method_name_regex) {
389 if (!method_name_filter_) {
390 method_name_filter_.reset(new MethodNameFilter);
391 }
392 return static_cast<MethodNameFilter&>(*method_name_filter_).RemoveMethod(method_name_regex);
393 }
394
Build(const ThreadEntry * thread,const std::vector<uint64_t> & ips,size_t kernel_ip_count)395 std::vector<CallChainReportEntry> CallChainReportBuilder::Build(const ThreadEntry* thread,
396 const std::vector<uint64_t>& ips,
397 size_t kernel_ip_count) {
398 std::vector<CallChainReportEntry> result;
399 result.reserve(ips.size());
400 for (size_t i = 0; i < ips.size(); i++) {
401 const MapEntry* map = thread_tree_.FindMap(thread, ips[i], i < kernel_ip_count);
402 Dso* dso = map->dso;
403 uint64_t vaddr_in_file;
404 const Symbol* symbol = thread_tree_.FindSymbol(map, ips[i], &vaddr_in_file, &dso);
405 CallChainExecutionType execution_type = CallChainExecutionType::NATIVE_METHOD;
406 if (dso->IsForJavaMethod()) {
407 if (dso->type() == DSO_DEX_FILE) {
408 execution_type = CallChainExecutionType::INTERPRETED_JVM_METHOD;
409 } else {
410 execution_type = CallChainExecutionType::JIT_JVM_METHOD;
411 }
412 }
413 result.resize(result.size() + 1);
414 auto& entry = result.back();
415 entry.ip = ips[i];
416 entry.symbol = symbol;
417 entry.dso = dso;
418 entry.vaddr_in_file = vaddr_in_file;
419 entry.map = map;
420 entry.execution_type = execution_type;
421 }
422 MarkArtFrame(result);
423 if (art_frame_remover_) {
424 art_frame_remover_->Modify(result);
425 }
426 if (jit_frame_converter_) {
427 jit_frame_converter_->Modify(result);
428 }
429 if (java_method_deobfuscater_) {
430 java_method_deobfuscater_->Modify(result);
431 }
432 if (method_name_filter_) {
433 method_name_filter_->Modify(result);
434 }
435 return result;
436 }
437
MarkArtFrame(std::vector<CallChainReportEntry> & callchain)438 void CallChainReportBuilder::MarkArtFrame(std::vector<CallChainReportEntry>& callchain) {
439 // Mark art methods before or after a JVM method.
440 bool near_java_method = false;
441 bool is_jni_trampoline = false;
442 std::vector<size_t> jni_trampoline_positions;
443 for (size_t i = 0; i < callchain.size(); ++i) {
444 auto& entry = callchain[i];
445 if (entry.execution_type == CallChainExecutionType::INTERPRETED_JVM_METHOD ||
446 entry.execution_type == CallChainExecutionType::JIT_JVM_METHOD) {
447 near_java_method = true;
448
449 // Mark art frames before this entry.
450 for (int j = static_cast<int>(i) - 1; j >= 0; j--) {
451 if (!IsArtEntry(callchain[j], &is_jni_trampoline)) {
452 break;
453 }
454 callchain[j].execution_type = CallChainExecutionType::ART_METHOD;
455 if (is_jni_trampoline) {
456 jni_trampoline_positions.push_back(j);
457 }
458 }
459 } else if (near_java_method && IsArtEntry(entry, &is_jni_trampoline)) {
460 entry.execution_type = CallChainExecutionType::ART_METHOD;
461 if (is_jni_trampoline) {
462 jni_trampoline_positions.push_back(i);
463 }
464 } else {
465 near_java_method = false;
466 }
467 }
468 // Functions called by art_jni_trampoline are jni methods. And we don't want to hide them.
469 for (auto i : jni_trampoline_positions) {
470 if (i > 0 && callchain[i - 1].execution_type == CallChainExecutionType::ART_METHOD) {
471 callchain[i - 1].execution_type = CallChainExecutionType::NATIVE_METHOD;
472 }
473 }
474 }
475
AggregateThreads(const std::vector<std::string> & thread_name_regex)476 bool ThreadReportBuilder::AggregateThreads(const std::vector<std::string>& thread_name_regex) {
477 size_t i = thread_regs_.size();
478 thread_regs_.resize(i + thread_name_regex.size());
479 for (const auto& reg_str : thread_name_regex) {
480 std::unique_ptr<RegEx> re = RegEx::Create(reg_str);
481 if (!re) {
482 return false;
483 }
484 thread_regs_[i++].re = std::move(re);
485 }
486 return true;
487 }
488
Build(const ThreadEntry & thread)489 ThreadReport ThreadReportBuilder::Build(const ThreadEntry& thread) {
490 ThreadReport report(thread.pid, thread.tid, thread.comm);
491 ModifyReportToAggregateThreads(report);
492 return report;
493 }
494
ModifyReportToAggregateThreads(ThreadReport & report)495 void ThreadReportBuilder::ModifyReportToAggregateThreads(ThreadReport& report) {
496 if (thread_regs_.empty()) {
497 // No modification when there are no regular expressions.
498 return;
499 }
500 const std::string thread_name = report.thread_name;
501 if (auto it = thread_map_.find(thread_name); it != thread_map_.end()) {
502 // Found cached result in thread_map_.
503 if (it->second != -1) {
504 report = thread_regs_[it->second].report;
505 }
506 return;
507 }
508 // Run the slow path to walk through every regular expression.
509 size_t index;
510 for (index = 0; index < thread_regs_.size(); ++index) {
511 if (thread_regs_[index].re->Match(thread_name)) {
512 break;
513 }
514 }
515 if (index == thread_regs_.size()) {
516 thread_map_[thread_name] = -1;
517 } else {
518 thread_map_[thread_name] = static_cast<int>(index);
519 // Modify thread report.
520 auto& aggregated_report = thread_regs_[index].report;
521 if (aggregated_report.thread_name == nullptr) {
522 // Use regular expression as the name of the aggregated thread. So users know it's an
523 // aggregated thread.
524 aggregated_report =
525 ThreadReport(report.pid, report.tid, thread_regs_[index].re->GetPattern().c_str());
526 }
527 report = aggregated_report;
528 }
529 }
530
531 } // namespace simpleperf
532