1 /*
2  * Copyright 2022, The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <regex>
18 #include <string>
19 
20 #include "dmesg_parser.h"
21 
22 namespace dmesg_parser {
23 
// Regex fragment matching the leading dmesg timestamp: a bracketed, non-empty
// run of characters other than ']' at the start of the line, followed by one
// whitespace character, e.g. "[   69.547684] ".
const std::string kTimestampRe("^\\[[^\\]]+\\]\\s");
25 
DmesgParser()26 DmesgParser::DmesgParser() : report_ready_(false) {
27     std::string bug_types;
28     for (auto t : {"KFENCE", "KASAN"}) {
29         if (bug_types.empty()) {
30             bug_types = t;
31         } else {
32             bug_types.append("|");
33             bug_types.append(t);
34         }
35     }
36     std::string bug_re = kTimestampRe + "\\[([0-9T\\s]+)\\]\\s(BUG: (" + bug_types + "):.*)";
37     this->bug_pattern_ = std::regex(bug_re);
38     this->ignore_pattern_ = std::regex("([ _][Rx]..|raw): [0-9a-f]{16}|"
39                                        "Hardware name:|Comm:");
40     this->addr64_pattern_ = std::regex("\\b(?:0x)?[0-9a-f]{16}\\b");
41 }
42 
43 /*
44  * Read a single line terminated by a newline, and process it as follows:
45  * 1. If we haven't seen a bug header, skip the current line unless it contains
46  *    "BUG:".
47  *    If it does, parse the line to extract the task ID (T1234), tool name
48  *    (KASAN or KFENCE) and the whole report title (needed for report
49  *    deduplication).
50  * 2. If the current line does not contain the known task ID, skip it.
51  * 3. If the current line contains a delimiter ("====="), stop accepting new
52  *    lines.
53  * 4. Otherwise strip potential sensitive data from the current line and append
54  *    it to the report.
55  */
ProcessLine(const std::string & line)56 void DmesgParser::ProcessLine(const std::string& line) {
57     if (report_ready_) return;
58 
59     // We haven't encountered a BUG: line yet.
60     if (current_report_.empty()) {
61         std::smatch m;
62         if (std::regex_search(line, m, bug_pattern_)) {
63             std::string task_re = kTimestampRe + "\\[" + std::string(m[1]) + "\\]\\s";
64             task_line_pattern_ = std::regex(task_re);
65             task_delimiter_pattern_ = std::regex(task_re + "={10,}");
66             current_title_ = m[2];
67             current_tool_ = m[3];
68             current_report_ = this->StripSensitiveData(line);
69         }
70         return;
71     }
72 
73     // If there is a delimiter, mark the current report as ready.
74     if (std::regex_search(line, task_delimiter_pattern_)) {
75         report_ready_ = true;
76         return;
77     }
78 
79     if (std::regex_search(line, task_line_pattern_)) current_report_ += StripSensitiveData(line);
80 }
81 
82 /*
83  * Return true iff the current report is ready (it was terminated by the "====="
84  * delimiter.
85  */
ReportReady() const86 bool DmesgParser::ReportReady() const {
87     return report_ready_;
88 }
89 
90 /*
91  * Return the tool that generated the currently collected report.
92  */
ReportType() const93 std::string DmesgParser::ReportType() const {
94     return current_tool_;
95 }
96 
97 /*
98  * Return the title of the currently collected report.
99  */
ReportTitle() const100 std::string DmesgParser::ReportTitle() const {
101     return current_title_;
102 }
103 
104 /*
105  * Return the report collected so far and reset the parser.
106  */
FlushReport()107 std::string DmesgParser::FlushReport() {
108     report_ready_ = false;
109     return std::move(current_report_);
110 }
111 
112 /*
113  * Strip potentially sensitive data from the reports by performing the
114  * following actions:
115  *  1. Drop the entire line, if it contains a process name:
116  *       [   69.547684] [ T6006]c7   6006  CPU: 7 PID: 6006 Comm: sh Tainted:
117  *
118  *     or hardware name:
119  *       [   69.558923] [ T6006]c7   6006  Hardware name: Phone1
120  *
121  *     or a memory dump, e.g.:
122  *
123  *        ... raw: 4000000000010200 0000000000000000 0000000000000000
124  *
125  *      or register dump:
126  *
127  *        ... RIP: 0033:0x7f96443109da
128  *        ... RSP: 002b:00007ffcf0b51b08 EFLAGS: 00000202 ORIG_RAX: 00000000000000af
129  *        ... RAX: ffffffffffffffda RBX: 000055dc3ee521a0 RCX: 00007f96443109da
130  *
131  *      (on x86_64)
132  *
133  *        ... pc : lpm_cpuidle_enter+0x258/0x384
134  *        ... lr : lpm_cpuidle_enter+0x1d4/0x384
135  *        ... sp : ffffff800820bea0
136  *        ... x29: ffffff800820bea0 x28: ffffffc2305f3ce0
137  *        ... ...
138  *        ... x9 : 0000000000000001 x8 : 0000000000000000
139  *
140  *      (on ARM64)
141  *
142  *  2. For substrings that are known to be followed by sensitive information,
143  *     cut the line after those substrings and append "DELETED\n",
144  *     e.g. " by task ":
145  *        ... Read at addr f0ffff87c23fdf7f by task sh/9971
146  *     and "Corrupted memory at":
147  *        ... Corrupted memory at 0xf0ffff87c23fdf00 [ ! . . . . . . . . . . . . . . . ]
148  *
149  *  3. Replace all strings that look like 64-bit hexadecimal values, with
150  *     XXXXXXXXXXXXXXXX.
151  */
StripSensitiveData(const std::string & line) const152 std::string DmesgParser::StripSensitiveData(const std::string& line) const {
153     if (std::regex_search(line, ignore_pattern_)) return "";
154 
155     std::string ret = line;
156     for (std::string infix : {"Corrupted memory at ", " by task "}) {
157         auto pos = ret.find(infix);
158         if (pos != std::string::npos) {
159             ret = ret.substr(0, pos + infix.size()) + "DELETED\n";
160         }
161     }
162     ret = std::regex_replace(ret, addr64_pattern_, "XXXXXXXXXXXXXXXX");
163     return ret;
164 }
165 
166 }  // namespace dmesg_parser
167