1 /*
2  * Copyright (C) 2016 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 // The bootstat command provides options to persist boot events with the current
18 // timestamp, dump the persisted events, and log all events to EventLog to be
19 // uploaded to Android log storage via Tron.
20 
21 #include <getopt.h>
22 #include <sys/klog.h>
23 #include <unistd.h>
24 
25 #include <chrono>
26 #include <cmath>
27 #include <cstddef>
28 #include <cstdio>
29 #include <ctime>
30 #include <iterator>
31 #include <map>
32 #include <memory>
33 #include <regex>
34 #include <string>
35 #include <string_view>
36 #include <unordered_map>
37 #include <utility>
38 #include <vector>
39 
40 #include <android-base/chrono_utils.h>
41 #include <android-base/file.h>
42 #include <android-base/logging.h>
43 #include <android-base/parseint.h>
44 #include <android-base/properties.h>
45 #include <android-base/strings.h>
46 #include <android/log.h>
47 #include <cutils/android_reboot.h>
48 #include <cutils/properties.h>
49 #include <statslog.h>
50 
51 #include "boot_event_record_store.h"
52 
53 namespace {
54 
// Pairs a statsd atom id with the event enum value reported for it; used as
// the mapped value of kBootEventToAtomInfo.
struct AtomInfo {
  int32_t atom;   // Atom id, handed to stats_write() as its first argument.
  int32_t event;  // Event enum value, handed to stats_write() as its second argument.
};
59 
// Maps the name of a BootEvent used inside bootstat to the statsd atom (and
// the event enum value within that atom) defined in
// frameworks/proto_logging/stats/atoms.proto.
//
// LogBootEvents() looks every recorded event up in this table; events without
// an entry are not written to statsd and are only listed in a warning.
// The entries are grouped by the atom they are reported with.
const std::unordered_map<std::string_view, AtomInfo> kBootEventToAtomInfo = {
    // ELAPSED_TIME
    {"ro.boottime.init",
     {android::util::BOOT_TIME_EVENT_ELAPSED_TIME_REPORTED,
      android::util::BOOT_TIME_EVENT_ELAPSED_TIME__EVENT__ANDROID_INIT_STAGE_1}},
    {"boot_complete",
     {android::util::BOOT_TIME_EVENT_ELAPSED_TIME_REPORTED,
      android::util::BOOT_TIME_EVENT_ELAPSED_TIME__EVENT__BOOT_COMPLETE}},
    {"boot_complete_no_encryption",
     {android::util::BOOT_TIME_EVENT_ELAPSED_TIME_REPORTED,
      android::util::BOOT_TIME_EVENT_ELAPSED_TIME__EVENT__BOOT_COMPLETE_NO_ENCRYPTION}},
    {"factory_reset_boot_complete",
     {android::util::BOOT_TIME_EVENT_ELAPSED_TIME_REPORTED,
      android::util::BOOT_TIME_EVENT_ELAPSED_TIME__EVENT__FACTORY_RESET_BOOT_COMPLETE}},
    {"factory_reset_boot_complete_no_encryption",
     {android::util::BOOT_TIME_EVENT_ELAPSED_TIME_REPORTED,
      android::util::
          BOOT_TIME_EVENT_ELAPSED_TIME__EVENT__FACTORY_RESET_BOOT_COMPLETE_NO_ENCRYPTION}},
    {"ota_boot_complete",
     {android::util::BOOT_TIME_EVENT_ELAPSED_TIME_REPORTED,
      android::util::BOOT_TIME_EVENT_ELAPSED_TIME__EVENT__OTA_BOOT_COMPLETE}},
    {"ota_boot_complete_no_encryption",
     {android::util::BOOT_TIME_EVENT_ELAPSED_TIME_REPORTED,
      android::util::BOOT_TIME_EVENT_ELAPSED_TIME__EVENT__OTA_BOOT_COMPLETE_NO_ENCRYPTION}},
    // DURATION
    {"absolute_boot_time",
     {android::util::BOOT_TIME_EVENT_DURATION_REPORTED,
      android::util::BOOT_TIME_EVENT_DURATION__EVENT__ABSOLUTE_BOOT_TIME}},
    {"boottime.bootloader.1BLE",
     {android::util::BOOT_TIME_EVENT_DURATION_REPORTED,
      android::util::BOOT_TIME_EVENT_DURATION__EVENT__BOOTLOADER_FIRST_STAGE_EXEC}},
    {"boottime.bootloader.1BLL",
     {android::util::BOOT_TIME_EVENT_DURATION_REPORTED,
      android::util::BOOT_TIME_EVENT_DURATION__EVENT__BOOTLOADER_FIRST_STAGE_LOAD}},
    {"boottime.bootloader.KL",
     {android::util::BOOT_TIME_EVENT_DURATION_REPORTED,
      android::util::BOOT_TIME_EVENT_DURATION__EVENT__BOOTLOADER_KERNEL_LOAD}},
    {"boottime.bootloader.2BLE",
     {android::util::BOOT_TIME_EVENT_DURATION_REPORTED,
      android::util::BOOT_TIME_EVENT_DURATION__EVENT__BOOTLOADER_SECOND_STAGE_EXEC}},
    {"boottime.bootloader.2BLL",
     {android::util::BOOT_TIME_EVENT_DURATION_REPORTED,
      android::util::BOOT_TIME_EVENT_DURATION__EVENT__BOOTLOADER_SECOND_STAGE_LOAD}},
    {"boottime.bootloader.SW",
     {android::util::BOOT_TIME_EVENT_DURATION_REPORTED,
      android::util::BOOT_TIME_EVENT_DURATION__EVENT__BOOTLOADER_UI_WAIT}},
    {"boottime.bootloader.total",
     {android::util::BOOT_TIME_EVENT_DURATION_REPORTED,
      android::util::BOOT_TIME_EVENT_DURATION__EVENT__BOOTLOADER_TOTAL}},
    {"boottime.init.cold_boot_wait",
     {android::util::BOOT_TIME_EVENT_DURATION_REPORTED,
      android::util::BOOT_TIME_EVENT_DURATION__EVENT__COLDBOOT_WAIT}},
    {"time_since_factory_reset",
     {android::util::BOOT_TIME_EVENT_DURATION_REPORTED,
      android::util::BOOT_TIME_EVENT_DURATION__EVENT__FACTORY_RESET_TIME_SINCE_RESET}},
    {"ro.boottime.init.first_stage",
     {android::util::BOOT_TIME_EVENT_DURATION_REPORTED,
      android::util::BOOT_TIME_EVENT_DURATION__EVENT__ANDROID_INIT_STAGE_1}},
    {"ro.boottime.init.selinux",
     {android::util::BOOT_TIME_EVENT_DURATION_REPORTED,
      android::util::BOOT_TIME_EVENT_DURATION__EVENT__SELINUX_INIT}},
    // UTC_TIME
    {"factory_reset",
     {android::util::BOOT_TIME_EVENT_UTC_TIME_REPORTED,
      android::util::BOOT_TIME_EVENT_UTC_TIME__EVENT__FACTORY_RESET_RESET_TIME}},
    {"factory_reset_current_time",
     {android::util::BOOT_TIME_EVENT_UTC_TIME_REPORTED,
      android::util::BOOT_TIME_EVENT_UTC_TIME__EVENT__FACTORY_RESET_CURRENT_TIME}},
    {"factory_reset_record_value",
     {android::util::BOOT_TIME_EVENT_UTC_TIME_REPORTED,
      android::util::BOOT_TIME_EVENT_UTC_TIME__EVENT__FACTORY_RESET_RECORD_VALUE}},
    // ERROR_CODE
    {"factory_reset_current_time_failure",
     {android::util::BOOT_TIME_EVENT_ERROR_CODE_REPORTED,
      android::util::BOOT_TIME_EVENT_ERROR_CODE__EVENT__FACTORY_RESET_CURRENT_TIME_FAILURE}},
};
138 
139 // Scans the boot event record store for record files and logs each boot event
140 // via EventLog.
LogBootEvents()141 void LogBootEvents() {
142   BootEventRecordStore boot_event_store;
143   auto events = boot_event_store.GetAllBootEvents();
144   std::vector<std::string_view> notSupportedEvents;
145   for (const auto& event : events) {
146     const auto& name = event.first;
147     const auto& info = kBootEventToAtomInfo.find(name);
148     if (info != kBootEventToAtomInfo.end()) {
149       if (info->second.atom == android::util::BOOT_TIME_EVENT_ERROR_CODE_REPORTED) {
150         android::util::stats_write(static_cast<int32_t>(info->second.atom),
151                                    static_cast<int32_t>(info->second.event),
152                                    static_cast<int32_t>(event.second));
153       } else {
154         android::util::stats_write(static_cast<int32_t>(info->second.atom),
155                                    static_cast<int32_t>(info->second.event),
156                                    static_cast<int64_t>(event.second));
157       }
158     } else {
159       notSupportedEvents.push_back(name);
160     }
161   }
162   if (!notSupportedEvents.empty()) {
163     LOG(WARNING) << "LogBootEvents, atomInfo not defined for events:"
164                  << android::base::Join(notSupportedEvents, ',');
165   }
166 }
167 
168 // Records the named boot |event| to the record store. If |value| is non-empty
169 // and is a proper string representation of an integer value, the converted
170 // integer value is associated with the boot event.
RecordBootEventFromCommandLine(const std::string & event,const std::string & value_str)171 void RecordBootEventFromCommandLine(const std::string& event, const std::string& value_str) {
172   BootEventRecordStore boot_event_store;
173   if (!value_str.empty()) {
174     int32_t value = 0;
175     if (android::base::ParseInt(value_str, &value)) {
176       boot_event_store.AddBootEventWithValue(event, value);
177     }
178   } else {
179     boot_event_store.AddBootEvent(event);
180   }
181 }
182 
PrintBootEvents()183 void PrintBootEvents() {
184   printf("Boot events:\n");
185   printf("------------\n");
186 
187   BootEventRecordStore boot_event_store;
188   auto events = boot_event_store.GetAllBootEvents();
189   for (auto i = events.cbegin(); i != events.cend(); ++i) {
190     printf("%s\t%d\n", i->first.c_str(), i->second);
191   }
192 }
193 
// Writes the usage line for |cmd| followed by the list of supported options
// to stderr.
void ShowHelp(const char* cmd) {
  // Contains no conversion specifiers, so it can be emitted verbatim.
  static constexpr char kOptionsHelp[] =
      "options include:\n"
      "  -h, --help              Show this help\n"
      "  -l, --log               Log all metrics to logstorage\n"
      "  -p, --print             Dump the boot event records to the console\n"
      "  -r, --record            Record the timestamp of a named boot event\n"
      "  --value                 Optional value to associate with the boot event\n"
      "  --record_boot_complete  Record metrics related to the time for the device boot\n"
      "  --record_boot_reason    Record the reason why the device booted\n"
      "  --record_time_since_factory_reset  Record the time since the device was reset\n"
      "  --boot_reason_enum=<reason>  Report the match to the kBootReasonMap table\n";

  fprintf(stderr, "Usage: %s [options]...\n", cmd);
  fputs(kOptionsHelp, stderr);
}
208 
// Builds a readable, printable string from the given command line arguments:
// every argument is appended in order and followed by a single space, so a
// non-empty result always ends in a space.
std::string GetCommandLine(int argc, char** argv) {
  std::string joined;
  int index = 0;
  while (index < argc) {
    joined.append(argv[index]).push_back(' ');
    ++index;
  }
  return joined;
}
220 
// IDs returned by BootReasonStrToEnum() for an empty boot reason and for a
// boot reason that matches no entry of kBootReasonMap, respectively.
constexpr int32_t kEmptyBootReason = 0;
constexpr int32_t kUnknownBootReason = 1;
223 
// A mapping from boot reason string, as read from the ro.boot.bootreason
// system property, to a unique integer ID. Viewers of log data dashboards for
// the boot_reason metric may refer to this mapping to discern the histogram
// values.
//
// BootReasonStrToEnum() first looks a reason up verbatim. Only when that
// fails are keys containing '[', '\' or '*' additionally evaluated as regular
// expressions that must match the whole reason; requiring one of those
// characters keeps the amount of regex matching manageable.
const std::map<std::string, int32_t> kBootReasonMap = {
    {"reboot,[empty]", kEmptyBootReason},
    {"__BOOTSTAT_UNKNOWN__", kUnknownBootReason},
    {"normal", 2},
    {"recovery", 3},
    {"reboot", 4},
    {"PowerKey", 5},
    {"hard_reset", 6},
    {"kernel_panic", 7},
    {"rpm_err", 8},
    {"hw_reset", 9},
    {"tz_err", 10},
    {"adsp_err", 11},
    {"modem_err", 12},
    {"mba_err", 13},
    {"Watchdog", 14},
    {"Panic", 15},
    {"power_key", 16},  // aliasReasons to cold,powerkey (Mediatek)
    {"power_on", 17},   // aliasReasons to cold,powerkey
    {"Reboot", 18},
    {"rtc", 19},
    {"edl", 20},
    {"oem_pon1", 21},
    {"oem_powerkey", 22},  // aliasReasons to cold,powerkey
    {"oem_unknown_reset", 23},
    {"srto: HWWDT reset SC", 24},
    {"srto: HWWDT reset platform", 25},
    {"srto: bootloader", 26},
    {"srto: kernel panic", 27},
    {"srto: kernel watchdog reset", 28},
    {"srto: normal", 29},
    {"srto: reboot", 30},
    {"srto: reboot-bootloader", 31},
    {"srto: security watchdog reset", 32},
    {"srto: wakesrc", 33},
    {"srto: watchdog", 34},
    {"srto:1-1", 35},
    {"srto:omap_hsmm", 36},
    {"srto:phy0", 37},
    {"srto:rtc0", 38},
    {"srto:touchpad", 39},
    {"watchdog", 40},
    {"watchdogr", 41},
    {"wdog_bark", 42},
    {"wdog_bite", 43},
    {"wdog_reset", 44},
    {"shutdown,", 45},  // Trailing comma is intentional. Do NOT use.
    {"shutdown,userrequested", 46},
    {"reboot,bootloader", 47},
    {"reboot,cold", 48},
    {"reboot,recovery", 49},
    {"thermal_shutdown", 50},
    {"s3_wakeup", 51},
    {"kernel_panic,sysrq", 52},
    {"kernel_panic,NULL", 53},
    {"kernel_panic,null", 53},
    {"kernel_panic,BUG", 54},
    {"kernel_panic,bug", 54},
    {"bootloader", 55},
    {"cold", 56},
    {"hard", 57},
    {"warm", 58},
    {"reboot,kernel_power_off_charging__reboot_system", 59},  // Can not happen
    {"thermal-shutdown", 60},
    {"shutdown,thermal", 61},
    {"shutdown,battery", 62},
    {"reboot,ota", 63},
    {"reboot,factory_reset", 64},
    {"reboot,", 65},
    {"reboot,shell", 66},
    {"reboot,adb", 67},
    {"reboot,userrequested", 68},
    {"shutdown,container", 69},  // Host OS asking Android Container to shutdown
    {"cold,powerkey", 70},
    {"warm,s3_wakeup", 71},
    {"hard,hw_reset", 72},
    {"shutdown,suspend", 73},    // Suspend to RAM
    {"shutdown,hibernate", 74},  // Suspend to DISK
    {"power_on_key", 75},        // aliasReasons to cold,powerkey
    {"reboot_by_key", 76},       // translated to reboot,by_key
    {"wdt_by_pass_pwk", 77},     // Mediatek
    {"reboot_longkey", 78},      // translated to reboot,longkey
    {"powerkey", 79},            // aliasReasons to cold,powerkey
    {"usb", 80},                 // aliasReasons to cold,charger (Mediatek)
    {"wdt", 81},                 // Mediatek
    {"tool_by_pass_pwk", 82},    // aliasReasons to reboot,tool (Mediatek)
    {"2sec_reboot", 83},         // aliasReasons to cold,rtc,2sec (Mediatek)
    {"reboot,by_key", 84},
    {"reboot,longkey", 85},
    {"reboot,2sec", 86},  // Deprecate in two years, replaced with cold,rtc,2sec
    {"shutdown,thermal,battery", 87},
    {"reboot,its_just_so_hard", 88},  // produced by boot_reason_test
    {"reboot,Its Just So Hard", 89},  // produced by boot_reason_test
    {"reboot,rescueparty", 90},
    {"charge", 91},
    {"oem_tz_crash", 92},
    {"uvlo", 93},  // aliasReasons to reboot,undervoltage
    {"oem_ps_hold", 94},
    {"abnormal_reset", 95},
    {"oemerr_unknown", 96},
    {"reboot_fastboot_mode", 97},
    {"watchdog_apps_bite", 98},
    {"xpu_err", 99},
    {"power_on_usb", 100},  // aliasReasons to cold,charger
    {"watchdog_rpm", 101},
    {"watchdog_nonsec", 102},
    {"watchdog_apps_bark", 103},
    {"reboot_dmverity_corrupted", 104},
    {"reboot_smpl", 105},  // aliasReasons to reboot,powerloss
    {"watchdog_sdi_apps_reset", 106},
    {"smpl", 107},  // aliasReasons to reboot,powerloss
    {"oem_modem_failed_to_powerup", 108},
    {"reboot_normal", 109},
    {"oem_lpass_cfg", 110},
    {"oem_xpu_ns_error", 111},
    {"power_key_press", 112},  // aliasReasons to cold,powerkey
    {"hardware_reset", 113},
    {"reboot_by_powerkey", 114},  // aliasReasons to cold,powerkey (is this correct?)
    {"reboot_verity", 115},
    {"oem_rpm_undef_error", 116},
    {"oem_crash_on_the_lk", 117},
    {"oem_rpm_reset", 118},
    {"reboot,powerloss", 119},
    {"reboot,undervoltage", 120},
    {"factory_cable", 121},
    {"oem_ar6320_failed_to_powerup", 122},
    {"watchdog_rpm_bite", 123},
    {"power_on_cable", 124},  // aliasReasons to cold,charger
    {"reboot_unknown", 125},
    {"wireless_charger", 126},
    {"0x776655ff", 127},
    {"oem_thermal_bite_reset", 128},
    {"charger", 129},
    {"pon1", 130},
    {"unknown", 131},
    {"reboot_rtc", 132},
    {"cold_boot", 133},
    {"hard_rst", 134},
    {"power-on", 135},
    {"oem_adsp_resetting_the_soc", 136},
    {"kpdpwr", 137},
    {"oem_modem_timeout_waiting", 138},
    {"usb_chg", 139},
    {"warm_reset_0x02", 140},
    {"warm_reset_0x80", 141},
    {"pon_reason_0xb0", 142},
    {"reboot_download", 143},
    {"reboot_recovery_mode", 144},
    {"oem_sdi_err_fatal", 145},
    {"pmic_watchdog", 146},
    {"software_master", 147},
    {"cold,charger", 148},
    {"cold,rtc", 149},
    {"cold,rtc,2sec", 150},   // Mediatek
    {"reboot,tool", 151},     // Mediatek
    {"reboot,wdt", 152},      // Mediatek
    {"reboot,unknown", 153},  // Mediatek
    {"kernel_panic,audit", 154},
    {"kernel_panic,atomic", 155},
    {"kernel_panic,hung", 156},
    {"kernel_panic,hung,rcu", 157},
    {"kernel_panic,init", 158},
    {"kernel_panic,oom", 159},
    {"kernel_panic,stack", 160},
    {"kernel_panic,sysrq,livelock,alarm", 161},   // llkd
    {"kernel_panic,sysrq,livelock,driver", 162},  // llkd
    {"kernel_panic,sysrq,livelock,zombie", 163},  // llkd
    {"kernel_panic,modem", 164},
    {"kernel_panic,adsp", 165},
    {"kernel_panic,dsps", 166},
    {"kernel_panic,wcnss", 167},
    {"kernel_panic,_sde_encoder_phys_cmd_handle_ppdone_timeout", 168},
    {"recovery,quiescent", 169},
    {"reboot,quiescent", 170},
    {"reboot,rtc", 171},
    {"reboot,dm-verity_device_corrupted", 172},
    {"reboot,dm-verity_enforcing", 173},
    {"reboot,keys_clear", 174},
    {"reboot,pmic_off_fault,.*", 175},
    {"reboot,pmic_off_s3rst,.*", 176},
    {"reboot,pmic_off_other,.*", 177},
    {"reboot,userrequested,fastboot", 178},
    {"reboot,userrequested,recovery", 179},
    {"reboot,userrequested,recovery,ui", 180},
    {"shutdown,userrequested,fastboot", 181},
    {"shutdown,userrequested,recovery", 182},
    {"reboot,unknown[0-9]*", 183},
    {"reboot,longkey,.*", 184},
    {"reboot,boringssl-self-check-failed", 185},
    {"reboot,userspace_failed,shutdown_aborted", 186},
    {"reboot,userspace_failed,watchdog_triggered", 187},
    {"reboot,userspace_failed,watchdog_fork", 188},
    // NOTE(review): evaluated as a regex, ",*" means "zero or more commas"
    // rather than "any suffix" -- confirm whether ",.*" was intended.
    {"reboot,userspace_failed,*", 189},
    {"reboot,mount_userdata_failed", 190},
    {"reboot,forcedsilent", 191},
    {"reboot,forcednonsilent", 192},
    {"reboot,thermal,tj", 193},
    {"reboot,emergency", 194},
    {"reboot,factory", 195},
    {"reboot,fastboot", 196},
    {"reboot,gsa,hard", 197},
    {"reboot,gsa,soft", 198},
    {"reboot,master_dc,fault_n", 199},
    {"reboot,master_dc,reset", 200},
    {"reboot,ocp", 201},
    {"reboot,pin", 202},
    {"reboot,rom_recovery", 203},
    {"reboot,uvlo", 204},
    {"reboot,uvlo,pmic,if", 205},
    {"reboot,uvlo,pmic,main", 206},
    {"reboot,uvlo,pmic,sub", 207},
    {"reboot,warm", 208},
    {"watchdog,aoc", 209},
    {"watchdog,apc", 210},
    {"watchdog,apc,bl,debug,early", 211},
    {"watchdog,apc,bl,early", 212},
    {"watchdog,apc,early", 213},
    {"watchdog,apm", 214},
    {"watchdog,gsa,hard", 215},
    {"watchdog,gsa,soft", 216},
    {"watchdog,pmucal", 217},
    {"reboot,early,bl", 218},
    {"watchdog,apc,gsa,crashed", 219},
    {"watchdog,apc,bl31,crashed", 220},
    {"watchdog,apc,pbl,crashed", 221},
    {"reboot,memory_protect,hyp", 222},
    {"reboot,tsd,pmic,main", 223},
    {"reboot,tsd,pmic,sub", 224},
    {"reboot,ocp,pmic,main", 225},
    {"reboot,ocp,pmic,sub", 226},
    {"reboot,sys_ldo_ok,pmic,main", 227},
    {"reboot,sys_ldo_ok,pmic,sub", 228},
    {"reboot,smpl_timeout,pmic,main", 229},
    {"reboot,ota,.*", 230},
    {"reboot,periodic,.*", 231},
    {"reboot,early,abl", 232},
    {"reboot,early,bl2", 233},
    {"reboot,longkey,pmic_cold", 234},
    {"reboot,longkey,master_dc", 235},
    {"reboot,ocp2,pmic,if", 236},
    {"reboot,ocp,pmic,if", 237},
    {"reboot,fship", 238},
    {"reboot,ocp,.*", 239},
};
473 
474 // Converts a string value representing the reason the system booted to an
475 // integer representation. This is necessary for logging the boot_reason metric
476 // via Tron, which does not accept non-integer buckets in histograms.
BootReasonStrToEnum(const std::string & boot_reason)477 int32_t BootReasonStrToEnum(const std::string& boot_reason) {
478   auto mapping = kBootReasonMap.find(boot_reason);
479   if (mapping != kBootReasonMap.end()) {
480     return mapping->second;
481   }
482 
483   if (boot_reason.empty()) {
484     return kEmptyBootReason;
485   }
486 
487   for (const auto& [match, id] : kBootReasonMap) {
488     // Regex matches as a minimum require either [, \ or * to be present.
489     if (match.find_first_of("[\\*") == match.npos) continue;
490     // enforce match from beginning to end
491     auto exact = match;
492     if (exact[0] != '^') exact = "^" + exact;
493     if (exact[exact.size() - 1] != '$') exact = exact + "$";
494     if (std::regex_search(boot_reason, std::regex(exact))) return id;
495   }
496 
497   LOG(INFO) << "Unknown boot reason: " << boot_reason;
498   return kUnknownBootReason;
499 }
500 
// Canonical list of supported primary reboot reasons.
//
// The order is significant: isKernelRebootReason() only considers the entries
// ahead of "recovery" and isStrongRebootReason() only those ahead of "cold".
//
// The element type is deliberately the non-const std::string: a container
// with a const value_type is not valid for std::allocator (it is rejected by
// libstdc++ and only tolerated as an extension by some libc++ versions). The
// vector itself is const, so the elements remain read-only.
const std::vector<std::string> knownReasons = {
    // clang-format off
    // kernel
    "watchdog",
    "kernel_panic",
    // strong
    "recovery",    // Should not happen from ro.boot.bootreason
    "bootloader",  // Should not happen from ro.boot.bootreason
    // blunt
    "cold",
    "hard",
    "warm",
    // super blunt
    "shutdown",    // Can not happen from ro.boot.bootreason
    "reboot",      // Default catch-all for anything unknown
    // clang-format on
};
519 
520 // Returns true if the supplied reason prefix is considered detailed enough.
isStrongRebootReason(const std::string & r)521 bool isStrongRebootReason(const std::string& r) {
522   for (auto& s : knownReasons) {
523     if (s == "cold") break;
524     // Prefix defined as terminated by a nul or comma (,).
525     if (android::base::StartsWith(r, s) && ((r.length() == s.length()) || (r[s.length()] == ','))) {
526       return true;
527     }
528   }
529   return false;
530 }
531 
532 // Returns true if the supplied reason prefix is associated with the kernel.
isKernelRebootReason(const std::string & r)533 bool isKernelRebootReason(const std::string& r) {
534   for (auto& s : knownReasons) {
535     if (s == "recovery") break;
536     // Prefix defined as terminated by a nul or comma (,).
537     if (android::base::StartsWith(r, s) && ((r.length() == s.length()) || (r[s.length()] == ','))) {
538       return true;
539     }
540   }
541   return false;
542 }
543 
544 // Returns true if the supplied reason prefix is considered known.
isKnownRebootReason(const std::string & r)545 bool isKnownRebootReason(const std::string& r) {
546   for (auto& s : knownReasons) {
547     // Prefix defined as terminated by a nul or comma (,).
548     if (android::base::StartsWith(r, s) && ((r.length() == s.length()) || (r[s.length()] == ','))) {
549       return true;
550     }
551   }
552   return false;
553 }
554 
555 // If the reboot reason should be improved, report true if is too blunt.
isBluntRebootReason(const std::string & r)556 bool isBluntRebootReason(const std::string& r) {
557   if (isStrongRebootReason(r)) return false;
558 
559   if (!isKnownRebootReason(r)) return true;  // Can not support unknown as detail
560 
561   size_t pos = 0;
562   while ((pos = r.find(',', pos)) != std::string::npos) {
563     ++pos;
564     std::string next(r.substr(pos));
565     if (next.length() == 0) break;
566     if (next[0] == ',') continue;
567     if (!isKnownRebootReason(next)) return false;  // Unknown subreason is good.
568     if (isStrongRebootReason(next)) return false;  // eg: reboot,reboot
569   }
570   return true;
571 }
572 
readPstoreConsole(std::string & console)573 bool readPstoreConsole(std::string& console) {
574   if (android::base::ReadFileToString("/sys/fs/pstore/console-ramoops-0", &console)) {
575     return true;
576   }
577   return android::base::ReadFileToString("/sys/fs/pstore/console-ramoops", &console);
578 }
579 
// Implements variants of std::string::find and std::string::rfind that are
// resilient to bit errors in the data stream being inspected.
//
// Only a reference to the inspected string is kept, so that string must
// outlive the pstoreConsole object; construction from a temporary is rejected
// at compile time.
class pstoreConsole {
 private:
  static constexpr size_t kBitErrorRate = 8;  // number of bits per error
  const std::string& console;

  // Number of bits that differ between the two arguments l and r.
  // Returns zero if the values for l and r are identical.
  size_t numError(uint8_t l, uint8_t r) const { return std::bitset<8>(l ^ r).count(); }

  // A string comparison function, reports the number of errors discovered
  // in the match to a maximum of the bitLength / kBitErrorRate, at that
  // point returning npos to indicate match is too poor.
  //
  // Since called in rfind which works backwards, expect cache locality will
  // help if we check in reverse here as well for performance.
  //
  // Assumption: l (from console.c_str() + pos) is long enough to house
  //             _r.length(), checked in the find and rfind callers below.
  //
  size_t numError(size_t pos, const std::string& _r) const {
    size_t n = _r.length();
    // An empty needle matches trivially. Without this guard the do/while
    // below would dereference one byte in front of both buffers.
    if (n == 0) return 0;
    const char* l = console.c_str() + pos;
    const char* r = _r.c_str();
    const uint8_t* le = reinterpret_cast<const uint8_t*>(l) + n;
    const uint8_t* re = reinterpret_cast<const uint8_t*>(r) + n;
    size_t count = 0;
    n = 0;  // from here on: number of characters compared so far
    do {
      // individual character bit error rate > threshold + slop
      size_t num = numError(*--le, *--re);
      if (num > ((8 + kBitErrorRate) / kBitErrorRate)) return std::string::npos;
      // total bit error rate > threshold + slop
      count += num;
      ++n;
      if (count > ((n * 8 + kBitErrorRate - (n > 2)) / kBitErrorRate)) {
        return std::string::npos;
      }
    } while (le != reinterpret_cast<const uint8_t*>(l));
    return count;
  }

 public:
  explicit pstoreConsole(const std::string& console) : console(console) {}
  // scope of argument must be equal to or greater than scope of pstoreConsole
  explicit pstoreConsole(const std::string&& console) = delete;
  explicit pstoreConsole(std::string&& console) = delete;

  // Our implementation of rfind, use exact match first, then resort to fuzzy.
  // Returns the position of the match, or std::string::npos if there is none.
  size_t rfind(const std::string& needle) const {
    size_t pos = console.rfind(needle);  // exact match?
    if (pos != std::string::npos) return pos;

    // Check to make sure needle fits in console string.
    pos = console.length();
    if (needle.length() > pos) return std::string::npos;
    pos -= needle.length();
    // fuzzy match to maximum kBitErrorRate
    for (;;) {
      if (numError(pos, needle) != std::string::npos) return pos;
      if (pos == 0) break;
      --pos;
    }
    return std::string::npos;
  }

  // Our implementation of find, use only fuzzy match. The search begins at
  // |start|; returns the position of the first acceptable match, or
  // std::string::npos if there is none.
  size_t find(const std::string& needle, size_t start = 0) const {
    // Check to make sure needle fits in console string.
    if (needle.length() > console.length()) return std::string::npos;
    const size_t last_pos = console.length() - needle.length();
    // fuzzy match to maximum kBitErrorRate
    for (size_t pos = start; pos <= last_pos; ++pos) {
      if (numError(pos, needle) != std::string::npos) return pos;
    }
    return std::string::npos;
  }

  // Gives access to the underlying, unmodified console string.
  operator const std::string&() const { return console; }
};
661 
662 // If bit error match to needle, correct it.
663 // Return true if any corrections were discovered and applied.
correctForBitError(std::string & reason,const std::string & needle)664 bool correctForBitError(std::string& reason, const std::string& needle) {
665   bool corrected = false;
666   if (reason.length() < needle.length()) return corrected;
667   const pstoreConsole console(reason);
668   const size_t last_pos = reason.length() - needle.length();
669   for (size_t pos = 0; pos <= last_pos; pos += needle.length()) {
670     pos = console.find(needle, pos);
671     if (pos == std::string::npos) break;
672 
673     // exact match has no malice
674     if (needle == reason.substr(pos, needle.length())) continue;
675 
676     corrected = true;
677     reason = reason.substr(0, pos) + needle + reason.substr(pos + needle.length());
678   }
679   return corrected;
680 }
681 
682 // If bit error match to needle, correct it.
683 // Return true if any corrections were discovered and applied.
684 // Try again if we can replace underline with spaces.
correctForBitErrorOrUnderline(std::string & reason,const std::string & needle)685 bool correctForBitErrorOrUnderline(std::string& reason, const std::string& needle) {
686   bool corrected = correctForBitError(reason, needle);
687   std::string _needle(needle);
688   std::transform(_needle.begin(), _needle.end(), _needle.begin(),
689                  [](char c) { return (c == '_') ? ' ' : c; });
690   if (needle != _needle) {
691     corrected |= correctForBitError(reason, _needle);
692   }
693   return corrected;
694 }
695 
// Converts a string value representing the reason the system booted to a
// string complying with Android system standard reason. Applied in place to
// every character, in this order:
//   1. convert to lower case;
//   2. replace blanks (space, tab) with an underline (_);
//   3. replace anything that is not printable with a question mark (?).
//
// The <cctype> classification functions have undefined behavior for negative
// arguments other than EOF, so each character is handled as unsigned char.
// This matters on targets where plain char is signed and the input contains
// bytes >= 0x80.
void transformReason(std::string& reason) {
  std::transform(reason.begin(), reason.end(), reason.begin(), [](unsigned char c) -> char {
    c = static_cast<unsigned char>(::tolower(c));
    if (::isblank(c)) return '_';
    return ::isprint(c) ? static_cast<char>(c) : '?';
  });
}
705 
706 // Check subreasons for reboot,<subreason> kernel_panic,sysrq,<subreason> or
707 // kernel_panic,<subreason>.
708 //
709 // If quoted flag is set, pull out and correct single quoted ('), newline (\n)
710 // or unprintable character terminated subreason, pos is supplied just beyond
711 // first quote.  if quoted false, pull out and correct newline (\n) or
712 // unprintable character terminated subreason.
713 //
714 // Heuristics to find termination is painted into a corner:
715 
// Returns true if |c| is a single quote (') or a single bit error of one that
// we can block.  Two of the eight single bit errors, 7 and g, are acceptable
// inside a reason and are therefore not treated as a quote: 2/9 chance we
// will miss the terminating quote, but there is always the terminating
// newline that usually immediately follows to fortify our chances.
bool likely_single_quote(char c) {
  // Bits in which |c| deviates from a pristine single quote.
  const uint8_t flipped =
      static_cast<uint8_t>(static_cast<uint8_t>(c) ^ static_cast<uint8_t>('\''));
  switch (flipped) {
    case 0x00:  // '\''
    case 0x01:  // '&'
    case 0x02:  // '%'
    case 0x04:  // '#'
    case 0x08:  // '/'
    case 0x20:  // '\a' (unprintable)
    case 0x80:  // 0xA7 (unprintable)
      return true;
    default:
      // Includes 0x10 ('7') and 0x40 ('g'), as well as anything that is more
      // than one bit away from a quote.
      return false;
  }
}
739 
740 // ::isprint(c) and likely_space() will prevent us from being called for
741 // fundamentally printable entries, except for '\r' and '\b'.
742 //
743 // Except for * and J, single bit errors for \n, all others are non-
744 // printable so easy catch.  It is _acceptable_ for *, J or j to exist in
745 // the reason string, so 2/9 chance we will miss the terminating newline.
746 //
747 // NB: J might not be acceptable, except if at the beginning or preceded
748 //     with a space, '(' or any of the quotes and their BER aliases.
749 // NB: * might not be acceptable, except if at the beginning or preceded
750 //     with a space, another *, or any of the quotes or their BER aliases.
751 //
752 // To reduce the chances to closer to 1/9 is too complicated for the gain.
// Returns true when c is a newline or a single bit error of a newline that
// cannot legitimately appear in a reason string.
//
// The bit-flipped aliases '*' (bit 0x20) and 'J' (bit 0x40) are valid reason
// characters, so they are deliberately not reported as a newline.
bool likely_newline(char c) {
  // Bits whose corruption still identifies the newline (all unprintable):
  //   0x01 '\v', 0x02 '\b', 0x04 0x0E, 0x08 0x02, 0x10 0x1A, 0x80 0x8A.
  static constexpr unsigned kRecoverableBits = 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x80;

  const unsigned diff = static_cast<uint8_t>(c) ^ static_cast<unsigned>('\n');
  if (diff == 0) return true;                  // the newline itself
  if ((diff & (diff - 1)) != 0) return false;  // more than one bit differs
  return (diff & kRecoverableBits) != 0;
}
770 
771 // ::isprint(c) will prevent us from being called for all the printable
772 // matches below.  If we let unprintables through because of this, they
773 // get converted to underscore (_) by the validation phase.
// Returns true when c is a space or a tab, or differs from one of them by
// exactly one bit.
//
//   ' '  aliases: '!' '"' '$' '(' '0' '\0' '`' 0xA0
//   '\t' aliases: '\b' '\v' '\r' 0x01 0x19 ')' 'I' 0x89
//
// likely_newline() also matches '\b' and '\v'.
bool likely_space(char c) {
  const unsigned value = static_cast<uint8_t>(c);
  const auto within_one_bit = [value](char target) {
    const unsigned diff = value ^ static_cast<uint8_t>(target);
    // True for zero (exact match) and for any single set bit.
    return (diff & (diff - 1)) == 0;
  };
  return within_one_bit(' ') || within_one_bit('\t');
}
798 
getSubreason(const std::string & content,size_t pos,bool quoted)799 std::string getSubreason(const std::string& content, size_t pos, bool quoted) {
800   static constexpr size_t max_reason_length = 256;
801 
802   std::string subReason(content.substr(pos, max_reason_length));
803   // Correct against any known strings that Bit Error Match
804   for (const auto& s : knownReasons) {
805     correctForBitErrorOrUnderline(subReason, s);
806   }
807   std::string terminator(quoted ? "'" : "");
808   for (const auto& m : kBootReasonMap) {
809     if (m.first.length() <= strlen("cold")) continue;  // too short?
810     if (correctForBitErrorOrUnderline(subReason, m.first + terminator)) continue;
811     if (m.first.length() <= strlen("reboot,cold")) continue;  // short?
812     if (android::base::StartsWith(m.first, "reboot,")) {
813       correctForBitErrorOrUnderline(subReason, m.first.substr(strlen("reboot,")) + terminator);
814     } else if (android::base::StartsWith(m.first, "kernel_panic,sysrq,")) {
815       correctForBitErrorOrUnderline(subReason,
816                                     m.first.substr(strlen("kernel_panic,sysrq,")) + terminator);
817     } else if (android::base::StartsWith(m.first, "kernel_panic,")) {
818       correctForBitErrorOrUnderline(subReason, m.first.substr(strlen("kernel_panic,")) + terminator);
819     }
820   }
821   for (pos = 0; pos < subReason.length(); ++pos) {
822     char c = subReason[pos];
823     if (!(::isprint(c) || likely_space(c)) || likely_newline(c) ||
824         (quoted && likely_single_quote(c))) {
825       subReason.erase(pos);
826       break;
827     }
828   }
829   transformReason(subReason);
830   return subReason;
831 }
832 
addKernelPanicSubReason(const pstoreConsole & console,std::string & ret)833 void addKernelPanicSubReason(const pstoreConsole& console, std::string& ret) {
834   // Check for kernel panic types to refine information
835   if ((console.rfind("SysRq : Trigger a crash") != std::string::npos) ||
836       (console.rfind("PC is at sysrq_handle_crash+") != std::string::npos)) {
837     ret = "kernel_panic,sysrq";
838     // Invented for Android to allow daemons that specifically trigger sysrq
839     // to communicate more accurate boot subreasons via last console messages.
840     static constexpr char sysrqSubreason[] = "SysRq : Trigger a crash : '";
841     auto pos = console.rfind(sysrqSubreason);
842     if (pos != std::string::npos) {
843       ret += "," + getSubreason(console, pos + strlen(sysrqSubreason), /* quoted */ true);
844     }
845     return;
846   }
847   if (console.rfind("Unable to handle kernel NULL pointer dereference at virtual address") !=
848       std::string::npos) {
849     ret = "kernel_panic,null";
850     return;
851   }
852   if (console.rfind("Kernel BUG at ") != std::string::npos) {
853     ret = "kernel_panic,bug";
854     return;
855   }
856 
857   std::string panic("Kernel panic - not syncing: ");
858   auto pos = console.rfind(panic);
859   if (pos == std::string::npos) return;
860 
861   static const std::vector<std::pair<const std::string, const std::string>> panicReasons = {
862       {"Out of memory", "oom"},
863       {"out of memory", "oom"},
864       {"Oh boy, that early out of memory", "oom"},  // omg
865       {"BUG!", "bug"},
866       {"hung_task: blocked tasks", "hung"},
867       {"audit: ", "audit"},
868       {"scheduling while atomic", "atomic"},
869       {"Attempted to kill init!", "init"},
870       {"Requested init", "init"},
871       {"No working init", "init"},
872       {"Could not decompress init", "init"},
873       {"RCU Stall", "hung,rcu"},
874       {"stack-protector", "stack"},
875       {"kernel stack overflow", "stack"},
876       {"Corrupt kernel stack", "stack"},
877       {"low stack detected", "stack"},
878       {"corrupted stack end", "stack"},
879       {"subsys-restart: Resetting the SoC - modem crashed.", "modem"},
880       {"subsys-restart: Resetting the SoC - adsp crashed.", "adsp"},
881       {"subsys-restart: Resetting the SoC - dsps crashed.", "dsps"},
882       {"subsys-restart: Resetting the SoC - wcnss crashed.", "wcnss"},
883   };
884 
885   ret = "kernel_panic";
886   for (auto& s : panicReasons) {
887     if (console.find(panic + s.first, pos) != std::string::npos) {
888       ret += "," + s.second;
889       return;
890     }
891   }
892   auto reason = getSubreason(console, pos + panic.length(), /* newline */ false);
893   if (reason.length() > 3) {
894     ret += "," + reason;
895   }
896 }
897 
addKernelPanicSubReason(const std::string & content,std::string & ret)898 void addKernelPanicSubReason(const std::string& content, std::string& ret) {
899   addKernelPanicSubReason(pstoreConsole(content), ret);
900 }
901 
// Property that receives the canonical boot reason; written by
// BootReasonAddToHistory().
const char system_reboot_reason_property[] = "sys.boot.reason";
// Property and file consulted for the reason recorded for the last reboot.
// SetSystemBootReason() reads the file first, falls back to the property,
// then clears the property and unlinks the file.
const char last_reboot_reason_property[] = LAST_REBOOT_REASON_PROPERTY;
const char last_reboot_reason_file[] = LAST_REBOOT_REASON_FILE;
// Property that receives the normalized last reboot reason in
// SetSystemBootReason().
const char last_last_reboot_reason_property[] = "sys.boot.reason.last";
// Bound used by BootReasonAddToHistory() when trimming old history entries.
constexpr size_t history_reboot_reason_size = 4;
// Property holding newline-separated "<reason>,<epoch time>" entries, with
// the newest entry first.
const char history_reboot_reason_property[] = LAST_REBOOT_REASON_PROPERTY ".history";
// Property read as the raw bootloader boot reason.
const char bootloader_reboot_reason_property[] = "ro.boot.bootreason";
909 
910 // Land system_boot_reason into system_reboot_reason_property.
911 // Shift system_boot_reason into history_reboot_reason_property.
BootReasonAddToHistory(const std::string & system_boot_reason)912 void BootReasonAddToHistory(const std::string& system_boot_reason) {
913   if (system_boot_reason.empty()) return;
914   LOG(INFO) << "Canonical boot reason: " << system_boot_reason;
915   auto old_system_boot_reason = android::base::GetProperty(system_reboot_reason_property, "");
916   if (!android::base::SetProperty(system_reboot_reason_property, system_boot_reason)) {
917     android::base::SetProperty(system_reboot_reason_property,
918                                system_boot_reason.substr(0, PROPERTY_VALUE_MAX - 1));
919   }
920   auto reason_history =
921       android::base::Split(android::base::GetProperty(history_reboot_reason_property, ""), "\n");
922   static auto mark = time(nullptr);
923   auto mark_str = std::string(",") + std::to_string(mark);
924   auto marked_system_boot_reason = system_boot_reason + mark_str;
925   if (!reason_history.empty()) {
926     // delete any entries that we just wrote in a previous
927     // call and leveraging duplicate line handling
928     auto last = old_system_boot_reason + mark_str;
929     // trim the list to (history_reboot_reason_size - 1)
930     ssize_t max = history_reboot_reason_size;
931     for (auto it = reason_history.begin(); it != reason_history.end();) {
932       if (it->empty() || (last == *it) || (marked_system_boot_reason == *it) || (--max <= 0)) {
933         it = reason_history.erase(it);
934       } else {
935         last = *it;
936         ++it;
937       }
938     }
939   }
940   // insert at the front, concatenating mark (<epoch time>) detail to the value.
941   reason_history.insert(reason_history.begin(), marked_system_boot_reason);
942   // If the property string is too long ( > PROPERTY_VALUE_MAX)
943   // we get an error, so trim out last entry and try again.
944   while (!android::base::SetProperty(history_reboot_reason_property,
945                                      android::base::Join(reason_history, '\n'))) {
946     auto it = std::prev(reason_history.end());
947     if (it == reason_history.end()) break;
948     reason_history.erase(it);
949   }
950 }
951 
952 // Scrub, Sanitize, Standardize and Enhance the boot reason string supplied.
//
// boot_reason is the raw reason string to canonicalize (SetSystemBootReason()
// passes the value of ro.boot.bootreason).  The result is derived in stages,
// each one only applying while the result is still empty or blunt:
//   1. the current sys.boot.reason, unless it merely echoes boot_reason or
//      is not a known reboot reason;
//   2. a known reason found among the ",_-" separated words of boot_reason;
//   3. an alias table mapping unofficial bootloader strings to canonical ones;
//   4. refinement hints from the pstore console and from the last reboot
//      reason file/property;
//   5. as a last resort "reboot" or "reboot,<boot_reason>".
// The canonical reason is logged and returned.
std::string BootReasonStrToReason(const std::string& boot_reason) {
  auto ret = android::base::GetProperty(system_reboot_reason_property, "");
  std::string reason(boot_reason);
  // If sys.boot.reason == ro.boot.bootreason, let's re-evaluate
  if (reason == ret) ret = "";

  transformReason(reason);

  // Is the current system boot reason sys.boot.reason valid?
  if (!isKnownRebootReason(ret)) ret = "";

  if (ret == "") {
    // Is the bootloader boot reason ro.boot.bootreason known?
    std::vector<std::string> words(android::base::Split(reason, ",_-"));
    for (auto& s : knownReasons) {
      // A blunt match is only remembered; a non-blunt match for the same
      // known reason wins immediately.
      std::string blunt;
      for (auto& r : words) {
        if (r == s) {
          if (isBluntRebootReason(s)) {
            blunt = s;
          } else {
            ret = s;
            break;
          }
        }
      }
      if (ret == "") ret = blunt;
      if (ret != "") break;
    }
  }

  if (ret == "") {
    // A series of checks to take some officially unsupported reasons
    // reported by the bootloader and find some logical and canonical
    // sense.  In an ideal world, we would require those bootloaders
    // to behave and follow our CTS standards.
    //
    // first member is the output
    // second member is an unanchored regex for an alias
    //
    // If output has a prefix of <bang> '!', we do not use it as a
    // match needle (and drop the <bang> prefix when landing in output),
    // otherwise look for it as well. This helps keep the scale of the
    // following table smaller.
    static const std::vector<std::pair<const std::string, const std::string>> aliasReasons = {
        {"watchdog", "wdog"},
        {"kernel_panic", "panic"},
        {"shutdown,thermal", "thermal"},
        {"warm,s3_wakeup", "s3_wakeup"},
        {"hard,hw_reset", "hw_reset"},
        {"cold,charger", "usb|power_on_cable"},
        {"cold,powerkey", "powerkey|power_key|PowerKey|power_on"},
        {"cold,rtc", "rtc"},
        {"cold,rtc,2sec", "2sec_reboot"},
        {"!warm", "wdt_by_pass_pwk"},  // change flavour of blunt
        {"!reboot", "^wdt$"},          // change flavour of blunt
        {"reboot,tool", "tool_by_pass_pwk"},
        {"!reboot,longkey", "reboot_longkey"},
        {"!reboot,longkey", "kpdpwr"},
        {"!reboot,undervoltage", "uvlo"},
        {"!reboot,powerloss", "smpl"},
        {"bootloader", ""},
    };

    for (auto& s : aliasReasons) {
      // 1 when the output carries the '!' prefix, 0 otherwise; doubles as the
      // offset that strips the prefix below.
      size_t firstHasNot = s.first[0] == '!';
      if (!firstHasNot && (reason.find(s.first) != std::string::npos)) {
        ret = s.first;
        break;
      }
      // An empty alias means the entry is matched by its output only.
      if (s.second.size() && std::regex_search(reason, std::regex(s.second))) {
        ret = s.first.substr(firstHasNot);
        break;
      }
    }
  }

  // If watchdog is the reason, see if there is a security angle?
  if (ret == "watchdog") {
    if (reason.find("sec") != std::string::npos) {
      ret += ",security";
    }
  }

  if (ret == "kernel_panic") {
    // Check to see if last klog has some refinement hints.
    std::string content;
    if (readPstoreConsole(content)) {
      addKernelPanicSubReason(content, ret);
    }
  } else if (isBluntRebootReason(ret)) {
    // Check the other available reason resources if the reason is still blunt.

    // Check to see if last klog has some refinement hints.
    std::string content;
    if (readPstoreConsole(content)) {
      const pstoreConsole console(content);
      // The toybox reboot command used directly (unlikely)? But also
      // catches init's response to Android's more controlled reboot command.
      if (console.rfind("reboot: Power down") != std::string::npos) {
        ret = "shutdown";  // Still too blunt, but more accurate.
        // ToDo: init should record the shutdown reason to kernel messages ala:
        //           init: shutdown system with command 'last_reboot_reason'
        //       so that if pstore has persistence we can get some details
        //       that could be missing in last_reboot_reason_property.
      }

      static const char cmd[] = "reboot: Restarting system with command '";
      size_t pos = console.rfind(cmd);
      if (pos != std::string::npos) {
        // The quoted text following cmd is the candidate subreason.
        std::string subReason(getSubreason(content, pos + strlen(cmd), /* quoted */ true));
        if (subReason != "") {  // Will not land "reboot" as that is too blunt.
          if (isKernelRebootReason(subReason)) {
            ret = "reboot," + subReason;  // User space can't talk kernel reasons.
          } else if (isKnownRebootReason(subReason)) {
            ret = subReason;
          } else {
            ret = "reboot," + subReason;  // legitimize unknown reasons
          }
        }
        // Some bootloaders shutdown results record in last kernel message.
        if (!strcmp(ret.c_str(), "reboot,kernel_power_off_charging__reboot_system")) {
          ret = "shutdown";
        }
      }

      // Check for kernel panics, allowed to override reboot command.
      (void)addKernelPanicSubReason(console, ret);
    }

    // TODO: use the HAL to get battery level (http://b/77725702).

    // Is there a controlled shutdown hint in last_reboot_reason_property?
    if (isBluntRebootReason(ret)) {
      // Content buffer no longer will have console data. Beware if more
      // checks added below, that depend on parsing console content.
      if (!android::base::ReadFileToString(last_reboot_reason_file, &content)) {
        content = android::base::GetProperty(last_reboot_reason_property, "");
      }
      transformReason(content);

      // Anything in last is better than 'super-blunt' reboot or shutdown.
      if ((ret == "") || (ret == "reboot") || (ret == "shutdown") || !isBluntRebootReason(content)) {
        ret = content;
      }
    }

    // Other System Health HAL reasons?

    // ToDo: /proc/sys/kernel/boot_reason needs a HAL interface to
    //       possibly offer hardware-specific clues from the PMIC.
  }

  // If unknown left over from above, make it "reboot,<boot_reason>"
  if (ret == "") {
    ret = "reboot";
    if (android::base::StartsWith(reason, "reboot")) {
      // Avoid "reboot,reboot...": drop the leading "reboot" and any ',' or
      // '_' separators that follow it.
      reason = reason.substr(strlen("reboot"));
      while ((reason[0] == ',') || (reason[0] == '_')) {
        reason = reason.substr(1);
      }
    }
    if (reason != "") {
      ret += ",";
      ret += reason;
    }
  }

  LOG(INFO) << "Canonical boot reason: " << ret;
  return ret;
}
1124 
1125 // Returns the appropriate metric key prefix for the boot_complete metric such
1126 // that boot metrics after a system update are labeled as ota_boot_complete;
1127 // otherwise, they are labeled as boot_complete.  This method encapsulates the
1128 // bookkeeping required to track when a system update has occurred by storing
1129 // the UTC timestamp of the system build date and comparing against the current
1130 // system build date.
CalculateBootCompletePrefix()1131 std::string CalculateBootCompletePrefix() {
1132   static const std::string kBuildDateKey = "build_date";
1133   std::string boot_complete_prefix = "boot_complete";
1134 
1135   auto build_date_str = android::base::GetProperty("ro.build.date.utc", "");
1136   int32_t build_date;
1137   if (!android::base::ParseInt(build_date_str, &build_date)) {
1138     return std::string();
1139   }
1140 
1141   BootEventRecordStore boot_event_store;
1142   BootEventRecordStore::BootEventRecord record;
1143   if (!boot_event_store.GetBootEvent(kBuildDateKey, &record)) {
1144     boot_complete_prefix = "factory_reset_" + boot_complete_prefix;
1145     boot_event_store.AddBootEventWithValue(kBuildDateKey, build_date);
1146     BootReasonAddToHistory("reboot,factory_reset");
1147   } else if (build_date != record.second) {
1148     boot_complete_prefix = "ota_" + boot_complete_prefix;
1149     boot_event_store.AddBootEventWithValue(kBuildDateKey, build_date);
1150     BootReasonAddToHistory("reboot,ota");
1151   }
1152 
1153   return boot_complete_prefix;
1154 }
1155 
1156 // Records the value of a given ro.boottime.init property in milliseconds.
RecordInitBootTimeProp(BootEventRecordStore * boot_event_store,const char * property)1157 void RecordInitBootTimeProp(BootEventRecordStore* boot_event_store, const char* property) {
1158   auto value = android::base::GetProperty(property, "");
1159 
1160   int32_t time_in_ms;
1161   if (android::base::ParseInt(value, &time_in_ms)) {
1162     boot_event_store->AddBootEventWithValue(property, time_in_ms);
1163   }
1164 }
1165 
// Maps a bootloader timing stage name to the time that stage took during
// boot.
using BootloaderTimingMap = std::map<std::string, int32_t>;
1168 
1169 // Returns a mapping from bootloader stage names to the time those stages
1170 // took to boot.
GetBootLoaderTimings()1171 const BootloaderTimingMap GetBootLoaderTimings() {
1172   BootloaderTimingMap timings;
1173 
1174   // |ro.boot.boottime| is of the form 'stage1:time1,...,stageN:timeN',
1175   // where timeN is in milliseconds.
1176   auto value = android::base::GetProperty("ro.boot.boottime", "");
1177   if (value.empty()) {
1178     // ro.boot.boottime is not reported on all devices.
1179     return BootloaderTimingMap();
1180   }
1181 
1182   auto stages = android::base::Split(value, ",");
1183   for (const auto& stageTiming : stages) {
1184     // |stageTiming| is of the form 'stage:time'.
1185     auto stageTimingValues = android::base::Split(stageTiming, ":");
1186     DCHECK_EQ(2U, stageTimingValues.size());
1187 
1188     if (stageTimingValues.size() < 2) continue;
1189     std::string stageName = stageTimingValues[0];
1190     int32_t time_ms;
1191     if (android::base::ParseInt(stageTimingValues[1], &time_ms)) {
1192       timings[stageName] = time_ms;
1193     }
1194   }
1195 
1196   return timings;
1197 }
1198 
1199 // Returns the total bootloader boot time from the ro.boot.boottime system property.
GetBootloaderTime(const BootloaderTimingMap & bootloader_timings)1200 int32_t GetBootloaderTime(const BootloaderTimingMap& bootloader_timings) {
1201   int32_t total_time = 0;
1202   for (const auto& timing : bootloader_timings) {
1203     total_time += timing.second;
1204   }
1205 
1206   return total_time;
1207 }
1208 
1209 // Parses and records the set of bootloader stages and associated boot times
1210 // from the ro.boot.boottime system property.
RecordBootloaderTimings(BootEventRecordStore * boot_event_store,const BootloaderTimingMap & bootloader_timings)1211 void RecordBootloaderTimings(BootEventRecordStore* boot_event_store,
1212                              const BootloaderTimingMap& bootloader_timings) {
1213   int32_t total_time = 0;
1214   for (const auto& timing : bootloader_timings) {
1215     total_time += timing.second;
1216     boot_event_store->AddBootEventWithValue("boottime.bootloader." + timing.first, timing.second);
1217   }
1218 
1219   boot_event_store->AddBootEventWithValue("boottime.bootloader.total", total_time);
1220 }
1221 
1222 // Returns the closest estimation to the absolute device boot time, i.e.,
1223 // from power on to boot_complete, including bootloader times.
GetAbsoluteBootTime(const BootloaderTimingMap & bootloader_timings,std::chrono::milliseconds uptime)1224 std::chrono::milliseconds GetAbsoluteBootTime(const BootloaderTimingMap& bootloader_timings,
1225                                               std::chrono::milliseconds uptime) {
1226   int32_t bootloader_time_ms = 0;
1227 
1228   for (const auto& timing : bootloader_timings) {
1229     if (timing.first.compare("SW") != 0) {
1230       bootloader_time_ms += timing.second;
1231     }
1232   }
1233 
1234   auto bootloader_duration = std::chrono::milliseconds(bootloader_time_ms);
1235   return bootloader_duration + uptime;
1236 }
1237 
1238 // Records the closest estimation to the absolute device boot time in seconds.
1239 // i.e. from power on to boot_complete, including bootloader times.
RecordAbsoluteBootTime(BootEventRecordStore * boot_event_store,std::chrono::milliseconds absolute_total)1240 void RecordAbsoluteBootTime(BootEventRecordStore* boot_event_store,
1241                             std::chrono::milliseconds absolute_total) {
1242   auto absolute_total_sec = std::chrono::duration_cast<std::chrono::seconds>(absolute_total);
1243   boot_event_store->AddBootEventWithValue("absolute_boot_time", absolute_total_sec.count());
1244 }
1245 
1246 // Logs the total boot time and reason to statsd.
LogBootInfoToStatsd(std::chrono::milliseconds end_time,std::chrono::milliseconds total_duration,int32_t bootloader_duration_ms,double time_since_last_boot_sec)1247 void LogBootInfoToStatsd(std::chrono::milliseconds end_time,
1248                          std::chrono::milliseconds total_duration, int32_t bootloader_duration_ms,
1249                          double time_since_last_boot_sec) {
1250   auto reason = android::base::GetProperty(bootloader_reboot_reason_property, "<EMPTY>");
1251   auto system_reason = android::base::GetProperty(system_reboot_reason_property, "<EMPTY>");
1252   android::util::stats_write(android::util::BOOT_SEQUENCE_REPORTED, reason.c_str(),
1253                              system_reason.c_str(), end_time.count(), total_duration.count(),
1254                              (int64_t)bootloader_duration_ms,
1255                              (int64_t)time_since_last_boot_sec * 1000);
1256 }
1257 
SetSystemBootReason()1258 void SetSystemBootReason() {
1259   const auto bootloader_boot_reason =
1260       android::base::GetProperty(bootloader_reboot_reason_property, "");
1261   const std::string system_boot_reason(BootReasonStrToReason(bootloader_boot_reason));
1262   // Record the scrubbed system_boot_reason to the property
1263   BootReasonAddToHistory(system_boot_reason);
1264   // Shift last_reboot_reason_property to last_last_reboot_reason_property
1265   std::string last_boot_reason;
1266   if (!android::base::ReadFileToString(last_reboot_reason_file, &last_boot_reason)) {
1267     PLOG(ERROR) << "Failed to read " << last_reboot_reason_file;
1268     last_boot_reason = android::base::GetProperty(last_reboot_reason_property, "");
1269     LOG(INFO) << "Value of " << last_reboot_reason_property << " : " << last_boot_reason;
1270   } else {
1271     LOG(INFO) << "Last reboot reason read from " << last_reboot_reason_file << " : "
1272               << last_boot_reason << ". Last reboot reason read from "
1273               << last_reboot_reason_property << " : "
1274               << android::base::GetProperty(last_reboot_reason_property, "");
1275   }
1276   if (last_boot_reason.empty() || isKernelRebootReason(system_boot_reason)) {
1277     last_boot_reason = system_boot_reason;
1278   } else {
1279     transformReason(last_boot_reason);
1280   }
1281   LOG(INFO) << "Normalized last reboot reason : " << last_boot_reason;
1282   android::base::SetProperty(last_last_reboot_reason_property, last_boot_reason);
1283   android::base::SetProperty(last_reboot_reason_property, "");
1284   if (unlink(last_reboot_reason_file) != 0) {
1285     PLOG(ERROR) << "Failed to unlink " << last_reboot_reason_file;
1286   }
1287 }
1288 
1289 // Gets the boot time offset. This is useful when Android is running in a
1290 // container, because the boot_clock is not reset when Android reboots.
GetBootTimeOffset()1291 std::chrono::nanoseconds GetBootTimeOffset() {
1292   static const int64_t boottime_offset =
1293       android::base::GetIntProperty<int64_t>("ro.boot.boottime_offset", 0);
1294   return std::chrono::nanoseconds(boottime_offset);
1295 }
1296 
1297 // Returns the current uptime, accounting for any offset in the CLOCK_BOOTTIME
1298 // clock.
GetUptime()1299 android::base::boot_clock::duration GetUptime() {
1300   return android::base::boot_clock::now().time_since_epoch() - GetBootTimeOffset();
1301 }
1302 
1303 // Records several metrics related to the time it takes to boot the device.
RecordBootComplete()1304 void RecordBootComplete() {
1305   BootEventRecordStore boot_event_store;
1306   BootEventRecordStore::BootEventRecord record;
1307 
1308   auto uptime_ns = GetUptime();
1309   auto uptime_s = std::chrono::duration_cast<std::chrono::seconds>(uptime_ns);
1310   time_t current_time_utc = time(nullptr);
1311   time_t time_since_last_boot = 0;
1312 
1313   if (boot_event_store.GetBootEvent("last_boot_time_utc", &record)) {
1314     time_t last_boot_time_utc = record.second;
1315     time_since_last_boot = difftime(current_time_utc, last_boot_time_utc);
1316     boot_event_store.AddBootEventWithValue("time_since_last_boot", time_since_last_boot);
1317   }
1318 
1319   boot_event_store.AddBootEventWithValue("last_boot_time_utc", current_time_utc);
1320 
1321   // The boot_complete metric has two variants: boot_complete and
1322   // ota_boot_complete.  The latter signifies that the device is booting after
1323   // a system update.
1324   std::string boot_complete_prefix = CalculateBootCompletePrefix();
1325   if (boot_complete_prefix.empty()) {
1326     // The system is hosed because the build date property could not be read.
1327     return;
1328   }
1329 
1330   // The *_no_encryption events are emitted unconditionally, since they are left
1331   // over from a time when encryption meant "full-disk encryption".  But Android
1332   // now always uses file-based encryption instead of full-disk encryption.  At
1333   // some point, these misleading and redundant events should be removed.
1334   boot_event_store.AddBootEventWithValue(boot_complete_prefix + "_no_encryption",
1335                                          uptime_s.count());
1336 
1337   // Record the total time from device startup to boot complete.  Note: we are
1338   // recording seconds here even though the field in statsd atom specifies
1339   // milliseconds.
1340   boot_event_store.AddBootEventWithValue(boot_complete_prefix, uptime_s.count());
1341 
1342   RecordInitBootTimeProp(&boot_event_store, "ro.boottime.init");
1343   RecordInitBootTimeProp(&boot_event_store, "ro.boottime.init.first_stage");
1344   RecordInitBootTimeProp(&boot_event_store, "ro.boottime.init.selinux");
1345   RecordInitBootTimeProp(&boot_event_store, "ro.boottime.init.cold_boot_wait");
1346 
1347   const BootloaderTimingMap bootloader_timings = GetBootLoaderTimings();
1348   int32_t bootloader_boot_duration = GetBootloaderTime(bootloader_timings);
1349   RecordBootloaderTimings(&boot_event_store, bootloader_timings);
1350 
1351   auto uptime_ms = std::chrono::duration_cast<std::chrono::milliseconds>(uptime_ns);
1352   auto absolute_boot_time = GetAbsoluteBootTime(bootloader_timings, uptime_ms);
1353   RecordAbsoluteBootTime(&boot_event_store, absolute_boot_time);
1354 
1355   auto boot_end_time_point = std::chrono::system_clock::now().time_since_epoch();
1356   auto boot_end_time = std::chrono::duration_cast<std::chrono::milliseconds>(boot_end_time_point);
1357 
1358   LogBootInfoToStatsd(boot_end_time, absolute_boot_time, bootloader_boot_duration,
1359                       time_since_last_boot);
1360 }
1361 
1362 // Records the boot_reason metric by querying the ro.boot.bootreason system
1363 // property.
RecordBootReason()1364 void RecordBootReason() {
1365   const auto reason = android::base::GetProperty(bootloader_reboot_reason_property, "");
1366 
1367   if (reason.empty()) {
1368     // TODO(b/148575354): Replace with statsd.
1369     // Log an empty boot reason value as '<EMPTY>' to ensure the value is intentional
1370     // (and not corruption anywhere else in the reporting pipeline).
1371     // android::metricslogger::LogMultiAction(android::metricslogger::ACTION_BOOT,
1372     //                                        android::metricslogger::FIELD_PLATFORM_REASON,
1373     //                                        "<EMPTY>");
1374   } else {
1375     // TODO(b/148575354): Replace with statsd.
1376     // android::metricslogger::LogMultiAction(android::metricslogger::ACTION_BOOT,
1377     //                                        android::metricslogger::FIELD_PLATFORM_REASON,
1378     //                                        reason);
1379   }
1380 
1381   // Log the raw bootloader_boot_reason property value.
1382   int32_t boot_reason = BootReasonStrToEnum(reason);
1383   BootEventRecordStore boot_event_store;
1384   boot_event_store.AddBootEventWithValue("boot_reason", boot_reason);
1385 
1386   // Log the scrubbed system_boot_reason.
1387   const auto system_reason = android::base::GetProperty(system_reboot_reason_property, "");
1388   int32_t system_boot_reason = BootReasonStrToEnum(system_reason);
1389   boot_event_store.AddBootEventWithValue("system_boot_reason", system_boot_reason);
1390 
1391   if (reason == "") {
1392     android::base::SetProperty(bootloader_reboot_reason_property, system_reason);
1393   }
1394 }
1395 
1396 // Records two metrics related to the user resetting a device: the time at
1397 // which the device is reset, and the time since the user last reset the
1398 // device.  The former is only set once per-factory reset.
RecordFactoryReset()1399 void RecordFactoryReset() {
1400   BootEventRecordStore boot_event_store;
1401   BootEventRecordStore::BootEventRecord record;
1402 
1403   time_t current_time_utc = time(nullptr);
1404 
1405   if (current_time_utc < 0) {
1406     // UMA does not display negative values in buckets, so convert to positive.
1407     // Logging via BootEventRecordStore.
1408     android::util::stats_write(
1409         static_cast<int32_t>(android::util::BOOT_TIME_EVENT_ERROR_CODE_REPORTED),
1410         static_cast<int32_t>(
1411             android::util::BOOT_TIME_EVENT_ERROR_CODE__EVENT__FACTORY_RESET_CURRENT_TIME_FAILURE),
1412         static_cast<int32_t>(std::abs(current_time_utc)));
1413 
1414     // Logging via BootEventRecordStore to see if using android::metricslogger::LogHistogram
1415     // is losing records somehow.
1416     boot_event_store.AddBootEventWithValue("factory_reset_current_time_failure",
1417                                            std::abs(current_time_utc));
1418     return;
1419   } else {
1420     android::util::stats_write(
1421         static_cast<int32_t>(android::util::BOOT_TIME_EVENT_UTC_TIME_REPORTED),
1422         static_cast<int32_t>(
1423             android::util::BOOT_TIME_EVENT_UTC_TIME__EVENT__FACTORY_RESET_CURRENT_TIME),
1424         static_cast<int64_t>(current_time_utc));
1425 
1426     // Logging via BootEventRecordStore to see if using android::metricslogger::LogHistogram
1427     // is losing records somehow.
1428     boot_event_store.AddBootEventWithValue("factory_reset_current_time", current_time_utc);
1429   }
1430 
1431   // The factory_reset boot event does not exist after the device is reset, so
1432   // use this signal to mark the time of the factory reset.
1433   if (!boot_event_store.GetBootEvent("factory_reset", &record)) {
1434     boot_event_store.AddBootEventWithValue("factory_reset", current_time_utc);
1435 
1436     // Don't log the time_since_factory_reset until some time has elapsed.
1437     // The data is not meaningful yet and skews the histogram buckets.
1438     return;
1439   }
1440 
1441   // Calculate and record the difference in time between now and the
1442   // factory_reset time.
1443   time_t factory_reset_utc = record.second;
1444   android::util::stats_write(
1445       static_cast<int32_t>(android::util::BOOT_TIME_EVENT_UTC_TIME_REPORTED),
1446       static_cast<int32_t>(
1447           android::util::BOOT_TIME_EVENT_UTC_TIME__EVENT__FACTORY_RESET_RECORD_VALUE),
1448       static_cast<int64_t>(factory_reset_utc));
1449 
1450   // Logging via BootEventRecordStore to see if using android::metricslogger::LogHistogram
1451   // is losing records somehow.
1452   boot_event_store.AddBootEventWithValue("factory_reset_record_value", factory_reset_utc);
1453 
1454   time_t time_since_factory_reset = difftime(current_time_utc, factory_reset_utc);
1455   boot_event_store.AddBootEventWithValue("time_since_factory_reset", time_since_factory_reset);
1456 }
1457 
1458 // List the associated boot reason(s), if arg is nullptr then all.
PrintBootReasonEnum(const char * arg)1459 void PrintBootReasonEnum(const char* arg) {
1460   int value = -1;
1461   if (arg != nullptr) {
1462     value = BootReasonStrToEnum(arg);
1463   }
1464   for (const auto& [match, id] : kBootReasonMap) {
1465     if ((value < 0) || (value == id)) {
1466       printf("%u\t%s\n", id, match.c_str());
1467     }
1468   }
1469 }
1470 
1471 }  // namespace
1472 
main(int argc,char ** argv)1473 int main(int argc, char** argv) {
1474   android::base::InitLogging(argv);
1475 
1476   const std::string cmd_line = GetCommandLine(argc, argv);
1477   LOG(INFO) << "Service started: " << cmd_line;
1478 
1479   int option_index = 0;
1480   static const char value_str[] = "value";
1481   static const char system_boot_reason_str[] = "set_system_boot_reason";
1482   static const char boot_complete_str[] = "record_boot_complete";
1483   static const char boot_reason_str[] = "record_boot_reason";
1484   static const char factory_reset_str[] = "record_time_since_factory_reset";
1485   static const char boot_reason_enum_str[] = "boot_reason_enum";
1486   static const struct option long_options[] = {
1487       // clang-format off
1488       { "help",                 no_argument,       NULL,   'h' },
1489       { "log",                  no_argument,       NULL,   'l' },
1490       { "print",                no_argument,       NULL,   'p' },
1491       { "record",               required_argument, NULL,   'r' },
1492       { value_str,              required_argument, NULL,   0 },
1493       { system_boot_reason_str, no_argument,       NULL,   0 },
1494       { boot_complete_str,      no_argument,       NULL,   0 },
1495       { boot_reason_str,        no_argument,       NULL,   0 },
1496       { factory_reset_str,      no_argument,       NULL,   0 },
1497       { boot_reason_enum_str,   optional_argument, NULL,   0 },
1498       { NULL,                   0,                 NULL,   0 }
1499       // clang-format on
1500   };
1501 
1502   std::string boot_event;
1503   std::string value;
1504   int opt = 0;
1505   while ((opt = getopt_long(argc, argv, "hlpr:", long_options, &option_index)) != -1) {
1506     switch (opt) {
1507       // This case handles long options which have no single-character mapping.
1508       case 0: {
1509         const std::string option_name = long_options[option_index].name;
1510         if (option_name == value_str) {
1511           // |optarg| is an external variable set by getopt representing
1512           // the option argument.
1513           value = optarg;
1514         } else if (option_name == system_boot_reason_str) {
1515           SetSystemBootReason();
1516         } else if (option_name == boot_complete_str) {
1517           RecordBootComplete();
1518         } else if (option_name == boot_reason_str) {
1519           RecordBootReason();
1520         } else if (option_name == factory_reset_str) {
1521           RecordFactoryReset();
1522         } else if (option_name == boot_reason_enum_str) {
1523           PrintBootReasonEnum(optarg);
1524         } else {
1525           LOG(ERROR) << "Invalid option: " << option_name;
1526         }
1527         break;
1528       }
1529 
1530       case 'h': {
1531         ShowHelp(argv[0]);
1532         break;
1533       }
1534 
1535       case 'l': {
1536         LogBootEvents();
1537         break;
1538       }
1539 
1540       case 'p': {
1541         PrintBootEvents();
1542         break;
1543       }
1544 
1545       case 'r': {
1546         // |optarg| is an external variable set by getopt representing
1547         // the option argument.
1548         boot_event = optarg;
1549         break;
1550       }
1551 
1552       default: {
1553         DCHECK_EQ(opt, '?');
1554 
1555         // |optopt| is an external variable set by getopt representing
1556         // the value of the invalid option.
1557         LOG(ERROR) << "Invalid option: " << optopt;
1558         ShowHelp(argv[0]);
1559         return EXIT_FAILURE;
1560       }
1561     }
1562   }
1563 
1564   if (!boot_event.empty()) {
1565     RecordBootEventFromCommandLine(boot_event, value);
1566   }
1567 
1568   return 0;
1569 }
1570