1 /*
2  * Copyright (C) 2021 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "host/commands/run_cvd/boot_state_machine.h"
18 
19 #include <poll.h>
20 
21 #include <memory>
22 #include <thread>
23 
24 #include <android-base/file.h>
25 #include <android-base/logging.h>
26 #include <gflags/gflags.h>
27 
28 #include "common/libs/fs/shared_fd.h"
29 #include "common/libs/utils/tee_logging.h"
30 #include "host/commands/assemble_cvd/flags_defaults.h"
31 #include "host/commands/kernel_log_monitor/kernel_log_server.h"
32 #include "host/commands/kernel_log_monitor/utils.h"
33 #include "host/commands/run_cvd/validate.h"
34 #include "host/libs/command_util/runner/defs.h"
35 #include "host/libs/command_util/util.h"
36 #include "host/libs/config/feature.h"
37 
38 DEFINE_int32(reboot_notification_fd, CF_DEFAULTS_REBOOT_NOTIFICATION_FD,
39              "A file descriptor to notify when boot completes.");
40 
41 namespace cuttlefish {
42 namespace {
43 
44 // Forks run_cvd into a daemonized child process. The current process continues
45 // only until the child has signalled that the boot is finished.
46 //
47 // `DaemonizeLauncher` returns the write end of a pipe. The child is expected
48 // to write a `RunnerExitCodes` into the pipe when the boot finishes.
DaemonizeLauncher(const CuttlefishConfig & config)49 Result<SharedFD> DaemonizeLauncher(const CuttlefishConfig& config) {
50   auto instance = config.ForDefaultInstance();
51   SharedFD read_end, write_end;
52   CF_EXPECT(SharedFD::Pipe(&read_end, &write_end), "Unable to create pipe");
53   auto pid = fork();
54   if (pid) {
55     // Explicitly close here, otherwise we may end up reading forever if the
56     // child process dies.
57     write_end->Close();
58     RunnerExitCodes exit_code;
59     auto bytes_read = read_end->Read(&exit_code, sizeof(exit_code));
60     if (bytes_read != sizeof(exit_code)) {
61       LOG(ERROR) << "Failed to read a complete exit code, read " << bytes_read
62                  << " bytes only instead of the expected " << sizeof(exit_code);
63       exit_code = RunnerExitCodes::kPipeIOError;
64     } else if (exit_code == RunnerExitCodes::kSuccess) {
65       if (IsRestoring(config)) {
66         LOG(INFO) << "Virtual device restored successfully";
67       } else {
68         LOG(INFO) << "Virtual device booted successfully";
69       }
70     } else if (exit_code == RunnerExitCodes::kVirtualDeviceBootFailed) {
71       if (IsRestoring(config)) {
72         LOG(ERROR) << "Virtual device failed to restore";
73       } else {
74         LOG(ERROR) << "Virtual device failed to boot";
75       }
76       if (!instance.fail_fast()) {
77         LOG(ERROR) << "Device has been left running for debug";
78       }
79     } else {
80       LOG(ERROR) << "Unexpected exit code: " << exit_code;
81     }
82     if (!IsRestoring(config)) {
83       if (exit_code == RunnerExitCodes::kSuccess) {
84         LOG(INFO) << kBootCompletedMessage;
85       } else {
86         LOG(INFO) << kBootFailedMessage;
87       }
88     }
89     std::exit(exit_code);
90   } else {
91     // The child returns the write end of the pipe
92     if (daemon(/*nochdir*/ 1, /*noclose*/ 1) != 0) {
93       LOG(ERROR) << "Failed to daemonize child process: " << strerror(errno);
94       std::exit(RunnerExitCodes::kDaemonizationError);
95     }
96     // Redirect standard I/O
97     auto log_path = instance.launcher_log_path();
98     auto log = SharedFD::Open(log_path.c_str(), O_CREAT | O_WRONLY | O_APPEND,
99                               S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP);
100     if (!log->IsOpen()) {
101       LOG(ERROR) << "Failed to create launcher log file: " << log->StrError();
102       std::exit(RunnerExitCodes::kDaemonizationError);
103     }
104     ::android::base::SetLogger(
105         TeeLogger({{LogFileSeverity(), log, MetadataLevel::FULL}}));
106     auto dev_null = SharedFD::Open("/dev/null", O_RDONLY);
107     if (!dev_null->IsOpen()) {
108       LOG(ERROR) << "Failed to open /dev/null: " << dev_null->StrError();
109       std::exit(RunnerExitCodes::kDaemonizationError);
110     }
111     if (dev_null->UNMANAGED_Dup2(0) < 0) {
112       LOG(ERROR) << "Failed dup2 stdin: " << dev_null->StrError();
113       std::exit(RunnerExitCodes::kDaemonizationError);
114     }
115     if (log->UNMANAGED_Dup2(1) < 0) {
116       LOG(ERROR) << "Failed dup2 stdout: " << log->StrError();
117       std::exit(RunnerExitCodes::kDaemonizationError);
118     }
119     if (log->UNMANAGED_Dup2(2) < 0) {
120       LOG(ERROR) << "Failed dup2 seterr: " << log->StrError();
121       std::exit(RunnerExitCodes::kDaemonizationError);
122     }
123 
124     read_end->Close();
125     return write_end;
126   }
127 }
128 
ProcessLeader(const CuttlefishConfig & config,const CuttlefishConfig::InstanceSpecific & instance,AutoSetup<ValidateTapDevices>::Type &)129 Result<SharedFD> ProcessLeader(
130     const CuttlefishConfig& config,
131     const CuttlefishConfig::InstanceSpecific& instance,
132     AutoSetup<ValidateTapDevices>::Type& /* dependency */) {
133   if (IsRestoring(config)) {
134     CF_EXPECT(SharedFD::Fifo(instance.restore_adbd_pipe_name(), 0600),
135               "Unable to create adbd restore fifo");
136   }
137   /* These two paths result in pretty different process state, but both
138    * achieve the same goal of making the current process the leader of a
139    * process group, and are therefore grouped together. */
140   if (instance.run_as_daemon()) {
141     return CF_EXPECT(DaemonizeLauncher(config), "DaemonizeLauncher failed");
142   }
143   // Make sure the launcher runs in its own process group even when running
144   // in the foreground
145   if (getsid(0) != getpid()) {
146     CF_EXPECTF(setpgid(0, 0) == 0, "Failed to create new process group: {}",
147                strerror(errno));
148   }
149   return {};
150 }
151 
152 // Maintains the state of the boot process, once a final state is reached
153 // (success or failure) it sends the appropriate exit code to the foreground
154 // launcher process
155 class CvdBootStateMachine : public SetupFeature, public KernelLogPipeConsumer {
156  public:
INJECT(CvdBootStateMachine (const CuttlefishConfig & config,AutoSetup<ProcessLeader>::Type & process_leader,KernelLogPipeProvider & kernel_log_pipe_provider,const vm_manager::VmManager & vm_manager,const CuttlefishConfig::InstanceSpecific & instance))157   INJECT(
158       CvdBootStateMachine(const CuttlefishConfig& config,
159                           AutoSetup<ProcessLeader>::Type& process_leader,
160                           KernelLogPipeProvider& kernel_log_pipe_provider,
161                           const vm_manager::VmManager& vm_manager,
162                           const CuttlefishConfig::InstanceSpecific& instance))
163       : config_(config),
164         process_leader_(process_leader),
165         kernel_log_pipe_provider_(kernel_log_pipe_provider),
166         vm_manager_(vm_manager),
167         instance_(instance),
168         state_(kBootStarted) {}
169 
~CvdBootStateMachine()170   ~CvdBootStateMachine() {
171     if (interrupt_fd_write_->IsOpen()) {
172       char c = 1;
173       CHECK_EQ(interrupt_fd_write_->Write(&c, 1), 1)
174           << interrupt_fd_write_->StrError();
175     }
176     if (boot_event_handler_.joinable()) {
177       boot_event_handler_.join();
178     }
179     if (restore_complete_stop_write_->IsOpen()) {
180       char c = 1;
181       CHECK_EQ(restore_complete_stop_write_->Write(&c, 1), 1)
182           << restore_complete_stop_write_->StrError();
183     }
184     if (restore_complete_handler_.joinable()) {
185       restore_complete_handler_.join();
186     }
187   }
188 
189   // SetupFeature
Name() const190   std::string Name() const override { return "CvdBootStateMachine"; }
Enabled() const191   bool Enabled() const override { return true; }
192 
193  private:
Dependencies() const194   std::unordered_set<SetupFeature*> Dependencies() const {
195     return {
196         static_cast<SetupFeature*>(&process_leader_),
197         static_cast<SetupFeature*>(&kernel_log_pipe_provider_),
198     };
199   }
ResultSetup()200   Result<void> ResultSetup() override {
201     CF_EXPECT(SharedFD::Pipe(&interrupt_fd_read_, &interrupt_fd_write_));
202     CF_EXPECT(interrupt_fd_read_->IsOpen(), interrupt_fd_read_->StrError());
203     CF_EXPECT(interrupt_fd_write_->IsOpen(), interrupt_fd_write_->StrError());
204     fg_launcher_pipe_ = *process_leader_;
205     if (FLAGS_reboot_notification_fd >= 0) {
206       reboot_notification_ = SharedFD::Dup(FLAGS_reboot_notification_fd);
207       CF_EXPECTF(reboot_notification_->IsOpen(),
208                  "Could not dup fd given for reboot_notification_fd: {}",
209                  reboot_notification_->StrError());
210       close(FLAGS_reboot_notification_fd);
211     }
212     SharedFD boot_events_pipe = kernel_log_pipe_provider_.KernelLogPipe();
213     CF_EXPECTF(boot_events_pipe->IsOpen(), "Could not get boot events pipe: {}",
214                boot_events_pipe->StrError());
215 
216     // Pipe to tell `ThreadLoop` that the restore is complete.
217     SharedFD restore_complete_pipe, restore_complete_pipe_write;
218     // Pipe to tell `restore_complete_handler_` thread to give up.
219     // It isn't perfect, can only break out of the `WaitForRestoreComplete`
220     // step.
221     SharedFD restore_complete_stop_read;
222     if (IsRestoring(config_)) {
223       CF_EXPECT(
224           SharedFD::Pipe(&restore_complete_pipe, &restore_complete_pipe_write),
225           "unable to create pipe");
226       CF_EXPECT(SharedFD::Pipe(&restore_complete_stop_read,
227                                &restore_complete_stop_write_),
228                 "unable to create pipe");
229 
230       restore_complete_handler_ = std::thread(
231           [this, restore_complete_pipe_write, restore_complete_stop_read]() {
232             const auto result =
233                 vm_manager_.WaitForRestoreComplete(restore_complete_stop_read);
234             CHECK(result.ok()) << "Failed to wait for restore complete: "
235                                << result.error().FormatForEnv();
236             if (!result.value()) {
237               return;
238             }
239 
240             cuttlefish::SharedFD restore_adbd_pipe = cuttlefish::SharedFD::Open(
241                 config_.ForDefaultInstance().restore_adbd_pipe_name().c_str(),
242                 O_WRONLY);
243             CHECK(restore_adbd_pipe->IsOpen())
244                 << "Error opening adbd restore pipe: "
245                 << restore_adbd_pipe->StrError();
246             CHECK(cuttlefish::WriteAll(restore_adbd_pipe, "2") == 1)
247                 << "Error writing to adbd restore pipe: "
248                 << restore_adbd_pipe->StrError() << ". This is unrecoverable.";
249 
250             auto SubtoolPath = [](const std::string& subtool_name) {
251               auto my_own_dir = android::base::GetExecutableDirectory();
252               std::stringstream subtool_path_stream;
253               subtool_path_stream << my_own_dir << "/" << subtool_name;
254               auto subtool_path = subtool_path_stream.str();
255               if (my_own_dir.empty() || !FileExists(subtool_path)) {
256                 return HostBinaryPath(subtool_name);
257               }
258               return subtool_path;
259             };
260             const auto adb_bin_path = SubtoolPath("adb");
261             CHECK(Execute({adb_bin_path, "-s", instance_.adb_ip_and_port(),
262                            "wait-for-device"},
263                           SubprocessOptions(), WEXITED)
264                       .ok())
265                 << "Failed to suspend bluetooth manager.";
266             CHECK(Execute({adb_bin_path, "-s", instance_.adb_ip_and_port(),
267                            "shell", "cmd", "bluetooth_manager", "enable"},
268                           SubprocessOptions(), WEXITED)
269                       .ok());
270             CHECK(Execute({adb_bin_path, "-s", instance_.adb_ip_and_port(),
271                            "shell", "cmd", "uwb", "enable-uwb"},
272                           SubprocessOptions(), WEXITED)
273                       .ok());
274             // Done last so that adb is more likely to be ready.
275             CHECK(cuttlefish::WriteAll(restore_complete_pipe_write, "1") == 1)
276                 << "Error writing to restore complete pipe: "
277                 << restore_complete_pipe_write->StrError()
278                 << ". This is unrecoverable.";
279           });
280     }
281 
282     boot_event_handler_ =
283         std::thread([this, boot_events_pipe, restore_complete_pipe]() {
284           ThreadLoop(boot_events_pipe, restore_complete_pipe);
285         });
286 
287     return {};
288   }
289 
ThreadLoop(SharedFD boot_events_pipe,SharedFD restore_complete_pipe)290   void ThreadLoop(SharedFD boot_events_pipe, SharedFD restore_complete_pipe) {
291     while (true) {
292       std::vector<PollSharedFd> poll_shared_fd = {
293           {
294               .fd = boot_events_pipe,
295               .events = POLLIN | POLLHUP,
296           },
297           {
298               .fd = restore_complete_pipe,
299               .events = restore_complete_pipe->IsOpen()
300                             ? (short)(POLLIN | POLLHUP)
301                             : (short)0,
302           },
303           {
304               .fd = interrupt_fd_read_,
305               .events = POLLIN | POLLHUP,
306           },
307       };
308       int result = SharedFD::Poll(poll_shared_fd, -1);
309       // interrupt_fd_read_
310       if (poll_shared_fd[2].revents & POLLIN) {
311         return;
312       }
313       if (result < 0) {
314         PLOG(FATAL) << "Failed to call Select";
315         return;
316       }
317       // boot_events_pipe
318       if (poll_shared_fd[0].revents & POLLHUP) {
319         LOG(ERROR) << "Failed to read a complete kernel log boot event.";
320         state_ |= kGuestBootFailed;
321         if (MaybeWriteNotification()) {
322           break;
323         }
324       }
325       if (poll_shared_fd[0].revents & POLLIN) {
326         auto sent_code = OnBootEvtReceived(boot_events_pipe);
327         if (sent_code) {
328           if (!BootCompleted()) {
329             if (!instance_.fail_fast()) {
330               LOG(ERROR) << "Device running, likely in a bad state";
331               break;
332             }
333             auto monitor_res = GetLauncherMonitorFromInstance(instance_, 5);
334             CHECK(monitor_res.ok()) << monitor_res.error().FormatForEnv();
335             auto fail_res = RunLauncherAction(
336                 *monitor_res, LauncherAction::kFail, std::optional<int>());
337             CHECK(fail_res.ok()) << fail_res.error().FormatForEnv();
338           }
339           break;
340         }
341       }
342       // restore_complete_pipe
343       if (poll_shared_fd[1].revents & POLLIN) {
344         char buff[1];
345         auto read = restore_complete_pipe->Read(buff, 1);
346         if (read <= 0) {
347           LOG(ERROR) << "Could not read restore pipe: "
348                      << restore_complete_pipe->StrError();
349           state_ |= kGuestBootFailed;
350           if (MaybeWriteNotification()) {
351             break;
352           }
353         }
354         state_ |= kGuestBootCompleted;
355         if (MaybeWriteNotification()) {
356           break;
357         }
358       }
359       if (poll_shared_fd[1].revents & POLLHUP) {
360         LOG(ERROR) << "restore_complete_pipe closed unexpectedly";
361         state_ |= kGuestBootFailed;
362         if (MaybeWriteNotification()) {
363           break;
364         }
365       }
366     }
367   }
368 
369   // Returns true if the machine is left in a final state
OnBootEvtReceived(SharedFD boot_events_pipe)370   bool OnBootEvtReceived(SharedFD boot_events_pipe) {
371     std::optional<monitor::ReadEventResult> read_result =
372         monitor::ReadEvent(boot_events_pipe);
373     if (!read_result) {
374       LOG(ERROR) << "Failed to read a complete kernel log boot event.";
375       state_ |= kGuestBootFailed;
376       return MaybeWriteNotification();
377     }
378 
379     if (read_result->event == monitor::Event::BootCompleted) {
380       LOG(INFO) << "Virtual device booted successfully";
381       state_ |= kGuestBootCompleted;
382     } else if (read_result->event == monitor::Event::BootFailed) {
383       LOG(ERROR) << "Virtual device failed to boot";
384       state_ |= kGuestBootFailed;
385     }  // Ignore the other signals
386 
387     return MaybeWriteNotification();
388   }
BootCompleted() const389   bool BootCompleted() const { return state_ & kGuestBootCompleted; }
BootFailed() const390   bool BootFailed() const { return state_ & kGuestBootFailed; }
391 
SendExitCode(RunnerExitCodes exit_code,SharedFD fd)392   void SendExitCode(RunnerExitCodes exit_code, SharedFD fd) {
393     fd->Write(&exit_code, sizeof(exit_code));
394     // The foreground process will exit after receiving the exit code, if we try
395     // to write again we'll get a SIGPIPE
396     fd->Close();
397   }
MaybeWriteNotification()398   bool MaybeWriteNotification() {
399     std::vector<SharedFD> fds = {reboot_notification_, fg_launcher_pipe_};
400     for (auto& fd : fds) {
401       if (fd->IsOpen()) {
402         if (BootCompleted()) {
403           SendExitCode(RunnerExitCodes::kSuccess, fd);
404         } else if (state_ & kGuestBootFailed) {
405           SendExitCode(RunnerExitCodes::kVirtualDeviceBootFailed, fd);
406         }
407       }
408     }
409     // Either we sent the code before or just sent it, in any case the state is
410     // final
411     return BootCompleted() || (state_ & kGuestBootFailed);
412   }
413 
414   const CuttlefishConfig& config_;
415   AutoSetup<ProcessLeader>::Type& process_leader_;
416   KernelLogPipeProvider& kernel_log_pipe_provider_;
417   const vm_manager::VmManager& vm_manager_;
418   const CuttlefishConfig::InstanceSpecific& instance_;
419 
420   std::thread boot_event_handler_;
421   std::thread restore_complete_handler_;
422   SharedFD restore_complete_stop_write_;
423   SharedFD fg_launcher_pipe_;
424   SharedFD reboot_notification_;
425   SharedFD interrupt_fd_read_;
426   SharedFD interrupt_fd_write_;
427   int state_;
428   static const int kBootStarted = 0;
429   static const int kGuestBootCompleted = 1 << 0;
430   static const int kGuestBootFailed = 1 << 1;
431 };
432 
433 }  // namespace
434 
435 fruit::Component<fruit::Required<const CuttlefishConfig, KernelLogPipeProvider,
436                                  const CuttlefishConfig::InstanceSpecific,
437                                  const vm_manager::VmManager,
438                                  AutoSetup<ValidateTapDevices>::Type>>
bootStateMachineComponent()439 bootStateMachineComponent() {
440   return fruit::createComponent()
441       .addMultibinding<KernelLogPipeConsumer, CvdBootStateMachine>()
442       .addMultibinding<SetupFeature, CvdBootStateMachine>()
443       .install(AutoSetup<ProcessLeader>::Component);
444 }
445 
446 }  // namespace cuttlefish
447