1 /*
2 * Copyright (C) 2021 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "host/commands/run_cvd/boot_state_machine.h"
18
19 #include <poll.h>
20
21 #include <memory>
22 #include <thread>
23
24 #include <android-base/file.h>
25 #include <android-base/logging.h>
26 #include <gflags/gflags.h>
27
28 #include "common/libs/fs/shared_fd.h"
29 #include "common/libs/utils/tee_logging.h"
30 #include "host/commands/assemble_cvd/flags_defaults.h"
31 #include "host/commands/kernel_log_monitor/kernel_log_server.h"
32 #include "host/commands/kernel_log_monitor/utils.h"
33 #include "host/commands/run_cvd/validate.h"
34 #include "host/libs/command_util/runner/defs.h"
35 #include "host/libs/command_util/util.h"
36 #include "host/libs/config/feature.h"
37
// Command-line flag holding a file descriptor inherited by this process. When
// the value is non-negative, CvdBootStateMachine::ResultSetup duplicates it,
// closes the original descriptor, and a RunnerExitCodes value is written to
// the duplicate once the boot reaches a final state (success or failure).
38 DEFINE_int32(reboot_notification_fd, CF_DEFAULTS_REBOOT_NOTIFICATION_FD,
39 "A file descriptor to notify when boot completes.");
40
41 namespace cuttlefish {
42 namespace {
43
44 // Forks run_cvd into a daemonized child process. The current process continues
45 // only until the child has signalled that the boot is finished.
46 //
47 // `DaemonizeLauncher` returns the write end of a pipe. The child is expected
48 // to write a `RunnerExitCodes` into the pipe when the boot finishes.
DaemonizeLauncher(const CuttlefishConfig & config)49 Result<SharedFD> DaemonizeLauncher(const CuttlefishConfig& config) {
50 auto instance = config.ForDefaultInstance();
51 SharedFD read_end, write_end;
52 CF_EXPECT(SharedFD::Pipe(&read_end, &write_end), "Unable to create pipe");
53 auto pid = fork();
54 if (pid) {
55 // Explicitly close here, otherwise we may end up reading forever if the
56 // child process dies.
57 write_end->Close();
58 RunnerExitCodes exit_code;
59 auto bytes_read = read_end->Read(&exit_code, sizeof(exit_code));
60 if (bytes_read != sizeof(exit_code)) {
61 LOG(ERROR) << "Failed to read a complete exit code, read " << bytes_read
62 << " bytes only instead of the expected " << sizeof(exit_code);
63 exit_code = RunnerExitCodes::kPipeIOError;
64 } else if (exit_code == RunnerExitCodes::kSuccess) {
65 if (IsRestoring(config)) {
66 LOG(INFO) << "Virtual device restored successfully";
67 } else {
68 LOG(INFO) << "Virtual device booted successfully";
69 }
70 } else if (exit_code == RunnerExitCodes::kVirtualDeviceBootFailed) {
71 if (IsRestoring(config)) {
72 LOG(ERROR) << "Virtual device failed to restore";
73 } else {
74 LOG(ERROR) << "Virtual device failed to boot";
75 }
76 if (!instance.fail_fast()) {
77 LOG(ERROR) << "Device has been left running for debug";
78 }
79 } else {
80 LOG(ERROR) << "Unexpected exit code: " << exit_code;
81 }
82 if (!IsRestoring(config)) {
83 if (exit_code == RunnerExitCodes::kSuccess) {
84 LOG(INFO) << kBootCompletedMessage;
85 } else {
86 LOG(INFO) << kBootFailedMessage;
87 }
88 }
89 std::exit(exit_code);
90 } else {
91 // The child returns the write end of the pipe
92 if (daemon(/*nochdir*/ 1, /*noclose*/ 1) != 0) {
93 LOG(ERROR) << "Failed to daemonize child process: " << strerror(errno);
94 std::exit(RunnerExitCodes::kDaemonizationError);
95 }
96 // Redirect standard I/O
97 auto log_path = instance.launcher_log_path();
98 auto log = SharedFD::Open(log_path.c_str(), O_CREAT | O_WRONLY | O_APPEND,
99 S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP);
100 if (!log->IsOpen()) {
101 LOG(ERROR) << "Failed to create launcher log file: " << log->StrError();
102 std::exit(RunnerExitCodes::kDaemonizationError);
103 }
104 ::android::base::SetLogger(
105 TeeLogger({{LogFileSeverity(), log, MetadataLevel::FULL}}));
106 auto dev_null = SharedFD::Open("/dev/null", O_RDONLY);
107 if (!dev_null->IsOpen()) {
108 LOG(ERROR) << "Failed to open /dev/null: " << dev_null->StrError();
109 std::exit(RunnerExitCodes::kDaemonizationError);
110 }
111 if (dev_null->UNMANAGED_Dup2(0) < 0) {
112 LOG(ERROR) << "Failed dup2 stdin: " << dev_null->StrError();
113 std::exit(RunnerExitCodes::kDaemonizationError);
114 }
115 if (log->UNMANAGED_Dup2(1) < 0) {
116 LOG(ERROR) << "Failed dup2 stdout: " << log->StrError();
117 std::exit(RunnerExitCodes::kDaemonizationError);
118 }
119 if (log->UNMANAGED_Dup2(2) < 0) {
120 LOG(ERROR) << "Failed dup2 seterr: " << log->StrError();
121 std::exit(RunnerExitCodes::kDaemonizationError);
122 }
123
124 read_end->Close();
125 return write_end;
126 }
127 }
128
ProcessLeader(const CuttlefishConfig & config,const CuttlefishConfig::InstanceSpecific & instance,AutoSetup<ValidateTapDevices>::Type &)129 Result<SharedFD> ProcessLeader(
130 const CuttlefishConfig& config,
131 const CuttlefishConfig::InstanceSpecific& instance,
132 AutoSetup<ValidateTapDevices>::Type& /* dependency */) {
133 if (IsRestoring(config)) {
134 CF_EXPECT(SharedFD::Fifo(instance.restore_adbd_pipe_name(), 0600),
135 "Unable to create adbd restore fifo");
136 }
137 /* These two paths result in pretty different process state, but both
138 * achieve the same goal of making the current process the leader of a
139 * process group, and are therefore grouped together. */
140 if (instance.run_as_daemon()) {
141 return CF_EXPECT(DaemonizeLauncher(config), "DaemonizeLauncher failed");
142 }
143 // Make sure the launcher runs in its own process group even when running
144 // in the foreground
145 if (getsid(0) != getpid()) {
146 CF_EXPECTF(setpgid(0, 0) == 0, "Failed to create new process group: {}",
147 strerror(errno));
148 }
149 return {};
150 }
151
152 // Maintains the state of the boot process, once a final state is reached
153 // (success or failure) it sends the appropriate exit code to the foreground
154 // launcher process
155 class CvdBootStateMachine : public SetupFeature, public KernelLogPipeConsumer {
156 public:
INJECT(CvdBootStateMachine (const CuttlefishConfig & config,AutoSetup<ProcessLeader>::Type & process_leader,KernelLogPipeProvider & kernel_log_pipe_provider,const vm_manager::VmManager & vm_manager,const CuttlefishConfig::InstanceSpecific & instance))157 INJECT(
158 CvdBootStateMachine(const CuttlefishConfig& config,
159 AutoSetup<ProcessLeader>::Type& process_leader,
160 KernelLogPipeProvider& kernel_log_pipe_provider,
161 const vm_manager::VmManager& vm_manager,
162 const CuttlefishConfig::InstanceSpecific& instance))
163 : config_(config),
164 process_leader_(process_leader),
165 kernel_log_pipe_provider_(kernel_log_pipe_provider),
166 vm_manager_(vm_manager),
167 instance_(instance),
168 state_(kBootStarted) {}
169
~CvdBootStateMachine()170 ~CvdBootStateMachine() {
171 if (interrupt_fd_write_->IsOpen()) {
172 char c = 1;
173 CHECK_EQ(interrupt_fd_write_->Write(&c, 1), 1)
174 << interrupt_fd_write_->StrError();
175 }
176 if (boot_event_handler_.joinable()) {
177 boot_event_handler_.join();
178 }
179 if (restore_complete_stop_write_->IsOpen()) {
180 char c = 1;
181 CHECK_EQ(restore_complete_stop_write_->Write(&c, 1), 1)
182 << restore_complete_stop_write_->StrError();
183 }
184 if (restore_complete_handler_.joinable()) {
185 restore_complete_handler_.join();
186 }
187 }
188
189 // SetupFeature
Name() const190 std::string Name() const override { return "CvdBootStateMachine"; }
Enabled() const191 bool Enabled() const override { return true; }
192
193 private:
Dependencies() const194 std::unordered_set<SetupFeature*> Dependencies() const {
195 return {
196 static_cast<SetupFeature*>(&process_leader_),
197 static_cast<SetupFeature*>(&kernel_log_pipe_provider_),
198 };
199 }
ResultSetup()200 Result<void> ResultSetup() override {
201 CF_EXPECT(SharedFD::Pipe(&interrupt_fd_read_, &interrupt_fd_write_));
202 CF_EXPECT(interrupt_fd_read_->IsOpen(), interrupt_fd_read_->StrError());
203 CF_EXPECT(interrupt_fd_write_->IsOpen(), interrupt_fd_write_->StrError());
204 fg_launcher_pipe_ = *process_leader_;
205 if (FLAGS_reboot_notification_fd >= 0) {
206 reboot_notification_ = SharedFD::Dup(FLAGS_reboot_notification_fd);
207 CF_EXPECTF(reboot_notification_->IsOpen(),
208 "Could not dup fd given for reboot_notification_fd: {}",
209 reboot_notification_->StrError());
210 close(FLAGS_reboot_notification_fd);
211 }
212 SharedFD boot_events_pipe = kernel_log_pipe_provider_.KernelLogPipe();
213 CF_EXPECTF(boot_events_pipe->IsOpen(), "Could not get boot events pipe: {}",
214 boot_events_pipe->StrError());
215
216 // Pipe to tell `ThreadLoop` that the restore is complete.
217 SharedFD restore_complete_pipe, restore_complete_pipe_write;
218 // Pipe to tell `restore_complete_handler_` thread to give up.
219 // It isn't perfect, can only break out of the `WaitForRestoreComplete`
220 // step.
221 SharedFD restore_complete_stop_read;
222 if (IsRestoring(config_)) {
223 CF_EXPECT(
224 SharedFD::Pipe(&restore_complete_pipe, &restore_complete_pipe_write),
225 "unable to create pipe");
226 CF_EXPECT(SharedFD::Pipe(&restore_complete_stop_read,
227 &restore_complete_stop_write_),
228 "unable to create pipe");
229
230 restore_complete_handler_ = std::thread(
231 [this, restore_complete_pipe_write, restore_complete_stop_read]() {
232 const auto result =
233 vm_manager_.WaitForRestoreComplete(restore_complete_stop_read);
234 CHECK(result.ok()) << "Failed to wait for restore complete: "
235 << result.error().FormatForEnv();
236 if (!result.value()) {
237 return;
238 }
239
240 cuttlefish::SharedFD restore_adbd_pipe = cuttlefish::SharedFD::Open(
241 config_.ForDefaultInstance().restore_adbd_pipe_name().c_str(),
242 O_WRONLY);
243 CHECK(restore_adbd_pipe->IsOpen())
244 << "Error opening adbd restore pipe: "
245 << restore_adbd_pipe->StrError();
246 CHECK(cuttlefish::WriteAll(restore_adbd_pipe, "2") == 1)
247 << "Error writing to adbd restore pipe: "
248 << restore_adbd_pipe->StrError() << ". This is unrecoverable.";
249
250 auto SubtoolPath = [](const std::string& subtool_name) {
251 auto my_own_dir = android::base::GetExecutableDirectory();
252 std::stringstream subtool_path_stream;
253 subtool_path_stream << my_own_dir << "/" << subtool_name;
254 auto subtool_path = subtool_path_stream.str();
255 if (my_own_dir.empty() || !FileExists(subtool_path)) {
256 return HostBinaryPath(subtool_name);
257 }
258 return subtool_path;
259 };
260 const auto adb_bin_path = SubtoolPath("adb");
261 CHECK(Execute({adb_bin_path, "-s", instance_.adb_ip_and_port(),
262 "wait-for-device"},
263 SubprocessOptions(), WEXITED)
264 .ok())
265 << "Failed to suspend bluetooth manager.";
266 CHECK(Execute({adb_bin_path, "-s", instance_.adb_ip_and_port(),
267 "shell", "cmd", "bluetooth_manager", "enable"},
268 SubprocessOptions(), WEXITED)
269 .ok());
270 CHECK(Execute({adb_bin_path, "-s", instance_.adb_ip_and_port(),
271 "shell", "cmd", "uwb", "enable-uwb"},
272 SubprocessOptions(), WEXITED)
273 .ok());
274 // Done last so that adb is more likely to be ready.
275 CHECK(cuttlefish::WriteAll(restore_complete_pipe_write, "1") == 1)
276 << "Error writing to restore complete pipe: "
277 << restore_complete_pipe_write->StrError()
278 << ". This is unrecoverable.";
279 });
280 }
281
282 boot_event_handler_ =
283 std::thread([this, boot_events_pipe, restore_complete_pipe]() {
284 ThreadLoop(boot_events_pipe, restore_complete_pipe);
285 });
286
287 return {};
288 }
289
ThreadLoop(SharedFD boot_events_pipe,SharedFD restore_complete_pipe)290 void ThreadLoop(SharedFD boot_events_pipe, SharedFD restore_complete_pipe) {
291 while (true) {
292 std::vector<PollSharedFd> poll_shared_fd = {
293 {
294 .fd = boot_events_pipe,
295 .events = POLLIN | POLLHUP,
296 },
297 {
298 .fd = restore_complete_pipe,
299 .events = restore_complete_pipe->IsOpen()
300 ? (short)(POLLIN | POLLHUP)
301 : (short)0,
302 },
303 {
304 .fd = interrupt_fd_read_,
305 .events = POLLIN | POLLHUP,
306 },
307 };
308 int result = SharedFD::Poll(poll_shared_fd, -1);
309 // interrupt_fd_read_
310 if (poll_shared_fd[2].revents & POLLIN) {
311 return;
312 }
313 if (result < 0) {
314 PLOG(FATAL) << "Failed to call Select";
315 return;
316 }
317 // boot_events_pipe
318 if (poll_shared_fd[0].revents & POLLHUP) {
319 LOG(ERROR) << "Failed to read a complete kernel log boot event.";
320 state_ |= kGuestBootFailed;
321 if (MaybeWriteNotification()) {
322 break;
323 }
324 }
325 if (poll_shared_fd[0].revents & POLLIN) {
326 auto sent_code = OnBootEvtReceived(boot_events_pipe);
327 if (sent_code) {
328 if (!BootCompleted()) {
329 if (!instance_.fail_fast()) {
330 LOG(ERROR) << "Device running, likely in a bad state";
331 break;
332 }
333 auto monitor_res = GetLauncherMonitorFromInstance(instance_, 5);
334 CHECK(monitor_res.ok()) << monitor_res.error().FormatForEnv();
335 auto fail_res = RunLauncherAction(
336 *monitor_res, LauncherAction::kFail, std::optional<int>());
337 CHECK(fail_res.ok()) << fail_res.error().FormatForEnv();
338 }
339 break;
340 }
341 }
342 // restore_complete_pipe
343 if (poll_shared_fd[1].revents & POLLIN) {
344 char buff[1];
345 auto read = restore_complete_pipe->Read(buff, 1);
346 if (read <= 0) {
347 LOG(ERROR) << "Could not read restore pipe: "
348 << restore_complete_pipe->StrError();
349 state_ |= kGuestBootFailed;
350 if (MaybeWriteNotification()) {
351 break;
352 }
353 }
354 state_ |= kGuestBootCompleted;
355 if (MaybeWriteNotification()) {
356 break;
357 }
358 }
359 if (poll_shared_fd[1].revents & POLLHUP) {
360 LOG(ERROR) << "restore_complete_pipe closed unexpectedly";
361 state_ |= kGuestBootFailed;
362 if (MaybeWriteNotification()) {
363 break;
364 }
365 }
366 }
367 }
368
369 // Returns true if the machine is left in a final state
OnBootEvtReceived(SharedFD boot_events_pipe)370 bool OnBootEvtReceived(SharedFD boot_events_pipe) {
371 std::optional<monitor::ReadEventResult> read_result =
372 monitor::ReadEvent(boot_events_pipe);
373 if (!read_result) {
374 LOG(ERROR) << "Failed to read a complete kernel log boot event.";
375 state_ |= kGuestBootFailed;
376 return MaybeWriteNotification();
377 }
378
379 if (read_result->event == monitor::Event::BootCompleted) {
380 LOG(INFO) << "Virtual device booted successfully";
381 state_ |= kGuestBootCompleted;
382 } else if (read_result->event == monitor::Event::BootFailed) {
383 LOG(ERROR) << "Virtual device failed to boot";
384 state_ |= kGuestBootFailed;
385 } // Ignore the other signals
386
387 return MaybeWriteNotification();
388 }
BootCompleted() const389 bool BootCompleted() const { return state_ & kGuestBootCompleted; }
BootFailed() const390 bool BootFailed() const { return state_ & kGuestBootFailed; }
391
SendExitCode(RunnerExitCodes exit_code,SharedFD fd)392 void SendExitCode(RunnerExitCodes exit_code, SharedFD fd) {
393 fd->Write(&exit_code, sizeof(exit_code));
394 // The foreground process will exit after receiving the exit code, if we try
395 // to write again we'll get a SIGPIPE
396 fd->Close();
397 }
MaybeWriteNotification()398 bool MaybeWriteNotification() {
399 std::vector<SharedFD> fds = {reboot_notification_, fg_launcher_pipe_};
400 for (auto& fd : fds) {
401 if (fd->IsOpen()) {
402 if (BootCompleted()) {
403 SendExitCode(RunnerExitCodes::kSuccess, fd);
404 } else if (state_ & kGuestBootFailed) {
405 SendExitCode(RunnerExitCodes::kVirtualDeviceBootFailed, fd);
406 }
407 }
408 }
409 // Either we sent the code before or just sent it, in any case the state is
410 // final
411 return BootCompleted() || (state_ & kGuestBootFailed);
412 }
413
414 const CuttlefishConfig& config_;
415 AutoSetup<ProcessLeader>::Type& process_leader_;
416 KernelLogPipeProvider& kernel_log_pipe_provider_;
417 const vm_manager::VmManager& vm_manager_;
418 const CuttlefishConfig::InstanceSpecific& instance_;
419
420 std::thread boot_event_handler_;
421 std::thread restore_complete_handler_;
422 SharedFD restore_complete_stop_write_;
423 SharedFD fg_launcher_pipe_;
424 SharedFD reboot_notification_;
425 SharedFD interrupt_fd_read_;
426 SharedFD interrupt_fd_write_;
427 int state_;
428 static const int kBootStarted = 0;
429 static const int kGuestBootCompleted = 1 << 0;
430 static const int kGuestBootFailed = 1 << 1;
431 };
432
433 } // namespace
434
// Builds the fruit component for the boot state machine: CvdBootStateMachine
// is registered as a multibinding for both KernelLogPipeConsumer and
// SetupFeature, and the AutoSetup component wrapping ProcessLeader is
// installed. The types listed in fruit::Required are not bound here.
435 fruit::Component<fruit::Required<const CuttlefishConfig, KernelLogPipeProvider,
436 const CuttlefishConfig::InstanceSpecific,
437 const vm_manager::VmManager,
438 AutoSetup<ValidateTapDevices>::Type>>
bootStateMachineComponent()439 bootStateMachineComponent() {
440 return fruit::createComponent()
441 .addMultibinding<KernelLogPipeConsumer, CvdBootStateMachine>()
442 .addMultibinding<SetupFeature, CvdBootStateMachine>()
443 .install(AutoSetup<ProcessLeader>::Component);
444 }
445
446 } // namespace cuttlefish
447