1 // Copyright (C) 2019 The Android Open Source Project
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include <libsnapshot/snapshot.h>
16
17 #include <dirent.h>
18 #include <fcntl.h>
19 #include <math.h>
20 #include <sys/file.h>
21 #include <sys/types.h>
22 #include <sys/unistd.h>
23
24 #include <filesystem>
25 #include <optional>
26 #include <thread>
27
28 #include <android-base/file.h>
29 #include <android-base/logging.h>
30 #include <android-base/parseint.h>
31 #include <android-base/properties.h>
32 #include <android-base/stringprintf.h>
33 #include <android-base/strings.h>
34 #include <android-base/unique_fd.h>
35 #include <cutils/sockets.h>
36 #include <ext4_utils/ext4_utils.h>
37 #include <fs_mgr.h>
38 #include <fs_mgr/file_wait.h>
39 #include <fs_mgr_dm_linear.h>
40 #include <fstab/fstab.h>
41 #include <libdm/dm.h>
42 #include <libfiemap/image_manager.h>
43 #include <liblp/liblp.h>
44
45 #include <android/snapshot/snapshot.pb.h>
46 #include <libsnapshot/snapshot_stats.h>
47 #include "device_info.h"
48 #include "partition_cow_creator.h"
49 #include "snapshot_metadata_updater.h"
50 #include "utility.h"
51
52 namespace android {
53 namespace snapshot {
54
55 using aidl::android::hardware::boot::MergeStatus;
56 using android::base::unique_fd;
57 using android::dm::DeviceMapper;
58 using android::dm::DmDeviceState;
59 using android::dm::DmTable;
60 using android::dm::DmTargetLinear;
61 using android::dm::DmTargetSnapshot;
62 using android::dm::DmTargetUser;
63 using android::dm::kSectorSize;
64 using android::dm::SnapshotStorageMode;
65 using android::fiemap::FiemapStatus;
66 using android::fiemap::IImageManager;
67 using android::fs_mgr::CreateDmTable;
68 using android::fs_mgr::CreateLogicalPartition;
69 using android::fs_mgr::CreateLogicalPartitionParams;
70 using android::fs_mgr::GetPartitionGroupName;
71 using android::fs_mgr::GetPartitionName;
72 using android::fs_mgr::LpMetadata;
73 using android::fs_mgr::MetadataBuilder;
74 using android::fs_mgr::SlotNumberForSlotSuffix;
75 using chromeos_update_engine::DeltaArchiveManifest;
76 using chromeos_update_engine::Extent;
77 using chromeos_update_engine::FileDescriptor;
78 using chromeos_update_engine::PartitionUpdate;
79 template <typename T>
80 using RepeatedPtrField = google::protobuf::RepeatedPtrField<T>;
81 using std::chrono::duration_cast;
82 using namespace std::chrono_literals;
83 using namespace std::string_literals;
84 using android::base::Realpath;
85 using android::base::StringPrintf;
86
// Well-known paths under /metadata/ota.
static constexpr char kBootSnapshotsWithoutSlotSwitch[] =
        "/metadata/ota/snapshot-boot-without-slot-switch";
static constexpr char kBootIndicatorPath[] = "/metadata/ota/snapshot-boot";
static constexpr char kRollbackIndicatorPath[] = "/metadata/ota/rollback-indicator";

static constexpr std::chrono::seconds kUpdateStateCheckInterval{2};

/*
 * Read-ahead size, in KB.
 *
 * It is kept at 32 KB so that there is no significant memory pressure
 * (/proc/pressure/memory) during boot. After an OTA, partitions are scanned
 * during boot before the slot is marked as successful. That scan triggers
 * read-ahead on both the source and the COW block devices, which drives the
 * number of Inactive(file) pages very high.
 *
 * A lower value may reduce memory pressure further, but it will also increase
 * the boot time. Devices that do not care about OTA boot time can instead use
 * the O_DIRECT functionality, in which I/O to the source block device is done
 * with O_DIRECT.
 */
static constexpr int kReadAheadSizeKb = 32;
105
106 // Note: IImageManager is an incomplete type in the header, so the default
107 // destructor doesn't work.
~SnapshotManager()108 SnapshotManager::~SnapshotManager() {}
109
New(IDeviceInfo * info)110 std::unique_ptr<SnapshotManager> SnapshotManager::New(IDeviceInfo* info) {
111 if (!info) {
112 info = new DeviceInfo();
113 }
114
115 return std::unique_ptr<SnapshotManager>(new SnapshotManager(info));
116 }
117
NewForFirstStageMount(IDeviceInfo * info)118 std::unique_ptr<SnapshotManager> SnapshotManager::NewForFirstStageMount(IDeviceInfo* info) {
119 if (!info) {
120 DeviceInfo* impl = new DeviceInfo();
121 impl->set_first_stage_init(true);
122 info = impl;
123 }
124 auto sm = New(info);
125
126 // The first-stage version of snapuserd is explicitly started by init. Do
127 // not attempt to using it during tests (which run in normal AOSP).
128 if (!sm->device()->IsTestDevice()) {
129 sm->use_first_stage_snapuserd_ = true;
130 }
131 return sm;
132 }
133
SnapshotManager(IDeviceInfo * device)134 SnapshotManager::SnapshotManager(IDeviceInfo* device)
135 : dm_(device->GetDeviceMapper()), device_(device), metadata_dir_(device_->GetMetadataDir()) {}
136
// Returns |snapshot_name| with the "-cow" suffix appended.
static std::string GetCowName(const std::string& snapshot_name) {
    std::string cow_name(snapshot_name);
    cow_name.append("-cow");
    return cow_name;
}
140
GetSnapshotDriver(LockedFile * lock)141 SnapshotManager::SnapshotDriver SnapshotManager::GetSnapshotDriver(LockedFile* lock) {
142 if (UpdateUsesUserSnapshots(lock)) {
143 return SnapshotManager::SnapshotDriver::DM_USER;
144 } else {
145 return SnapshotManager::SnapshotDriver::DM_SNAPSHOT;
146 }
147 }
148
GetDmUserCowName(const std::string & snapshot_name,SnapshotManager::SnapshotDriver driver)149 static std::string GetDmUserCowName(const std::string& snapshot_name,
150 SnapshotManager::SnapshotDriver driver) {
151 // dm-user block device will act as a snapshot device. We identify it with
152 // the same partition name so that when partitions can be mounted off
153 // dm-user.
154
155 switch (driver) {
156 case SnapshotManager::SnapshotDriver::DM_USER: {
157 return snapshot_name;
158 }
159
160 case SnapshotManager::SnapshotDriver::DM_SNAPSHOT: {
161 return snapshot_name + "-user-cow";
162 }
163
164 default: {
165 LOG(ERROR) << "Invalid snapshot driver";
166 return "";
167 }
168 }
169 }
170
// Returns |snapshot_name| with the "-cow-img" suffix appended.
static std::string GetCowImageDeviceName(const std::string& snapshot_name) {
    std::string image_name(snapshot_name);
    image_name.append("-cow-img");
    return image_name;
}
174
// Returns |partition_name| with the "-base" suffix appended.
static std::string GetBaseDeviceName(const std::string& partition_name) {
    std::string base_name(partition_name);
    base_name.append("-base");
    return base_name;
}
178
// Returns |partition_name| with the "-src" suffix appended.
static std::string GetSourceDeviceName(const std::string& partition_name) {
    std::string source_name(partition_name);
    source_name.append("-src");
    return source_name;
}
182
BeginUpdate()183 bool SnapshotManager::BeginUpdate() {
184 bool needs_merge = false;
185 if (!TryCancelUpdate(&needs_merge)) {
186 return false;
187 }
188 if (needs_merge) {
189 LOG(INFO) << "Wait for merge (if any) before beginning a new update.";
190 auto state = ProcessUpdateState();
191 LOG(INFO) << "Merged with state = " << state;
192 }
193
194 auto file = LockExclusive();
195 if (!file) return false;
196
197 // Purge the ImageManager just in case there is a corrupt lp_metadata file
198 // lying around. (NB: no need to return false on an error, we can let the
199 // update try to progress.)
200 if (EnsureImageManager()) {
201 images_->RemoveAllImages();
202 }
203
204 // Clear any cached metadata (this allows re-using one manager across tests).
205 old_partition_metadata_ = nullptr;
206
207 auto state = ReadUpdateState(file.get());
208 if (state != UpdateState::None) {
209 LOG(ERROR) << "An update is already in progress, cannot begin a new update";
210 return false;
211 }
212 return WriteUpdateState(file.get(), UpdateState::Initiated);
213 }
214
CancelUpdate()215 bool SnapshotManager::CancelUpdate() {
216 bool needs_merge = false;
217 if (!TryCancelUpdate(&needs_merge)) {
218 return false;
219 }
220 if (needs_merge) {
221 LOG(ERROR) << "Cannot cancel update after it has completed or started merging";
222 }
223 return !needs_merge;
224 }
225
TryCancelUpdate(bool * needs_merge)226 bool SnapshotManager::TryCancelUpdate(bool* needs_merge) {
227 *needs_merge = false;
228
229 auto file = LockExclusive();
230 if (!file) return false;
231
232 if (IsSnapshotWithoutSlotSwitch()) {
233 LOG(ERROR) << "Cannot cancel the snapshots as partitions are mounted off the snapshots on "
234 "current slot.";
235 return false;
236 }
237
238 UpdateState state = ReadUpdateState(file.get());
239 if (state == UpdateState::None) {
240 RemoveInvalidSnapshots(file.get());
241 return true;
242 }
243
244 if (state == UpdateState::Initiated) {
245 LOG(INFO) << "Update has been initiated, now canceling";
246 return RemoveAllUpdateState(file.get());
247 }
248
249 if (state == UpdateState::Unverified) {
250 // We completed an update, but it can still be canceled if we haven't booted into it.
251 auto slot = GetCurrentSlot();
252 if (slot != Slot::Target) {
253 LOG(INFO) << "Canceling previously completed updates (if any)";
254 return RemoveAllUpdateState(file.get());
255 }
256 }
257 *needs_merge = true;
258 return true;
259 }
260
ReadUpdateSourceSlotSuffix()261 std::string SnapshotManager::ReadUpdateSourceSlotSuffix() {
262 auto boot_file = GetSnapshotBootIndicatorPath();
263 std::string contents;
264 if (!android::base::ReadFileToString(boot_file, &contents)) {
265 return {};
266 }
267 return contents;
268 }
269
GetCurrentSlot()270 SnapshotManager::Slot SnapshotManager::GetCurrentSlot() {
271 auto contents = ReadUpdateSourceSlotSuffix();
272 if (contents.empty()) {
273 return Slot::Unknown;
274 }
275 if (device_->GetSlotSuffix() == contents) {
276 return Slot::Source;
277 }
278 return Slot::Target;
279 }
280
GetSnapshotSlotSuffix()281 std::string SnapshotManager::GetSnapshotSlotSuffix() {
282 switch (GetCurrentSlot()) {
283 case Slot::Target:
284 return device_->GetSlotSuffix();
285 default:
286 return device_->GetOtherSlotSuffix();
287 }
288 }
289
RemoveFileIfExists(const std::string & path)290 static bool RemoveFileIfExists(const std::string& path) {
291 std::string message;
292 if (!android::base::RemoveFileIfExists(path, &message)) {
293 LOG(ERROR) << "Remove failed: " << path << ": " << message;
294 return false;
295 }
296 return true;
297 }
298
RemoveAllUpdateState(LockedFile * lock,const std::function<bool ()> & prolog)299 bool SnapshotManager::RemoveAllUpdateState(LockedFile* lock, const std::function<bool()>& prolog) {
300 if (prolog && !prolog()) {
301 LOG(WARNING) << "Can't RemoveAllUpdateState: prolog failed.";
302 return false;
303 }
304
305 LOG(INFO) << "Removing all update state.";
306
307 if (!RemoveAllSnapshots(lock)) {
308 LOG(ERROR) << "Could not remove all snapshots";
309 return false;
310 }
311
312 // It's okay if these fail:
313 // - For SnapshotBoot and Rollback, first-stage init performs a deeper check after
314 // reading the indicator file, so it's not a problem if it still exists
315 // after the update completes.
316 // - For ForwardMerge, FinishedSnapshotWrites asserts that the existence of the indicator
317 // matches the incoming update.
318 std::vector<std::string> files = {
319 GetSnapshotBootIndicatorPath(), GetRollbackIndicatorPath(),
320 GetForwardMergeIndicatorPath(), GetOldPartitionMetadataPath(),
321 GetBootSnapshotsWithoutSlotSwitchPath(),
322 };
323 for (const auto& file : files) {
324 RemoveFileIfExists(file);
325 }
326
327 // If this fails, we'll keep trying to remove the update state (as the
328 // device reboots or starts a new update) until it finally succeeds.
329 return WriteUpdateState(lock, UpdateState::None);
330 }
331
FinishedSnapshotWrites(bool wipe)332 bool SnapshotManager::FinishedSnapshotWrites(bool wipe) {
333 auto lock = LockExclusive();
334 if (!lock) return false;
335
336 auto update_state = ReadUpdateState(lock.get());
337 if (update_state == UpdateState::Unverified) {
338 LOG(INFO) << "FinishedSnapshotWrites already called before. Ignored.";
339 return true;
340 }
341
342 if (update_state != UpdateState::Initiated) {
343 LOG(ERROR) << "Can only transition to the Unverified state from the Initiated state.";
344 return false;
345 }
346
347 if (!EnsureNoOverflowSnapshot(lock.get())) {
348 LOG(ERROR) << "Cannot ensure there are no overflow snapshots.";
349 return false;
350 }
351
352 if (!UpdateForwardMergeIndicator(wipe)) {
353 return false;
354 }
355
356 // This file is written on boot to detect whether a rollback occurred. It
357 // MUST NOT exist before rebooting, otherwise, we're at risk of deleting
358 // snapshots too early.
359 if (!RemoveFileIfExists(GetRollbackIndicatorPath())) {
360 return false;
361 }
362
363 // This file acts as both a quick indicator for init (it can use access(2)
364 // to decide how to do first-stage mounts), and it stores the old slot, so
365 // we can tell whether or not we performed a rollback.
366 auto contents = device_->GetSlotSuffix();
367 auto boot_file = GetSnapshotBootIndicatorPath();
368 if (!WriteStringToFileAtomic(contents, boot_file)) {
369 PLOG(ERROR) << "write failed: " << boot_file;
370 return false;
371 }
372 return WriteUpdateState(lock.get(), UpdateState::Unverified);
373 }
374
CreateSnapshot(LockedFile * lock,PartitionCowCreator * cow_creator,SnapshotStatus * status)375 bool SnapshotManager::CreateSnapshot(LockedFile* lock, PartitionCowCreator* cow_creator,
376 SnapshotStatus* status) {
377 CHECK(lock);
378 CHECK(lock->lock_mode() == LOCK_EX);
379 CHECK(status);
380
381 if (status->name().empty()) {
382 LOG(ERROR) << "SnapshotStatus has no name.";
383 return false;
384 }
385 // Check these sizes. Like liblp, we guarantee the partition size is
386 // respected, which means it has to be sector-aligned. (This guarantee is
387 // useful for locating avb footers correctly). The COW file size, however,
388 // can be arbitrarily larger than specified, so we can safely round it up.
389 if (status->device_size() % kSectorSize != 0) {
390 LOG(ERROR) << "Snapshot " << status->name()
391 << " device size is not a multiple of the sector size: "
392 << status->device_size();
393 return false;
394 }
395 if (status->snapshot_size() % kSectorSize != 0) {
396 LOG(ERROR) << "Snapshot " << status->name()
397 << " snapshot size is not a multiple of the sector size: "
398 << status->snapshot_size();
399 return false;
400 }
401 if (status->cow_partition_size() % kSectorSize != 0) {
402 LOG(ERROR) << "Snapshot " << status->name()
403 << " cow partition size is not a multiple of the sector size: "
404 << status->cow_partition_size();
405 return false;
406 }
407 if (status->cow_file_size() % kSectorSize != 0) {
408 LOG(ERROR) << "Snapshot " << status->name()
409 << " cow file size is not a multiple of the sector size: "
410 << status->cow_file_size();
411 return false;
412 }
413
414 status->set_state(SnapshotState::CREATED);
415 status->set_sectors_allocated(0);
416 status->set_metadata_sectors(0);
417 status->set_using_snapuserd(cow_creator->using_snapuserd);
418 status->set_compression_algorithm(cow_creator->compression_algorithm);
419 status->set_compression_factor(cow_creator->compression_factor);
420 status->set_read_ahead_size(cow_creator->read_ahead_size);
421 if (cow_creator->enable_threading) {
422 status->set_enable_threading(cow_creator->enable_threading);
423 }
424 if (cow_creator->batched_writes) {
425 status->set_batched_writes(cow_creator->batched_writes);
426 }
427
428 if (!WriteSnapshotStatus(lock, *status)) {
429 PLOG(ERROR) << "Could not write snapshot status: " << status->name();
430 return false;
431 }
432 return true;
433 }
434
CreateCowImage(LockedFile * lock,const std::string & name)435 Return SnapshotManager::CreateCowImage(LockedFile* lock, const std::string& name) {
436 CHECK(lock);
437 CHECK(lock->lock_mode() == LOCK_EX);
438 if (!EnsureImageManager()) return Return::Error();
439
440 SnapshotStatus status;
441 if (!ReadSnapshotStatus(lock, name, &status)) {
442 return Return::Error();
443 }
444
445 // The COW file size should have been rounded up to the nearest sector in CreateSnapshot.
446 if (status.cow_file_size() % kSectorSize != 0) {
447 LOG(ERROR) << "Snapshot " << name << " COW file size is not a multiple of the sector size: "
448 << status.cow_file_size();
449 return Return::Error();
450 }
451
452 std::string cow_image_name = GetCowImageDeviceName(name);
453 int cow_flags = IImageManager::CREATE_IMAGE_DEFAULT;
454 return Return(images_->CreateBackingImage(cow_image_name, status.cow_file_size(), cow_flags));
455 }
456
MapDmUserCow(LockedFile * lock,const std::string & name,const std::string & cow_file,const std::string & base_device,const std::string & base_path_merge,const std::chrono::milliseconds & timeout_ms,std::string * path)457 bool SnapshotManager::MapDmUserCow(LockedFile* lock, const std::string& name,
458 const std::string& cow_file, const std::string& base_device,
459 const std::string& base_path_merge,
460 const std::chrono::milliseconds& timeout_ms, std::string* path) {
461 CHECK(lock);
462
463 if (UpdateUsesUserSnapshots(lock)) {
464 SnapshotStatus status;
465 if (!ReadSnapshotStatus(lock, name, &status)) {
466 LOG(ERROR) << "MapDmUserCow: ReadSnapshotStatus failed...";
467 return false;
468 }
469
470 if (status.state() == SnapshotState::NONE ||
471 status.state() == SnapshotState::MERGE_COMPLETED) {
472 LOG(ERROR) << "Should not create a snapshot device for " << name
473 << " after merging has completed.";
474 return false;
475 }
476
477 SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
478 if (update_status.state() == UpdateState::MergeCompleted ||
479 update_status.state() == UpdateState::MergeNeedsReboot) {
480 LOG(ERROR) << "Should not create a snapshot device for " << name
481 << " after global merging has completed.";
482 return false;
483 }
484 }
485
486 // Use an extra decoration for first-stage init, so we can transition
487 // to a new table entry in second-stage.
488 std::string misc_name = name;
489 if (use_first_stage_snapuserd_) {
490 misc_name += "-init";
491 }
492
493 if (!EnsureSnapuserdConnected()) {
494 return false;
495 }
496
497 uint64_t base_sectors = 0;
498 if (!UpdateUsesUserSnapshots(lock)) {
499 base_sectors = snapuserd_client_->InitDmUserCow(misc_name, cow_file, base_device);
500 if (base_sectors == 0) {
501 LOG(ERROR) << "Failed to retrieve base_sectors from Snapuserd";
502 return false;
503 }
504 } else if (IsSnapshotWithoutSlotSwitch()) {
505 // When snapshots are on current slot, we determine the size
506 // of block device based on the number of COW operations. We cannot
507 // use base device as it will be from older image.
508 unique_fd fd(open(cow_file.c_str(), O_RDONLY | O_CLOEXEC));
509 if (fd < 0) {
510 PLOG(ERROR) << "Failed to open " << cow_file;
511 return false;
512 }
513
514 CowReader reader;
515 if (!reader.Parse(std::move(fd))) {
516 LOG(ERROR) << "Failed to parse cow " << cow_file;
517 return false;
518 }
519
520 uint64_t dev_sz = 0;
521 const auto& header = reader.GetHeader();
522 if (header.prefix.major_version == 2) {
523 const size_t num_ops = reader.get_num_total_data_ops();
524 dev_sz = (num_ops * header.block_size);
525 } else {
526 // create_snapshot will skip in-place copy ops. Hence, fetch this
527 // information directly from v3 header.
528 const auto& v3_header = reader.header_v3();
529 dev_sz = v3_header.op_count_max * v3_header.block_size;
530 }
531
532 base_sectors = dev_sz >> 9;
533 } else {
534 // For userspace snapshots, the size of the base device is taken as the
535 // size of the dm-user block device. Since there is no pseudo mapping
536 // created in the daemon, we no longer need to rely on the daemon for
537 // sizing the dm-user block device.
538 unique_fd fd(TEMP_FAILURE_RETRY(open(base_path_merge.c_str(), O_RDONLY | O_CLOEXEC)));
539 if (fd < 0) {
540 LOG(ERROR) << "Cannot open block device: " << base_path_merge;
541 return false;
542 }
543
544 uint64_t dev_sz = get_block_device_size(fd.get());
545 if (!dev_sz) {
546 LOG(ERROR) << "Failed to find block device size: " << base_path_merge;
547 return false;
548 }
549
550 base_sectors = dev_sz >> 9;
551 }
552
553 DmTable table;
554 table.Emplace<DmTargetUser>(0, base_sectors, misc_name);
555 if (!dm_.CreateDevice(name, table, path, timeout_ms)) {
556 LOG(ERROR) << " dm-user: CreateDevice failed... ";
557 return false;
558 }
559 if (!WaitForDevice(*path, timeout_ms)) {
560 LOG(ERROR) << " dm-user: timeout: Failed to create block device for: " << name;
561 return false;
562 }
563
564 auto control_device = "/dev/dm-user/" + misc_name;
565 if (!WaitForDevice(control_device, timeout_ms)) {
566 return false;
567 }
568
569 if (UpdateUsesUserSnapshots(lock)) {
570 // Now that the dm-user device is created, initialize the daemon and
571 // spin up the worker threads.
572 if (!snapuserd_client_->InitDmUserCow(misc_name, cow_file, base_device, base_path_merge)) {
573 LOG(ERROR) << "InitDmUserCow failed";
574 return false;
575 }
576 }
577
578 return snapuserd_client_->AttachDmUser(misc_name);
579 }
580
MapSnapshot(LockedFile * lock,const std::string & name,const std::string & base_device,const std::string & cow_device,const std::chrono::milliseconds & timeout_ms,std::string * dev_path)581 bool SnapshotManager::MapSnapshot(LockedFile* lock, const std::string& name,
582 const std::string& base_device, const std::string& cow_device,
583 const std::chrono::milliseconds& timeout_ms,
584 std::string* dev_path) {
585 CHECK(lock);
586
587 SnapshotStatus status;
588 if (!ReadSnapshotStatus(lock, name, &status)) {
589 return false;
590 }
591 if (status.state() == SnapshotState::NONE || status.state() == SnapshotState::MERGE_COMPLETED) {
592 LOG(ERROR) << "Should not create a snapshot device for " << name
593 << " after merging has completed.";
594 return false;
595 }
596
597 // Validate the block device size, as well as the requested snapshot size.
598 // Note that during first-stage init, we don't have the device paths.
599 if (android::base::StartsWith(base_device, "/")) {
600 unique_fd fd(open(base_device.c_str(), O_RDONLY | O_CLOEXEC));
601 if (fd < 0) {
602 PLOG(ERROR) << "open failed: " << base_device;
603 return false;
604 }
605 auto dev_size = get_block_device_size(fd);
606 if (!dev_size) {
607 PLOG(ERROR) << "Could not determine block device size: " << base_device;
608 return false;
609 }
610 if (status.device_size() != dev_size) {
611 LOG(ERROR) << "Block device size for " << base_device << " does not match"
612 << "(expected " << status.device_size() << ", got " << dev_size << ")";
613 return false;
614 }
615 }
616 if (status.device_size() % kSectorSize != 0) {
617 LOG(ERROR) << "invalid blockdev size for " << base_device << ": " << status.device_size();
618 return false;
619 }
620 if (status.snapshot_size() % kSectorSize != 0 ||
621 status.snapshot_size() > status.device_size()) {
622 LOG(ERROR) << "Invalid snapshot size for " << base_device << ": " << status.snapshot_size();
623 return false;
624 }
625 if (status.device_size() != status.snapshot_size()) {
626 LOG(ERROR) << "Device size and snapshot size must be the same (device size = "
627 << status.device_size() << ", snapshot size = " << status.snapshot_size();
628 return false;
629 }
630
631 uint64_t snapshot_sectors = status.snapshot_size() / kSectorSize;
632
633 // Note that merging is a global state. We do track whether individual devices
634 // have completed merging, but the start of the merge process is considered
635 // atomic.
636 SnapshotStorageMode mode;
637 SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
638 switch (update_status.state()) {
639 case UpdateState::MergeCompleted:
640 case UpdateState::MergeNeedsReboot:
641 LOG(ERROR) << "Should not create a snapshot device for " << name
642 << " after global merging has completed.";
643 return false;
644 case UpdateState::Merging:
645 case UpdateState::MergeFailed:
646 // Note: MergeFailed indicates that a merge is in progress, but
647 // is possibly stalled. We still have to honor the merge.
648 if (DecideMergePhase(status) == update_status.merge_phase()) {
649 mode = SnapshotStorageMode::Merge;
650 } else {
651 mode = SnapshotStorageMode::Persistent;
652 }
653 break;
654 default:
655 mode = SnapshotStorageMode::Persistent;
656 break;
657 }
658
659 if (mode == SnapshotStorageMode::Persistent && status.state() == SnapshotState::MERGING) {
660 LOG(ERROR) << "Snapshot: " << name
661 << " has snapshot status Merging but mode set to Persistent."
662 << " Changing mode to Snapshot-Merge.";
663 mode = SnapshotStorageMode::Merge;
664 }
665
666 DmTable table;
667 table.Emplace<DmTargetSnapshot>(0, snapshot_sectors, base_device, cow_device, mode,
668 kSnapshotChunkSize);
669 if (!dm_.CreateDevice(name, table, dev_path, timeout_ms)) {
670 LOG(ERROR) << "Could not create snapshot device: " << name;
671 return false;
672 }
673 return true;
674 }
675
MapCowImage(const std::string & name,const std::chrono::milliseconds & timeout_ms)676 std::optional<std::string> SnapshotManager::MapCowImage(
677 const std::string& name, const std::chrono::milliseconds& timeout_ms) {
678 if (!EnsureImageManager()) return std::nullopt;
679 auto cow_image_name = GetCowImageDeviceName(name);
680
681 bool ok;
682 std::string cow_dev;
683 if (device_->IsRecovery() || device_->IsFirstStageInit()) {
684 const auto& opener = device_->GetPartitionOpener();
685 ok = images_->MapImageWithDeviceMapper(opener, cow_image_name, &cow_dev);
686 } else {
687 ok = images_->MapImageDevice(cow_image_name, timeout_ms, &cow_dev);
688 }
689
690 if (ok) {
691 LOG(INFO) << "Mapped " << cow_image_name << " to " << cow_dev;
692 return cow_dev;
693 }
694 LOG(ERROR) << "Could not map image device: " << cow_image_name;
695 return std::nullopt;
696 }
697
MapSourceDevice(LockedFile * lock,const std::string & name,const std::chrono::milliseconds & timeout_ms,std::string * path)698 bool SnapshotManager::MapSourceDevice(LockedFile* lock, const std::string& name,
699 const std::chrono::milliseconds& timeout_ms,
700 std::string* path) {
701 CHECK(lock);
702
703 auto metadata = ReadOldPartitionMetadata(lock);
704 if (!metadata) {
705 LOG(ERROR) << "Could not map source device due to missing or corrupt metadata";
706 return false;
707 }
708
709 auto old_name = GetOtherPartitionName(name);
710 auto slot_suffix = device_->GetSlotSuffix();
711 auto slot = SlotNumberForSlotSuffix(slot_suffix);
712
713 CreateLogicalPartitionParams params = {
714 .block_device = device_->GetSuperDevice(slot),
715 .metadata = metadata,
716 .partition_name = old_name,
717 .timeout_ms = timeout_ms,
718 .device_name = GetSourceDeviceName(name),
719 .partition_opener = &device_->GetPartitionOpener(),
720 };
721 if (!CreateLogicalPartition(std::move(params), path)) {
722 LOG(ERROR) << "Could not create source device for snapshot " << name;
723 return false;
724 }
725 return true;
726 }
727
UnmapSnapshot(LockedFile * lock,const std::string & name)728 bool SnapshotManager::UnmapSnapshot(LockedFile* lock, const std::string& name) {
729 CHECK(lock);
730
731 if (UpdateUsesUserSnapshots(lock)) {
732 if (!UnmapUserspaceSnapshotDevice(lock, name)) {
733 return false;
734 }
735 } else {
736 if (!DeleteDeviceIfExists(name)) {
737 LOG(ERROR) << "Could not delete snapshot device: " << name;
738 return false;
739 }
740 }
741 return true;
742 }
743
UnmapCowImage(const std::string & name)744 bool SnapshotManager::UnmapCowImage(const std::string& name) {
745 if (!EnsureImageManager()) return false;
746 return images_->UnmapImageIfExists(GetCowImageDeviceName(name));
747 }
748
DeleteSnapshot(LockedFile * lock,const std::string & name)749 bool SnapshotManager::DeleteSnapshot(LockedFile* lock, const std::string& name) {
750 CHECK(lock);
751 CHECK(lock->lock_mode() == LOCK_EX);
752 if (!EnsureImageManager()) return false;
753
754 if (!UnmapCowDevices(lock, name)) {
755 return false;
756 }
757
758 // We can't delete snapshots in recovery. The only way we'd try is it we're
759 // completing or canceling a merge in preparation for a data wipe, in which
760 // case, we don't care if the file sticks around.
761 if (device_->IsRecovery()) {
762 LOG(INFO) << "Skipping delete of snapshot " << name << " in recovery.";
763 return true;
764 }
765
766 auto cow_image_name = GetCowImageDeviceName(name);
767 if (images_->BackingImageExists(cow_image_name)) {
768 if (!images_->DeleteBackingImage(cow_image_name)) {
769 return false;
770 }
771 }
772
773 std::string error;
774 auto file_path = GetSnapshotStatusFilePath(name);
775 if (!android::base::RemoveFileIfExists(file_path, &error)) {
776 LOG(ERROR) << "Failed to remove status file " << file_path << ": " << error;
777 return false;
778 }
779
780 // This path may never exist. If it is present, then it's a stale
781 // snapshot status file. Just remove the file and log the message.
782 const std::string tmp_path = file_path + ".tmp";
783 if (!android::base::RemoveFileIfExists(tmp_path, &error)) {
784 LOG(ERROR) << "Failed to remove stale snapshot file " << tmp_path;
785 }
786
787 return true;
788 }
789
InitiateMerge()790 bool SnapshotManager::InitiateMerge() {
791 auto lock = LockExclusive();
792 if (!lock) return false;
793
794 UpdateState state = ReadUpdateState(lock.get());
795 if (state != UpdateState::Unverified) {
796 LOG(ERROR) << "Cannot begin a merge if an update has not been verified";
797 return false;
798 }
799
800 auto slot = GetCurrentSlot();
801 if (slot != Slot::Target) {
802 LOG(ERROR) << "Device cannot merge while not booting from new slot";
803 return false;
804 }
805
806 std::vector<std::string> snapshots;
807 if (!ListSnapshots(lock.get(), &snapshots)) {
808 LOG(ERROR) << "Could not list snapshots";
809 return false;
810 }
811
812 auto current_slot_suffix = device_->GetSlotSuffix();
813
814 for (const auto& snapshot : snapshots) {
815 if (!android::base::EndsWith(snapshot, current_slot_suffix)) {
816 // Allow the merge to continue, but log this unexpected case.
817 LOG(ERROR) << "Unexpected snapshot found during merge: " << snapshot;
818 continue;
819 }
820
821 // The device has to be mapped, since everything should be merged at
822 // the same time. This is a fairly serious error. We could forcefully
823 // map everything here, but it should have been mapped during first-
824 // stage init.
825 if (dm_.GetState(snapshot) == DmDeviceState::INVALID) {
826 LOG(ERROR) << "Cannot begin merge; device " << snapshot << " is not mapped.";
827 return false;
828 }
829 }
830
831 auto metadata = ReadCurrentMetadata();
832 for (auto it = snapshots.begin(); it != snapshots.end();) {
833 switch (GetMetadataPartitionState(*metadata, *it)) {
834 case MetadataPartitionState::Flashed:
835 LOG(WARNING) << "Detected re-flashing for partition " << *it
836 << ". Skip merging it.";
837 [[fallthrough]];
838 case MetadataPartitionState::None: {
839 LOG(WARNING) << "Deleting snapshot for partition " << *it;
840 if (!DeleteSnapshot(lock.get(), *it)) {
841 LOG(WARNING) << "Cannot delete snapshot for partition " << *it
842 << ". Skip merging it anyways.";
843 }
844 it = snapshots.erase(it);
845 } break;
846 case MetadataPartitionState::Updated: {
847 ++it;
848 } break;
849 }
850 }
851
852 bool using_snapuserd = false;
853
854 std::vector<std::string> first_merge_group;
855
856 DmTargetSnapshot::Status initial_target_values = {};
857 for (const auto& snapshot : snapshots) {
858 if (!UpdateUsesUserSnapshots(lock.get())) {
859 DmTargetSnapshot::Status current_status;
860 if (!QuerySnapshotStatus(snapshot, nullptr, ¤t_status)) {
861 return false;
862 }
863 initial_target_values.sectors_allocated += current_status.sectors_allocated;
864 initial_target_values.total_sectors += current_status.total_sectors;
865 initial_target_values.metadata_sectors += current_status.metadata_sectors;
866 }
867
868 SnapshotStatus snapshot_status;
869 if (!ReadSnapshotStatus(lock.get(), snapshot, &snapshot_status)) {
870 return false;
871 }
872
873 using_snapuserd |= snapshot_status.using_snapuserd();
874 if (DecideMergePhase(snapshot_status) == MergePhase::FIRST_PHASE) {
875 first_merge_group.emplace_back(snapshot);
876 }
877 }
878
879 SnapshotUpdateStatus initial_status = ReadSnapshotUpdateStatus(lock.get());
880 initial_status.set_state(UpdateState::Merging);
881 initial_status.set_using_snapuserd(using_snapuserd);
882
883 if (!UpdateUsesUserSnapshots(lock.get())) {
884 initial_status.set_sectors_allocated(initial_target_values.sectors_allocated);
885 initial_status.set_total_sectors(initial_target_values.total_sectors);
886 initial_status.set_metadata_sectors(initial_target_values.metadata_sectors);
887 }
888
889 // If any partitions shrunk, we need to merge them before we merge any other
890 // partitions (see b/177935716). Otherwise, a merge from another partition
891 // may overwrite the source block of a copy operation.
892 const std::vector<std::string>* merge_group;
893 if (first_merge_group.empty()) {
894 merge_group = &snapshots;
895 initial_status.set_merge_phase(MergePhase::SECOND_PHASE);
896 } else {
897 merge_group = &first_merge_group;
898 initial_status.set_merge_phase(MergePhase::FIRST_PHASE);
899 }
900
901 // Point of no return - mark that we're starting a merge. From now on every
902 // eligible snapshot must be a merge target.
903 if (!WriteSnapshotUpdateStatus(lock.get(), initial_status)) {
904 return false;
905 }
906
907 auto reported_code = MergeFailureCode::Ok;
908 for (const auto& snapshot : *merge_group) {
909 // If this fails, we have no choice but to continue. Everything must
910 // be merged. This is not an ideal state to be in, but it is safe,
911 // because we the next boot will try again.
912 auto code = SwitchSnapshotToMerge(lock.get(), snapshot);
913 if (code != MergeFailureCode::Ok) {
914 LOG(ERROR) << "Failed to switch snapshot to a merge target: " << snapshot;
915 if (reported_code == MergeFailureCode::Ok) {
916 reported_code = code;
917 }
918 }
919 }
920
921 // If we couldn't switch everything to a merge target, pre-emptively mark
922 // this merge as failed. It will get acknowledged when WaitForMerge() is
923 // called.
924 if (reported_code != MergeFailureCode::Ok) {
925 WriteUpdateState(lock.get(), UpdateState::MergeFailed, reported_code);
926 }
927
928 // Return true no matter what, because a merge was initiated.
929 return true;
930 }
931
SwitchSnapshotToMerge(LockedFile * lock,const std::string & name)932 MergeFailureCode SnapshotManager::SwitchSnapshotToMerge(LockedFile* lock, const std::string& name) {
933 SnapshotStatus status;
934 if (!ReadSnapshotStatus(lock, name, &status)) {
935 return MergeFailureCode::ReadStatus;
936 }
937 if (status.state() != SnapshotState::CREATED) {
938 LOG(WARNING) << "Snapshot " << name
939 << " has unexpected state: " << SnapshotState_Name(status.state());
940 }
941
942 if (UpdateUsesUserSnapshots(lock)) {
943 if (EnsureSnapuserdConnected()) {
944 // This is the point where we inform the daemon to initiate/resume
945 // the merge
946 if (!snapuserd_client_->InitiateMerge(name)) {
947 return MergeFailureCode::UnknownTable;
948 }
949 } else {
950 LOG(ERROR) << "Failed to connect to snapuserd daemon to initiate merge";
951 return MergeFailureCode::UnknownTable;
952 }
953 } else {
954 // After this, we return true because we technically did switch to a merge
955 // target. Everything else we do here is just informational.
956 if (auto code = RewriteSnapshotDeviceTable(name); code != MergeFailureCode::Ok) {
957 return code;
958 }
959 }
960
961 status.set_state(SnapshotState::MERGING);
962
963 if (!UpdateUsesUserSnapshots(lock)) {
964 DmTargetSnapshot::Status dm_status;
965 if (!QuerySnapshotStatus(name, nullptr, &dm_status)) {
966 LOG(ERROR) << "Could not query merge status for snapshot: " << name;
967 }
968 status.set_sectors_allocated(dm_status.sectors_allocated);
969 status.set_metadata_sectors(dm_status.metadata_sectors);
970 }
971
972 if (!WriteSnapshotStatus(lock, status)) {
973 LOG(ERROR) << "Could not update status file for snapshot: " << name;
974 }
975 return MergeFailureCode::Ok;
976 }
977
RewriteSnapshotDeviceTable(const std::string & name)978 MergeFailureCode SnapshotManager::RewriteSnapshotDeviceTable(const std::string& name) {
979 std::vector<DeviceMapper::TargetInfo> old_targets;
980 if (!dm_.GetTableInfo(name, &old_targets)) {
981 LOG(ERROR) << "Could not read snapshot device table: " << name;
982 return MergeFailureCode::GetTableInfo;
983 }
984 if (old_targets.size() != 1 || DeviceMapper::GetTargetType(old_targets[0].spec) != "snapshot") {
985 LOG(ERROR) << "Unexpected device-mapper table for snapshot: " << name;
986 return MergeFailureCode::UnknownTable;
987 }
988
989 std::string base_device, cow_device;
990 if (!DmTargetSnapshot::GetDevicesFromParams(old_targets[0].data, &base_device, &cow_device)) {
991 LOG(ERROR) << "Could not derive underlying devices for snapshot: " << name;
992 return MergeFailureCode::GetTableParams;
993 }
994
995 DmTable table;
996 table.Emplace<DmTargetSnapshot>(0, old_targets[0].spec.length, base_device, cow_device,
997 SnapshotStorageMode::Merge, kSnapshotChunkSize);
998 if (!dm_.LoadTableAndActivate(name, table)) {
999 LOG(ERROR) << "Could not swap device-mapper tables on snapshot device " << name;
1000 return MergeFailureCode::ActivateNewTable;
1001 }
1002 LOG(INFO) << "Successfully switched snapshot device to a merge target: " << name;
1003 return MergeFailureCode::Ok;
1004 }
1005
GetSingleTarget(const std::string & dm_name,TableQuery query,DeviceMapper::TargetInfo * target)1006 bool SnapshotManager::GetSingleTarget(const std::string& dm_name, TableQuery query,
1007 DeviceMapper::TargetInfo* target) {
1008 if (dm_.GetState(dm_name) == DmDeviceState::INVALID) {
1009 return false;
1010 }
1011
1012 std::vector<DeviceMapper::TargetInfo> targets;
1013 bool result;
1014 if (query == TableQuery::Status) {
1015 result = dm_.GetTableStatus(dm_name, &targets);
1016 } else {
1017 result = dm_.GetTableInfo(dm_name, &targets);
1018 }
1019 if (!result) {
1020 LOG(ERROR) << "Could not query device: " << dm_name;
1021 return false;
1022 }
1023 if (targets.size() != 1) {
1024 return false;
1025 }
1026
1027 *target = std::move(targets[0]);
1028 return true;
1029 }
1030
IsSnapshotDevice(const std::string & dm_name,TargetInfo * target)1031 bool SnapshotManager::IsSnapshotDevice(const std::string& dm_name, TargetInfo* target) {
1032 DeviceMapper::TargetInfo snap_target;
1033 if (!GetSingleTarget(dm_name, TableQuery::Status, &snap_target)) {
1034 return false;
1035 }
1036 auto type = DeviceMapper::GetTargetType(snap_target.spec);
1037
1038 // If this is not a user-snapshot device then it should either
1039 // be a dm-snapshot or dm-snapshot-merge target
1040 if (type != "user") {
1041 if (type != "snapshot" && type != "snapshot-merge") {
1042 return false;
1043 }
1044 }
1045
1046 if (target) {
1047 *target = std::move(snap_target);
1048 }
1049 return true;
1050 }
1051
UpdateStateToStr(const enum UpdateState state)1052 auto SnapshotManager::UpdateStateToStr(const enum UpdateState state) {
1053 switch (state) {
1054 case None:
1055 return "None";
1056 case Initiated:
1057 return "Initiated";
1058 case Unverified:
1059 return "Unverified";
1060 case Merging:
1061 return "Merging";
1062 case MergeNeedsReboot:
1063 return "MergeNeedsReboot";
1064 case MergeCompleted:
1065 return "MergeCompleted";
1066 case MergeFailed:
1067 return "MergeFailed";
1068 case Cancelled:
1069 return "Cancelled";
1070 default:
1071 return "Unknown";
1072 }
1073 }
1074
QuerySnapshotStatus(const std::string & dm_name,std::string * target_type,DmTargetSnapshot::Status * status)1075 bool SnapshotManager::QuerySnapshotStatus(const std::string& dm_name, std::string* target_type,
1076 DmTargetSnapshot::Status* status) {
1077 DeviceMapper::TargetInfo target;
1078 if (!IsSnapshotDevice(dm_name, &target)) {
1079 LOG(ERROR) << "Device " << dm_name << " is not a snapshot or snapshot-merge device";
1080 return false;
1081 }
1082 if (!DmTargetSnapshot::ParseStatusText(target.data, status)) {
1083 LOG(ERROR) << "Could not parse snapshot status text: " << dm_name;
1084 return false;
1085 }
1086 if (target_type) {
1087 *target_type = DeviceMapper::GetTargetType(target.spec);
1088 }
1089 if (!status->error.empty()) {
1090 LOG(ERROR) << "Snapshot: " << dm_name << " returned error code: " << status->error;
1091 return false;
1092 }
1093 return true;
1094 }
1095
1096 // Note that when a merge fails, we will *always* try again to complete the
1097 // merge each time the device boots. There is no harm in doing so, and if
1098 // the problem was transient, we might manage to get a new outcome.
ProcessUpdateState(const std::function<bool ()> & callback,const std::function<bool ()> & before_cancel)1099 UpdateState SnapshotManager::ProcessUpdateState(const std::function<bool()>& callback,
1100 const std::function<bool()>& before_cancel) {
1101 while (true) {
1102 auto result = CheckMergeState(before_cancel);
1103 LOG(INFO) << "ProcessUpdateState handling state: " << UpdateStateToStr(result.state);
1104
1105 if (result.state == UpdateState::MergeFailed) {
1106 AcknowledgeMergeFailure(result.failure_code);
1107 }
1108 if (result.state != UpdateState::Merging) {
1109 // Either there is no merge, or the merge was finished, so no need
1110 // to keep waiting.
1111 return result.state;
1112 }
1113
1114 if (callback && !callback()) {
1115 return result.state;
1116 }
1117
1118 // This wait is not super time sensitive, so we have a relatively
1119 // low polling frequency.
1120 std::this_thread::sleep_for(kUpdateStateCheckInterval);
1121 }
1122 }
1123
CheckMergeState(const std::function<bool ()> & before_cancel)1124 auto SnapshotManager::CheckMergeState(const std::function<bool()>& before_cancel) -> MergeResult {
1125 auto lock = LockExclusive();
1126 if (!lock) {
1127 return MergeResult(UpdateState::MergeFailed, MergeFailureCode::AcquireLock);
1128 }
1129
1130 auto result = CheckMergeState(lock.get(), before_cancel);
1131 LOG(INFO) << "CheckMergeState for snapshots returned: " << UpdateStateToStr(result.state);
1132
1133 if (result.state == UpdateState::MergeCompleted) {
1134 // Do this inside the same lock. Failures get acknowledged without the
1135 // lock, because flock() might have failed.
1136 AcknowledgeMergeSuccess(lock.get());
1137 } else if (result.state == UpdateState::Cancelled) {
1138 if (!device_->IsRecovery() && !RemoveAllUpdateState(lock.get(), before_cancel)) {
1139 LOG(ERROR) << "Failed to remove all update state after acknowleding cancelled update.";
1140 }
1141 }
1142 return result;
1143 }
1144
CheckMergeState(LockedFile * lock,const std::function<bool ()> & before_cancel)1145 auto SnapshotManager::CheckMergeState(LockedFile* lock,
1146 const std::function<bool()>& before_cancel) -> MergeResult {
1147 SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
1148 switch (update_status.state()) {
1149 case UpdateState::None:
1150 case UpdateState::MergeCompleted:
1151 // Harmless races are allowed between two callers of WaitForMerge,
1152 // so in both of these cases we just propagate the state.
1153 return MergeResult(update_status.state());
1154
1155 case UpdateState::Merging:
1156 case UpdateState::MergeNeedsReboot:
1157 case UpdateState::MergeFailed:
1158 // We'll poll each snapshot below. Note that for the NeedsReboot
1159 // case, we always poll once to give cleanup another opportunity to
1160 // run.
1161 break;
1162
1163 case UpdateState::Unverified:
1164 // This is an edge case. Normally cancelled updates are detected
1165 // via the merge poll below, but if we never started a merge, we
1166 // need to also check here.
1167 if (HandleCancelledUpdate(lock, before_cancel)) {
1168 return MergeResult(UpdateState::Cancelled);
1169 }
1170 return MergeResult(update_status.state());
1171
1172 default:
1173 return MergeResult(update_status.state());
1174 }
1175
1176 std::vector<std::string> snapshots;
1177 if (!ListSnapshots(lock, &snapshots)) {
1178 return MergeResult(UpdateState::MergeFailed, MergeFailureCode::ListSnapshots);
1179 }
1180
1181 auto current_slot_suffix = device_->GetSlotSuffix();
1182
1183 bool cancelled = false;
1184 bool merging = false;
1185 bool needs_reboot = false;
1186 bool wrong_phase = false;
1187 MergeFailureCode failure_code = MergeFailureCode::Ok;
1188 for (const auto& snapshot : snapshots) {
1189 if (!android::base::EndsWith(snapshot, current_slot_suffix)) {
1190 // This will have triggered an error message in InitiateMerge already.
1191 LOG(ERROR) << "Skipping merge validation of unexpected snapshot: " << snapshot;
1192 continue;
1193 }
1194
1195 auto result = CheckTargetMergeState(lock, snapshot, update_status);
1196 LOG(INFO) << "CheckTargetMergeState for " << snapshot
1197 << " returned: " << UpdateStateToStr(result.state);
1198
1199 switch (result.state) {
1200 case UpdateState::MergeFailed:
1201 // Take the first failure code in case other failures compound.
1202 if (failure_code == MergeFailureCode::Ok) {
1203 failure_code = result.failure_code;
1204 }
1205 break;
1206 case UpdateState::Merging:
1207 merging = true;
1208 break;
1209 case UpdateState::MergeNeedsReboot:
1210 needs_reboot = true;
1211 break;
1212 case UpdateState::MergeCompleted:
1213 break;
1214 case UpdateState::Cancelled:
1215 cancelled = true;
1216 break;
1217 case UpdateState::None:
1218 wrong_phase = true;
1219 break;
1220 default:
1221 LOG(ERROR) << "Unknown merge status for \"" << snapshot << "\": "
1222 << "\"" << result.state << "\"";
1223 if (failure_code == MergeFailureCode::Ok) {
1224 failure_code = MergeFailureCode::UnexpectedMergeState;
1225 }
1226 break;
1227 }
1228 }
1229
1230 if (merging) {
1231 // Note that we handle "Merging" before we handle anything else. We
1232 // want to poll until *nothing* is merging if we can, so everything has
1233 // a chance to get marked as completed or failed.
1234 return MergeResult(UpdateState::Merging);
1235 }
1236 if (failure_code != MergeFailureCode::Ok) {
1237 // Note: since there are many drop-out cases for failure, we acknowledge
1238 // it in WaitForMerge rather than here and elsewhere.
1239 return MergeResult(UpdateState::MergeFailed, failure_code);
1240 }
1241 if (wrong_phase) {
1242 // If we got here, no other partitions are being merged, and nothing
1243 // failed to merge. It's safe to move to the next merge phase.
1244 auto code = MergeSecondPhaseSnapshots(lock);
1245 if (code != MergeFailureCode::Ok) {
1246 return MergeResult(UpdateState::MergeFailed, code);
1247 }
1248 return MergeResult(UpdateState::Merging);
1249 }
1250 if (needs_reboot) {
1251 WriteUpdateState(lock, UpdateState::MergeNeedsReboot);
1252 return MergeResult(UpdateState::MergeNeedsReboot);
1253 }
1254 if (cancelled) {
1255 // This is an edge case, that we handle as correctly as we sensibly can.
1256 // The underlying partition has changed behind update_engine, and we've
1257 // removed the snapshot as a result. The exact state of the update is
1258 // undefined now, but this can only happen on an unlocked device where
1259 // partitions can be flashed without wiping userdata.
1260 return MergeResult(UpdateState::Cancelled);
1261 }
1262 return MergeResult(UpdateState::MergeCompleted);
1263 }
1264
CheckTargetMergeState(LockedFile * lock,const std::string & name,const SnapshotUpdateStatus & update_status)1265 auto SnapshotManager::CheckTargetMergeState(LockedFile* lock, const std::string& name,
1266 const SnapshotUpdateStatus& update_status)
1267 -> MergeResult {
1268 SnapshotStatus snapshot_status;
1269 if (!ReadSnapshotStatus(lock, name, &snapshot_status)) {
1270 return MergeResult(UpdateState::MergeFailed, MergeFailureCode::ReadStatus);
1271 }
1272
1273 std::unique_ptr<LpMetadata> current_metadata;
1274
1275 if (!IsSnapshotDevice(name)) {
1276 if (!current_metadata) {
1277 current_metadata = ReadCurrentMetadata();
1278 }
1279
1280 if (!current_metadata ||
1281 GetMetadataPartitionState(*current_metadata, name) != MetadataPartitionState::Updated) {
1282 DeleteSnapshot(lock, name);
1283 return MergeResult(UpdateState::Cancelled);
1284 }
1285
1286 // During a check, we decided the merge was complete, but we were unable to
1287 // collapse the device-mapper stack and perform COW cleanup. If we haven't
1288 // rebooted after this check, the device will still be a snapshot-merge
1289 // target. If we have rebooted, the device will now be a linear target,
1290 // and we can try cleanup again.
1291 if (snapshot_status.state() == SnapshotState::MERGE_COMPLETED) {
1292 // NB: It's okay if this fails now, we gave cleanup our best effort.
1293 OnSnapshotMergeComplete(lock, name, snapshot_status);
1294 return MergeResult(UpdateState::MergeCompleted);
1295 }
1296
1297 LOG(ERROR) << "Expected snapshot or snapshot-merge for device: " << name;
1298 return MergeResult(UpdateState::MergeFailed, MergeFailureCode::UnknownTargetType);
1299 }
1300
1301 // This check is expensive so it is only enabled for debugging.
1302 DCHECK((current_metadata = ReadCurrentMetadata()) &&
1303 GetMetadataPartitionState(*current_metadata, name) == MetadataPartitionState::Updated);
1304
1305 if (UpdateUsesUserSnapshots(lock)) {
1306 if (!EnsureSnapuserdConnected()) {
1307 return MergeResult(UpdateState::MergeFailed, MergeFailureCode::QuerySnapshotStatus);
1308 }
1309
1310 // Query the snapshot status from the daemon
1311 const auto merge_status = snapuserd_client_->QuerySnapshotStatus(name);
1312 if (merge_status == "snapshot-merge-failed") {
1313 return MergeResult(UpdateState::MergeFailed, MergeFailureCode::UnknownTargetType);
1314 }
1315
1316 // This is the case when device reboots during merge. Once the device boots,
1317 // snapuserd daemon will not resume merge immediately in first stage init.
1318 // This is slightly different as compared to dm-snapshot-merge; In this
1319 // case, metadata file will have "MERGING" state whereas the daemon will be
1320 // waiting to resume the merge. Thus, we resume the merge at this point.
1321 if (merge_status == "snapshot" && snapshot_status.state() == SnapshotState::MERGING) {
1322 if (!snapuserd_client_->InitiateMerge(name)) {
1323 return MergeResult(UpdateState::MergeFailed, MergeFailureCode::UnknownTargetType);
1324 }
1325 return MergeResult(UpdateState::Merging);
1326 }
1327
1328 if (merge_status == "snapshot" &&
1329 DecideMergePhase(snapshot_status) == MergePhase::SECOND_PHASE &&
1330 update_status.merge_phase() == MergePhase::FIRST_PHASE) {
1331 // The snapshot is not being merged because it's in the wrong phase.
1332 return MergeResult(UpdateState::None);
1333 }
1334
1335 if (merge_status == "snapshot-merge") {
1336 if (snapshot_status.state() == SnapshotState::MERGE_COMPLETED) {
1337 LOG(ERROR) << "Snapshot " << name
1338 << " is merging after being marked merge-complete.";
1339 return MergeResult(UpdateState::MergeFailed,
1340 MergeFailureCode::UnmergedSectorsAfterCompletion);
1341 }
1342 return MergeResult(UpdateState::Merging);
1343 }
1344
1345 if (merge_status != "snapshot-merge-complete") {
1346 LOG(ERROR) << "Snapshot " << name << " has incorrect status: " << merge_status;
1347 return MergeResult(UpdateState::MergeFailed, MergeFailureCode::ExpectedMergeTarget);
1348 }
1349 } else {
1350 // dm-snapshot in the kernel
1351 std::string target_type;
1352 DmTargetSnapshot::Status status;
1353 if (!QuerySnapshotStatus(name, &target_type, &status)) {
1354 return MergeResult(UpdateState::MergeFailed, MergeFailureCode::QuerySnapshotStatus);
1355 }
1356 if (target_type == "snapshot" &&
1357 DecideMergePhase(snapshot_status) == MergePhase::SECOND_PHASE &&
1358 update_status.merge_phase() == MergePhase::FIRST_PHASE) {
1359 // The snapshot is not being merged because it's in the wrong phase.
1360 return MergeResult(UpdateState::None);
1361 }
1362 if (target_type != "snapshot-merge") {
1363 // We can get here if we failed to rewrite the target type in
1364 // InitiateMerge(). If we failed to create the target in first-stage
1365 // init, boot would not succeed.
1366 LOG(ERROR) << "Snapshot " << name << " has incorrect target type: " << target_type;
1367 return MergeResult(UpdateState::MergeFailed, MergeFailureCode::ExpectedMergeTarget);
1368 }
1369
1370 // These two values are equal when merging is complete.
1371 if (status.sectors_allocated != status.metadata_sectors) {
1372 if (snapshot_status.state() == SnapshotState::MERGE_COMPLETED) {
1373 LOG(ERROR) << "Snapshot " << name
1374 << " is merging after being marked merge-complete.";
1375 return MergeResult(UpdateState::MergeFailed,
1376 MergeFailureCode::UnmergedSectorsAfterCompletion);
1377 }
1378 return MergeResult(UpdateState::Merging);
1379 }
1380 }
1381
1382 // Merging is done. First, update the status file to indicate the merge
1383 // is complete. We do this before calling OnSnapshotMergeComplete, even
1384 // though this means the write is potentially wasted work (since in the
1385 // ideal case we'll immediately delete the file).
1386 //
1387 // This makes it simpler to reason about the next reboot: no matter what
1388 // part of cleanup failed, first-stage init won't try to create another
1389 // snapshot device for this partition.
1390 snapshot_status.set_state(SnapshotState::MERGE_COMPLETED);
1391 if (!WriteSnapshotStatus(lock, snapshot_status)) {
1392 return MergeResult(UpdateState::MergeFailed, MergeFailureCode::WriteStatus);
1393 }
1394 if (!OnSnapshotMergeComplete(lock, name, snapshot_status)) {
1395 return MergeResult(UpdateState::MergeNeedsReboot);
1396 }
1397 return MergeResult(UpdateState::MergeCompleted, MergeFailureCode::Ok);
1398 }
1399
1400 // This returns the backing device, not the dm-user layer.
GetMappedCowDeviceName(const std::string & snapshot,const SnapshotStatus & status)1401 static std::string GetMappedCowDeviceName(const std::string& snapshot,
1402 const SnapshotStatus& status) {
1403 // If no partition was created (the COW exists entirely on /data), the
1404 // device-mapper layering is different than if we had a partition.
1405 if (status.cow_partition_size() == 0) {
1406 return GetCowImageDeviceName(snapshot);
1407 }
1408 return GetCowName(snapshot);
1409 }
1410
MergeSecondPhaseSnapshots(LockedFile * lock)1411 MergeFailureCode SnapshotManager::MergeSecondPhaseSnapshots(LockedFile* lock) {
1412 std::vector<std::string> snapshots;
1413 if (!ListSnapshots(lock, &snapshots)) {
1414 return MergeFailureCode::ListSnapshots;
1415 }
1416
1417 SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
1418 CHECK(update_status.state() == UpdateState::Merging ||
1419 update_status.state() == UpdateState::MergeFailed);
1420 CHECK(update_status.merge_phase() == MergePhase::FIRST_PHASE);
1421
1422 update_status.set_state(UpdateState::Merging);
1423 update_status.set_merge_phase(MergePhase::SECOND_PHASE);
1424 if (!WriteSnapshotUpdateStatus(lock, update_status)) {
1425 return MergeFailureCode::WriteStatus;
1426 }
1427
1428 MergeFailureCode result = MergeFailureCode::Ok;
1429 for (const auto& snapshot : snapshots) {
1430 SnapshotStatus snapshot_status;
1431 if (!ReadSnapshotStatus(lock, snapshot, &snapshot_status)) {
1432 return MergeFailureCode::ReadStatus;
1433 }
1434 if (DecideMergePhase(snapshot_status) != MergePhase::SECOND_PHASE) {
1435 continue;
1436 }
1437 auto code = SwitchSnapshotToMerge(lock, snapshot);
1438 if (code != MergeFailureCode::Ok) {
1439 LOG(ERROR) << "Failed to switch snapshot to a second-phase merge target: " << snapshot;
1440 if (result == MergeFailureCode::Ok) {
1441 result = code;
1442 }
1443 }
1444 }
1445 return result;
1446 }
1447
GetBootSnapshotsWithoutSlotSwitchPath()1448 std::string SnapshotManager::GetBootSnapshotsWithoutSlotSwitchPath() {
1449 return metadata_dir_ + "/" + android::base::Basename(kBootSnapshotsWithoutSlotSwitch);
1450 }
1451
GetSnapshotBootIndicatorPath()1452 std::string SnapshotManager::GetSnapshotBootIndicatorPath() {
1453 return metadata_dir_ + "/" + android::base::Basename(kBootIndicatorPath);
1454 }
1455
GetRollbackIndicatorPath()1456 std::string SnapshotManager::GetRollbackIndicatorPath() {
1457 return metadata_dir_ + "/" + android::base::Basename(kRollbackIndicatorPath);
1458 }
1459
GetForwardMergeIndicatorPath()1460 std::string SnapshotManager::GetForwardMergeIndicatorPath() {
1461 return metadata_dir_ + "/allow-forward-merge";
1462 }
1463
GetOldPartitionMetadataPath()1464 std::string SnapshotManager::GetOldPartitionMetadataPath() {
1465 return metadata_dir_ + "/old-partition-metadata";
1466 }
1467
AcknowledgeMergeSuccess(LockedFile * lock)1468 void SnapshotManager::AcknowledgeMergeSuccess(LockedFile* lock) {
1469 // It's not possible to remove update state in recovery, so write an
1470 // indicator that cleanup is needed on reboot. If a factory data reset
1471 // was requested, it doesn't matter, everything will get wiped anyway.
1472 // To make testing easier we consider a /data wipe as cleaned up.
1473 if (device_->IsRecovery()) {
1474 WriteUpdateState(lock, UpdateState::MergeCompleted);
1475 return;
1476 }
1477
1478 RemoveAllUpdateState(lock);
1479
1480 if (UpdateUsesUserSnapshots(lock) && !device()->IsTestDevice()) {
1481 if (snapuserd_client_) {
1482 snapuserd_client_->DetachSnapuserd();
1483 snapuserd_client_->RemoveTransitionedDaemonIndicator();
1484 snapuserd_client_ = nullptr;
1485 }
1486 }
1487 }
1488
AcknowledgeMergeFailure(MergeFailureCode failure_code)1489 void SnapshotManager::AcknowledgeMergeFailure(MergeFailureCode failure_code) {
1490 // Log first, so worst case, we always have a record of why the calls below
1491 // were being made.
1492 LOG(ERROR) << "Merge could not be completed and will be marked as failed.";
1493
1494 auto lock = LockExclusive();
1495 if (!lock) return;
1496
1497 // Since we released the lock in between WaitForMerge and here, it's
1498 // possible (1) the merge successfully completed or (2) was already
1499 // marked as a failure. So make sure to check the state again, and
1500 // only mark as a failure if appropriate.
1501 UpdateState state = ReadUpdateState(lock.get());
1502 if (state != UpdateState::Merging && state != UpdateState::MergeNeedsReboot) {
1503 return;
1504 }
1505
1506 WriteUpdateState(lock.get(), UpdateState::MergeFailed, failure_code);
1507 }
1508
OnSnapshotMergeComplete(LockedFile * lock,const std::string & name,const SnapshotStatus & status)1509 bool SnapshotManager::OnSnapshotMergeComplete(LockedFile* lock, const std::string& name,
1510 const SnapshotStatus& status) {
1511 if (!UpdateUsesUserSnapshots(lock)) {
1512 if (IsSnapshotDevice(name)) {
1513 // We are extra-cautious here, to avoid deleting the wrong table.
1514 std::string target_type;
1515 DmTargetSnapshot::Status dm_status;
1516 if (!QuerySnapshotStatus(name, &target_type, &dm_status)) {
1517 return false;
1518 }
1519 if (target_type != "snapshot-merge") {
1520 LOG(ERROR) << "Unexpected target type " << target_type
1521 << " for snapshot device: " << name;
1522 return false;
1523 }
1524 if (dm_status.sectors_allocated != dm_status.metadata_sectors) {
1525 LOG(ERROR) << "Merge is unexpectedly incomplete for device " << name;
1526 return false;
1527 }
1528 if (!CollapseSnapshotDevice(lock, name, status)) {
1529 LOG(ERROR) << "Unable to collapse snapshot: " << name;
1530 return false;
1531 }
1532 }
1533 } else {
1534 // Just collapse the device - no need to query again as we just did
1535 // prior to calling this function
1536 if (!CollapseSnapshotDevice(lock, name, status)) {
1537 LOG(ERROR) << "Unable to collapse snapshot: " << name;
1538 return false;
1539 }
1540 }
1541
1542 // Note that collapsing is implicitly an Unmap, so we don't need to
1543 // unmap the snapshot.
1544
1545 if (!DeleteSnapshot(lock, name)) {
1546 LOG(ERROR) << "Could not delete snapshot: " << name;
1547 return false;
1548 }
1549 return true;
1550 }
1551
CollapseSnapshotDevice(LockedFile * lock,const std::string & name,const SnapshotStatus & status)1552 bool SnapshotManager::CollapseSnapshotDevice(LockedFile* lock, const std::string& name,
1553 const SnapshotStatus& status) {
1554 if (!UpdateUsesUserSnapshots(lock)) {
1555 // Verify we have a snapshot-merge device.
1556 DeviceMapper::TargetInfo target;
1557 if (!GetSingleTarget(name, TableQuery::Table, &target)) {
1558 return false;
1559 }
1560 if (DeviceMapper::GetTargetType(target.spec) != "snapshot-merge") {
1561 // This should be impossible, it was checked earlier.
1562 LOG(ERROR) << "Snapshot device has invalid target type: " << name;
1563 return false;
1564 }
1565
1566 std::string base_device, cow_device;
1567 if (!DmTargetSnapshot::GetDevicesFromParams(target.data, &base_device, &cow_device)) {
1568 LOG(ERROR) << "Could not parse snapshot device " << name
1569 << " parameters: " << target.data;
1570 return false;
1571 }
1572 }
1573
1574 uint64_t snapshot_sectors = status.snapshot_size() / kSectorSize;
1575 if (snapshot_sectors * kSectorSize != status.snapshot_size()) {
1576 LOG(ERROR) << "Snapshot " << name
1577 << " size is not sector aligned: " << status.snapshot_size();
1578 return false;
1579 }
1580
1581 uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
1582 // Create a DmTable that is identical to the base device.
1583 CreateLogicalPartitionParams base_device_params{
1584 .block_device = device_->GetSuperDevice(slot),
1585 .metadata_slot = slot,
1586 .partition_name = name,
1587 .partition_opener = &device_->GetPartitionOpener(),
1588 };
1589 DmTable table;
1590 if (!CreateDmTable(base_device_params, &table)) {
1591 LOG(ERROR) << "Could not create a DmTable for partition: " << name;
1592 return false;
1593 }
1594
1595 if (!dm_.LoadTableAndActivate(name, table)) {
1596 return false;
1597 }
1598
1599 if (!UpdateUsesUserSnapshots(lock)) {
1600 // Attempt to delete the snapshot device if one still exists. Nothing
1601 // should be depending on the device, and device-mapper should have
1602 // flushed remaining I/O. We could in theory replace with dm-zero (or
1603 // re-use the table above), but for now it's better to know why this
1604 // would fail.
1605 //
1606 // Furthermore, we should not be trying to unmap for userspace snapshot
1607 // as unmap will fail since dm-user itself was a snapshot device prior
1608 // to switching of tables. Unmap will fail as the device will be mounted
1609 // by system partitions
1610 if (status.using_snapuserd()) {
1611 auto dm_user_name = GetDmUserCowName(name, GetSnapshotDriver(lock));
1612 UnmapDmUserDevice(dm_user_name);
1613 }
1614 }
1615
1616 // We can't delete base device immediately as daemon holds a reference.
1617 // Make sure we wait for all the worker threads to terminate and release
1618 // the reference
1619 if (UpdateUsesUserSnapshots(lock) && EnsureSnapuserdConnected()) {
1620 if (!snapuserd_client_->WaitForDeviceDelete(name)) {
1621 LOG(ERROR) << "Failed to wait for " << name << " control device to delete";
1622 }
1623 }
1624
1625 auto base_name = GetBaseDeviceName(name);
1626 if (!DeleteDeviceIfExists(base_name)) {
1627 LOG(ERROR) << "Unable to delete base device for snapshot: " << base_name;
1628 }
1629
1630 if (!DeleteDeviceIfExists(GetSourceDeviceName(name), 4000ms)) {
1631 LOG(ERROR) << "Unable to delete source device for snapshot: " << GetSourceDeviceName(name);
1632 }
1633
1634 return true;
1635 }
1636
HandleCancelledUpdate(LockedFile * lock,const std::function<bool ()> & before_cancel)1637 bool SnapshotManager::HandleCancelledUpdate(LockedFile* lock,
1638 const std::function<bool()>& before_cancel) {
1639 auto slot = GetCurrentSlot();
1640 if (slot == Slot::Unknown) {
1641 return false;
1642 }
1643
1644 // If all snapshots were reflashed, then cancel the entire update.
1645 if (AreAllSnapshotsCancelled(lock)) {
1646 LOG(WARNING) << "Detected re-flashing, cancelling unverified update.";
1647 return RemoveAllUpdateState(lock, before_cancel);
1648 }
1649
1650 // If update has been rolled back, then cancel the entire update.
1651 // Client (update_engine) is responsible for doing additional cleanup work on its own states
1652 // when ProcessUpdateState() returns UpdateState::Cancelled.
1653 auto current_slot = GetCurrentSlot();
1654 if (current_slot != Slot::Source) {
1655 LOG(INFO) << "Update state is being processed while booting at " << current_slot
1656 << " slot, taking no action.";
1657 return false;
1658 }
1659
1660 // current_slot == Source. Attempt to detect rollbacks.
1661 if (access(GetRollbackIndicatorPath().c_str(), F_OK) != 0) {
1662 // This unverified update is not attempted. Take no action.
1663 PLOG(INFO) << "Rollback indicator not detected. "
1664 << "Update state is being processed before reboot, taking no action.";
1665 return false;
1666 }
1667
1668 LOG(WARNING) << "Detected rollback, cancelling unverified update.";
1669 return RemoveAllUpdateState(lock, before_cancel);
1670 }
1671
PerformInitTransition(InitTransition transition,std::vector<std::string> * snapuserd_argv)1672 bool SnapshotManager::PerformInitTransition(InitTransition transition,
1673 std::vector<std::string>* snapuserd_argv) {
1674 LOG(INFO) << "Performing transition for snapuserd.";
1675
1676 // Don't use EnsureSnapuserdConnected() because this is called from init,
1677 // and attempting to do so will deadlock.
1678 if (!snapuserd_client_ && transition != InitTransition::SELINUX_DETACH) {
1679 snapuserd_client_ = SnapuserdClient::Connect(kSnapuserdSocket, 10s);
1680 if (!snapuserd_client_) {
1681 LOG(ERROR) << "Unable to connect to snapuserd";
1682 return false;
1683 }
1684 }
1685
1686 auto lock = LockExclusive();
1687 if (!lock) return false;
1688
1689 std::vector<std::string> snapshots;
1690 if (!ListSnapshots(lock.get(), &snapshots)) {
1691 LOG(ERROR) << "Failed to list snapshots.";
1692 return false;
1693 }
1694
1695 if (UpdateUsesUserSnapshots(lock.get()) && transition == InitTransition::SELINUX_DETACH) {
1696 snapuserd_argv->emplace_back("-user_snapshot");
1697 if (UpdateUsesIouring(lock.get())) {
1698 snapuserd_argv->emplace_back("-io_uring");
1699 }
1700 if (UpdateUsesODirect(lock.get())) {
1701 snapuserd_argv->emplace_back("-o_direct");
1702 }
1703 }
1704
1705 size_t num_cows = 0;
1706 size_t ok_cows = 0;
1707 for (const auto& snapshot : snapshots) {
1708 std::string user_cow_name = GetDmUserCowName(snapshot, GetSnapshotDriver(lock.get()));
1709
1710 if (dm_.GetState(user_cow_name) == DmDeviceState::INVALID) {
1711 continue;
1712 }
1713
1714 DeviceMapper::TargetInfo target;
1715 if (!GetSingleTarget(user_cow_name, TableQuery::Table, &target)) {
1716 continue;
1717 }
1718
1719 auto target_type = DeviceMapper::GetTargetType(target.spec);
1720 if (target_type != "user") {
1721 LOG(ERROR) << "Unexpected target type for " << user_cow_name << ": " << target_type;
1722 continue;
1723 }
1724
1725 num_cows++;
1726
1727 SnapshotStatus snapshot_status;
1728 if (!ReadSnapshotStatus(lock.get(), snapshot, &snapshot_status)) {
1729 LOG(ERROR) << "Unable to read snapshot status: " << snapshot;
1730 continue;
1731 }
1732
1733 auto misc_name = user_cow_name;
1734
1735 std::string source_device_name;
1736 if (snapshot_status.old_partition_size() > 0) {
1737 source_device_name = GetSourceDeviceName(snapshot);
1738 } else {
1739 source_device_name = GetBaseDeviceName(snapshot);
1740 }
1741
1742 std::string source_device;
1743 if (!dm_.GetDmDevicePathByName(source_device_name, &source_device)) {
1744 LOG(ERROR) << "Could not get device path for " << GetSourceDeviceName(snapshot);
1745 continue;
1746 }
1747
1748 std::string base_path_merge;
1749 if (!dm_.GetDmDevicePathByName(GetBaseDeviceName(snapshot), &base_path_merge)) {
1750 LOG(ERROR) << "Could not get device path for " << GetSourceDeviceName(snapshot);
1751 continue;
1752 }
1753
1754 std::string cow_image_name = GetMappedCowDeviceName(snapshot, snapshot_status);
1755
1756 std::string cow_image_device;
1757 if (!dm_.GetDmDevicePathByName(cow_image_name, &cow_image_device)) {
1758 LOG(ERROR) << "Could not get device path for " << cow_image_name;
1759 continue;
1760 }
1761
1762 if (transition == InitTransition::SELINUX_DETACH) {
1763 if (!UpdateUsesUserSnapshots(lock.get())) {
1764 auto message = misc_name + "," + cow_image_device + "," + source_device;
1765 snapuserd_argv->emplace_back(std::move(message));
1766 } else {
1767 auto message = misc_name + "," + cow_image_device + "," + source_device + "," +
1768 base_path_merge;
1769 snapuserd_argv->emplace_back(std::move(message));
1770 }
1771 SetReadAheadSize(cow_image_device, snapshot_status.read_ahead_size());
1772 SetReadAheadSize(source_device, snapshot_status.read_ahead_size());
1773
1774 // Do not attempt to connect to the new snapuserd yet, it hasn't
1775 // been started. We do however want to wait for the misc device
1776 // to have been created.
1777 ok_cows++;
1778 continue;
1779 }
1780
1781 DmTable table;
1782 table.Emplace<DmTargetUser>(0, target.spec.length, misc_name);
1783 if (!dm_.LoadTableAndActivate(user_cow_name, table)) {
1784 LOG(ERROR) << "Unable to swap tables for " << misc_name;
1785 continue;
1786 }
1787
1788 // Wait for ueventd to acknowledge and create the control device node.
1789 std::string control_device = "/dev/dm-user/" + misc_name;
1790 if (!WaitForDevice(control_device, 10s)) {
1791 LOG(ERROR) << "dm-user control device no found: " << misc_name;
1792 continue;
1793 }
1794
1795 uint64_t base_sectors;
1796 if (!UpdateUsesUserSnapshots(lock.get())) {
1797 base_sectors =
1798 snapuserd_client_->InitDmUserCow(misc_name, cow_image_device, source_device);
1799 } else {
1800 base_sectors = snapuserd_client_->InitDmUserCow(misc_name, cow_image_device,
1801 source_device, base_path_merge);
1802 }
1803
1804 if (base_sectors == 0) {
1805 // Unrecoverable as metadata reads from cow device failed
1806 LOG(FATAL) << "Failed to retrieve base_sectors from Snapuserd";
1807 return false;
1808 }
1809
1810 CHECK(base_sectors <= target.spec.length);
1811
1812 if (!snapuserd_client_->AttachDmUser(misc_name)) {
1813 // This error is unrecoverable. We cannot proceed because reads to
1814 // the underlying device will fail.
1815 LOG(FATAL) << "Could not initialize snapuserd for " << user_cow_name;
1816 return false;
1817 }
1818
1819 ok_cows++;
1820 }
1821
1822 if (ok_cows != num_cows) {
1823 LOG(ERROR) << "Could not transition all snapuserd consumers.";
1824 return false;
1825 }
1826 return true;
1827 }
1828
ReadCurrentMetadata()1829 std::unique_ptr<LpMetadata> SnapshotManager::ReadCurrentMetadata() {
1830 const auto& opener = device_->GetPartitionOpener();
1831 uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
1832 auto super_device = device_->GetSuperDevice(slot);
1833 auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot);
1834 if (!metadata) {
1835 LOG(ERROR) << "Could not read dynamic partition metadata for device: " << super_device;
1836 return nullptr;
1837 }
1838 return metadata;
1839 }
1840
GetMetadataPartitionState(const LpMetadata & metadata,const std::string & name)1841 SnapshotManager::MetadataPartitionState SnapshotManager::GetMetadataPartitionState(
1842 const LpMetadata& metadata, const std::string& name) {
1843 auto partition = android::fs_mgr::FindPartition(metadata, name);
1844 if (!partition) return MetadataPartitionState::None;
1845 if (partition->attributes & LP_PARTITION_ATTR_UPDATED) {
1846 return MetadataPartitionState::Updated;
1847 }
1848 return MetadataPartitionState::Flashed;
1849 }
1850
AreAllSnapshotsCancelled(LockedFile * lock)1851 bool SnapshotManager::AreAllSnapshotsCancelled(LockedFile* lock) {
1852 std::vector<std::string> snapshots;
1853 if (!ListSnapshots(lock, &snapshots)) {
1854 LOG(WARNING) << "Failed to list snapshots to determine whether device has been flashed "
1855 << "after applying an update. Assuming no snapshots.";
1856 // Let HandleCancelledUpdate resets UpdateState.
1857 return true;
1858 }
1859
1860 std::map<std::string, bool> flashing_status;
1861
1862 if (!GetSnapshotFlashingStatus(lock, snapshots, &flashing_status)) {
1863 LOG(WARNING) << "Failed to determine whether partitions have been flashed. Not"
1864 << "removing update states.";
1865 return false;
1866 }
1867
1868 bool all_snapshots_cancelled = std::all_of(flashing_status.begin(), flashing_status.end(),
1869 [](const auto& pair) { return pair.second; });
1870
1871 if (all_snapshots_cancelled) {
1872 LOG(WARNING) << "All partitions are re-flashed after update, removing all update states.";
1873 }
1874 return all_snapshots_cancelled;
1875 }
1876
GetSnapshotFlashingStatus(LockedFile * lock,const std::vector<std::string> & snapshots,std::map<std::string,bool> * out)1877 bool SnapshotManager::GetSnapshotFlashingStatus(LockedFile* lock,
1878 const std::vector<std::string>& snapshots,
1879 std::map<std::string, bool>* out) {
1880 CHECK(lock);
1881
1882 auto source_slot_suffix = ReadUpdateSourceSlotSuffix();
1883 if (source_slot_suffix.empty()) {
1884 return false;
1885 }
1886 uint32_t source_slot = SlotNumberForSlotSuffix(source_slot_suffix);
1887 uint32_t target_slot = (source_slot == 0) ? 1 : 0;
1888
1889 // Attempt to detect re-flashing on each partition.
1890 // - If all partitions are re-flashed, we can proceed to cancel the whole update.
1891 // - If only some of the partitions are re-flashed, snapshots for re-flashed partitions are
1892 // deleted. Caller is responsible for merging the rest of the snapshots.
1893 // - If none of the partitions are re-flashed, caller is responsible for merging the snapshots.
1894 //
1895 // Note that we use target slot metadata, since if an OTA has been applied
1896 // to the target slot, we can detect the UPDATED flag. Any kind of flash
1897 // operation against dynamic partitions ensures that all copies of the
1898 // metadata are in sync, so flashing all partitions on the source slot will
1899 // remove the UPDATED flag on the target slot as well.
1900 const auto& opener = device_->GetPartitionOpener();
1901 auto super_device = device_->GetSuperDevice(target_slot);
1902 auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, target_slot);
1903 if (!metadata) {
1904 return false;
1905 }
1906
1907 for (const auto& snapshot_name : snapshots) {
1908 if (GetMetadataPartitionState(*metadata, snapshot_name) ==
1909 MetadataPartitionState::Updated) {
1910 out->emplace(snapshot_name, false);
1911 } else {
1912 // Delete snapshots for partitions that are re-flashed after the update.
1913 LOG(WARNING) << "Detected re-flashing of partition " << snapshot_name << ".";
1914 out->emplace(snapshot_name, true);
1915 }
1916 }
1917 return true;
1918 }
1919
RemoveInvalidSnapshots(LockedFile * lock)1920 void SnapshotManager::RemoveInvalidSnapshots(LockedFile* lock) {
1921 std::vector<std::string> snapshots;
1922
1923 // Remove the stale snapshot metadata
1924 //
1925 // We make sure that all the three cases
1926 // are valid before removing the snapshot metadata:
1927 //
1928 // 1: dm state is active
1929 // 2: Root fs is not mounted off as a snapshot device
1930 // 3: Snapshot slot suffix should match current device slot
1931 if (!ListSnapshots(lock, &snapshots, device_->GetSlotSuffix()) || snapshots.empty()) {
1932 return;
1933 }
1934
1935 // We indeed have some invalid snapshots
1936 for (const auto& name : snapshots) {
1937 if (dm_.GetState(name) == DmDeviceState::ACTIVE && !IsSnapshotDevice(name)) {
1938 if (!DeleteSnapshot(lock, name)) {
1939 LOG(ERROR) << "Failed to delete invalid snapshot: " << name;
1940 } else {
1941 LOG(INFO) << "Invalid snapshot: " << name << " deleted";
1942 }
1943 }
1944 }
1945 }
1946
RemoveAllSnapshots(LockedFile * lock)1947 bool SnapshotManager::RemoveAllSnapshots(LockedFile* lock) {
1948 std::vector<std::string> snapshots;
1949 if (!ListSnapshots(lock, &snapshots)) {
1950 LOG(ERROR) << "Could not list snapshots";
1951 return false;
1952 }
1953
1954 std::map<std::string, bool> flashing_status;
1955 if (!GetSnapshotFlashingStatus(lock, snapshots, &flashing_status)) {
1956 LOG(WARNING) << "Failed to get flashing status";
1957 }
1958
1959 auto current_slot = GetCurrentSlot();
1960 bool ok = true;
1961 bool has_mapped_cow_images = false;
1962 for (const auto& name : snapshots) {
1963 // If booting off source slot, it is okay to unmap and delete all the snapshots.
1964 // If boot indicator is missing, update state is None or Initiated, so
1965 // it is also okay to unmap and delete all the snapshots.
1966 // If booting off target slot,
1967 // - should not unmap because:
1968 // - In Android mode, snapshots are not mapped, but
1969 // filesystems are mounting off dm-linear targets directly.
1970 // - In recovery mode, assume nothing is mapped, so it is optional to unmap.
1971 // - If partition is flashed or unknown, it is okay to delete snapshots.
1972 // Otherwise (UPDATED flag), only delete snapshots if they are not mapped
1973 // as dm-snapshot (for example, after merge completes).
1974 bool should_unmap = current_slot != Slot::Target;
1975 bool should_delete = ShouldDeleteSnapshot(flashing_status, current_slot, name);
1976 if (should_unmap && android::base::EndsWith(name, device_->GetSlotSuffix())) {
1977 // Something very unexpected has happened - we want to unmap this
1978 // snapshot, but it's on the wrong slot. We can't unmap an active
1979 // partition. If this is not really a snapshot, skip the unmap
1980 // step.
1981 if (dm_.GetState(name) == DmDeviceState::INVALID || !IsSnapshotDevice(name)) {
1982 LOG(ERROR) << "Detected snapshot " << name << " on " << current_slot << " slot"
1983 << " for source partition; removing without unmap.";
1984 should_unmap = false;
1985 }
1986 }
1987
1988 bool partition_ok = true;
1989 if (should_unmap && !UnmapPartitionWithSnapshot(lock, name)) {
1990 partition_ok = false;
1991 }
1992 if (partition_ok && should_delete && !DeleteSnapshot(lock, name)) {
1993 partition_ok = false;
1994 }
1995
1996 if (!partition_ok) {
1997 // Remember whether or not we were able to unmap the cow image.
1998 auto cow_image_device = GetCowImageDeviceName(name);
1999 has_mapped_cow_images |=
2000 (EnsureImageManager() && images_->IsImageMapped(cow_image_device));
2001
2002 ok = false;
2003 }
2004 }
2005
2006 if (ok || !has_mapped_cow_images) {
2007 // Delete any image artifacts as a precaution, in case an update is
2008 // being cancelled due to some corrupted state in an lp_metadata file.
2009 // Note that we do not do this if some cow images are still mapped,
2010 // since we must not remove backing storage if it's in use.
2011 if (!EnsureImageManager() || !images_->RemoveAllImages()) {
2012 LOG(ERROR) << "Could not remove all snapshot artifacts";
2013 return false;
2014 }
2015 }
2016 return ok;
2017 }
2018
2019 // See comments in RemoveAllSnapshots().
ShouldDeleteSnapshot(const std::map<std::string,bool> & flashing_status,Slot current_slot,const std::string & name)2020 bool SnapshotManager::ShouldDeleteSnapshot(const std::map<std::string, bool>& flashing_status,
2021 Slot current_slot, const std::string& name) {
2022 if (current_slot != Slot::Target) {
2023 return true;
2024 }
2025 auto it = flashing_status.find(name);
2026 if (it == flashing_status.end()) {
2027 LOG(WARNING) << "Can't determine flashing status for " << name;
2028 return true;
2029 }
2030 if (it->second) {
2031 // partition flashed, okay to delete obsolete snapshots
2032 return true;
2033 }
2034 return !IsSnapshotDevice(name);
2035 }
2036
GetUpdateState(double * progress)2037 UpdateState SnapshotManager::GetUpdateState(double* progress) {
2038 // If we've never started an update, the state file won't exist.
2039 auto state_file = GetStateFilePath();
2040 if (access(state_file.c_str(), F_OK) != 0 && errno == ENOENT) {
2041 return UpdateState::None;
2042 }
2043
2044 auto lock = LockShared();
2045 if (!lock) {
2046 return UpdateState::None;
2047 }
2048
2049 SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock.get());
2050 auto state = update_status.state();
2051 if (progress == nullptr) {
2052 return state;
2053 }
2054
2055 if (state == UpdateState::MergeCompleted) {
2056 *progress = 100.0;
2057 return state;
2058 }
2059
2060 *progress = 0.0;
2061 if (state != UpdateState::Merging) {
2062 return state;
2063 }
2064
2065 if (!UpdateUsesUserSnapshots(lock.get())) {
2066 // Sum all the snapshot states as if the system consists of a single huge
2067 // snapshots device, then compute the merge completion percentage of that
2068 // device.
2069 std::vector<std::string> snapshots;
2070 if (!ListSnapshots(lock.get(), &snapshots)) {
2071 LOG(ERROR) << "Could not list snapshots";
2072 return state;
2073 }
2074
2075 DmTargetSnapshot::Status fake_snapshots_status = {};
2076 for (const auto& snapshot : snapshots) {
2077 DmTargetSnapshot::Status current_status;
2078
2079 if (!IsSnapshotDevice(snapshot)) continue;
2080 if (!QuerySnapshotStatus(snapshot, nullptr, ¤t_status)) continue;
2081
2082 fake_snapshots_status.sectors_allocated += current_status.sectors_allocated;
2083 fake_snapshots_status.total_sectors += current_status.total_sectors;
2084 fake_snapshots_status.metadata_sectors += current_status.metadata_sectors;
2085 }
2086
2087 *progress = DmTargetSnapshot::MergePercent(fake_snapshots_status,
2088 update_status.sectors_allocated());
2089 } else {
2090 if (EnsureSnapuserdConnected()) {
2091 *progress = snapuserd_client_->GetMergePercent();
2092 }
2093 }
2094
2095 return state;
2096 }
2097
IsSnapshotWithoutSlotSwitch()2098 bool SnapshotManager::IsSnapshotWithoutSlotSwitch() {
2099 return (access(GetBootSnapshotsWithoutSlotSwitchPath().c_str(), F_OK) == 0);
2100 }
2101
UpdateUsesCompression()2102 bool SnapshotManager::UpdateUsesCompression() {
2103 auto lock = LockShared();
2104 if (!lock) return false;
2105 return UpdateUsesCompression(lock.get());
2106 }
2107
UpdateUsesCompression(LockedFile * lock)2108 bool SnapshotManager::UpdateUsesCompression(LockedFile* lock) {
2109 // This returns true even if compression is "none", since update_engine is
2110 // really just trying to see if snapuserd is in use.
2111 SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
2112 return update_status.using_snapuserd();
2113 }
2114
UpdateUsesIouring(LockedFile * lock)2115 bool SnapshotManager::UpdateUsesIouring(LockedFile* lock) {
2116 SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
2117 return update_status.io_uring_enabled();
2118 }
2119
UpdateUsesODirect(LockedFile * lock)2120 bool SnapshotManager::UpdateUsesODirect(LockedFile* lock) {
2121 SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
2122 return update_status.o_direct();
2123 }
2124
2125 /*
2126 * Please see b/304829384 for more details.
2127 *
2128 * In Android S, we use dm-snapshot for mounting snapshots and snapshot-merge
2129 * process. If the vendor partition continues to be on Android S, then
2130 * "snapuserd" binary in first stage ramdisk will be from vendor partition.
2131 * Thus, we need to maintain backward compatibility.
2132 *
2133 * Now, We take a two step approach to maintain the backward compatibility:
2134 *
2135 * 1: During OTA installation, we will continue to use "user-space" snapshots
2136 * for OTA installation as both update-engine and snapuserd binary will be from system partition.
2137 * However, during installation, we mark "legacy_snapuserd" in
2138 * SnapshotUpdateStatus file to mark that this is a path to support backward compatibility.
2139 * Thus, this function will return "false" during OTA installation.
2140 *
2141 * 2: Post OTA reboot, there are two key steps:
2142 * a: During first stage init, "init" and "snapuserd" could be from vendor
2143 * partition. This could be from Android S. Thus, the snapshot mount path
2144 * will be based off dm-snapshot.
2145 *
2146 * b: Post selinux transition, "init" and "update-engine" will be "system"
2147 * partition. Now, since the snapshots are mounted off dm-snapshot,
2148 * update-engine interaction with "snapuserd" should work based off
2149 * dm-snapshots.
2150 *
2151 * TL;DR: update-engine will use the "system" snapuserd for installing new
2152 * updates (this is safe as there is no "vendor" snapuserd running during
2153 * installation). Post reboot, update-engine will use the legacy path when
2154 * communicating with "vendor" snapuserd that was started in first-stage
2155 * init. Hence, this function checks:
2156 * i: Are we in post OTA reboot
2157 * ii: Is the Vendor from Android 12
2158 * iii: If both (i) and (ii) are true, then use the dm-snapshot based
2159 * approach.
2160 *
2161 */
IsLegacySnapuserdPostReboot()2162 bool SnapshotManager::IsLegacySnapuserdPostReboot() {
2163 if (is_legacy_snapuserd_.has_value() && is_legacy_snapuserd_.value() == true) {
2164 auto slot = GetCurrentSlot();
2165 if (slot == Slot::Target) {
2166 return true;
2167 }
2168 }
2169 return false;
2170 }
2171
UpdateUsesUserSnapshots()2172 bool SnapshotManager::UpdateUsesUserSnapshots() {
2173 // This and the following function is constantly
2174 // invoked during snapshot merge. We want to avoid
2175 // constantly reading from disk. Hence, store this
2176 // value in memory.
2177 //
2178 // Furthermore, this value in the disk is set
2179 // only when OTA is applied and doesn't change
2180 // during merge phase. Hence, once we know that
2181 // the value is read from disk the very first time,
2182 // it is safe to read successive checks from memory.
2183
2184 if (is_snapshot_userspace_.has_value()) {
2185 // Check if legacy snapuserd is running post OTA reboot
2186 if (IsLegacySnapuserdPostReboot()) {
2187 return false;
2188 }
2189 return is_snapshot_userspace_.value();
2190 }
2191
2192 auto lock = LockShared();
2193 if (!lock) return false;
2194
2195 return UpdateUsesUserSnapshots(lock.get());
2196 }
2197
UpdateUsesUserSnapshots(LockedFile * lock)2198 bool SnapshotManager::UpdateUsesUserSnapshots(LockedFile* lock) {
2199 if (!is_snapshot_userspace_.has_value()) {
2200 SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
2201 is_snapshot_userspace_ = update_status.userspace_snapshots();
2202 is_legacy_snapuserd_ = update_status.legacy_snapuserd();
2203 }
2204
2205 if (IsLegacySnapuserdPostReboot()) {
2206 return false;
2207 }
2208
2209 return is_snapshot_userspace_.value();
2210 }
2211
ListSnapshots(LockedFile * lock,std::vector<std::string> * snapshots,const std::string & suffix)2212 bool SnapshotManager::ListSnapshots(LockedFile* lock, std::vector<std::string>* snapshots,
2213 const std::string& suffix) {
2214 CHECK(lock);
2215
2216 auto dir_path = metadata_dir_ + "/snapshots"s;
2217 std::unique_ptr<DIR, decltype(&closedir)> dir(opendir(dir_path.c_str()), closedir);
2218 if (!dir) {
2219 PLOG(ERROR) << "opendir failed: " << dir_path;
2220 return false;
2221 }
2222
2223 struct dirent* dp;
2224 while ((dp = readdir(dir.get())) != nullptr) {
2225 if (dp->d_type != DT_REG) continue;
2226
2227 std::string name(dp->d_name);
2228 if (!suffix.empty() && !android::base::EndsWith(name, suffix)) {
2229 continue;
2230 }
2231
2232 // Insert system and product partition at the beginning so that
2233 // during snapshot-merge, these partitions are merged first.
2234 if (name == "system_a" || name == "system_b" || name == "product_a" ||
2235 name == "product_b") {
2236 snapshots->insert(snapshots->begin(), std::move(name));
2237 } else {
2238 snapshots->emplace_back(std::move(name));
2239 }
2240 }
2241
2242 return true;
2243 }
2244
IsSnapshotManagerNeeded()2245 bool SnapshotManager::IsSnapshotManagerNeeded() {
2246 return access(kBootIndicatorPath, F_OK) == 0;
2247 }
2248
GetGlobalRollbackIndicatorPath()2249 std::string SnapshotManager::GetGlobalRollbackIndicatorPath() {
2250 return kRollbackIndicatorPath;
2251 }
2252
NeedSnapshotsInFirstStageMount()2253 bool SnapshotManager::NeedSnapshotsInFirstStageMount() {
2254 if (IsSnapshotWithoutSlotSwitch()) {
2255 if (GetCurrentSlot() != Slot::Source) {
2256 LOG(ERROR) << "Snapshots marked to boot without slot switch; but slot is wrong";
2257 return false;
2258 }
2259 return true;
2260 }
2261 // If we fail to read, we'll wind up using CreateLogicalPartitions, which
2262 // will create devices that look like the old slot, except with extra
2263 // content at the end of each device. This will confuse dm-verity, and
2264 // ultimately we'll fail to boot. Why not make it a fatal error and have
2265 // the reason be clearer? Because the indicator file still exists, and
2266 // if this was FATAL, reverting to the old slot would be broken.
2267 auto slot = GetCurrentSlot();
2268
2269 if (slot != Slot::Target) {
2270 if (slot == Slot::Source) {
2271 // Device is rebooting into the original slot, so mark this as a
2272 // rollback.
2273 auto path = GetRollbackIndicatorPath();
2274 if (!android::base::WriteStringToFile("1", path)) {
2275 PLOG(ERROR) << "Unable to write rollback indicator: " << path;
2276 } else {
2277 LOG(INFO) << "Rollback detected, writing rollback indicator to " << path;
2278 }
2279 }
2280 LOG(INFO) << "Not booting from new slot. Will not mount snapshots.";
2281 return false;
2282 }
2283
2284 // If we can't read the update state, it's unlikely anything else will
2285 // succeed, so this is a fatal error. We'll eventually exhaust boot
2286 // attempts and revert to the old slot.
2287 auto lock = LockShared();
2288 if (!lock) {
2289 LOG(FATAL) << "Could not read update state to determine snapshot status";
2290 return false;
2291 }
2292 switch (ReadUpdateState(lock.get())) {
2293 case UpdateState::Unverified:
2294 case UpdateState::Merging:
2295 case UpdateState::MergeFailed:
2296 return true;
2297 default:
2298 return false;
2299 }
2300 }
2301
CreateLogicalAndSnapshotPartitions(const std::string & super_device,const std::chrono::milliseconds & timeout_ms)2302 bool SnapshotManager::CreateLogicalAndSnapshotPartitions(
2303 const std::string& super_device, const std::chrono::milliseconds& timeout_ms) {
2304 LOG(INFO) << "Creating logical partitions with snapshots as needed";
2305
2306 auto lock = LockExclusive();
2307 if (!lock) return false;
2308
2309 uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
2310 return MapAllPartitions(lock.get(), super_device, slot, timeout_ms);
2311 }
2312
MapAllPartitions(LockedFile * lock,const std::string & super_device,uint32_t slot,const std::chrono::milliseconds & timeout_ms)2313 bool SnapshotManager::MapAllPartitions(LockedFile* lock, const std::string& super_device,
2314 uint32_t slot, const std::chrono::milliseconds& timeout_ms) {
2315 const auto& opener = device_->GetPartitionOpener();
2316 auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot);
2317 if (!metadata) {
2318 LOG(ERROR) << "Could not read dynamic partition metadata for device: " << super_device;
2319 return false;
2320 }
2321
2322 if (!EnsureImageManager()) {
2323 return false;
2324 }
2325
2326 for (const auto& partition : metadata->partitions) {
2327 if (GetPartitionGroupName(metadata->groups[partition.group_index]) == kCowGroupName) {
2328 LOG(INFO) << "Skip mapping partition " << GetPartitionName(partition) << " in group "
2329 << kCowGroupName;
2330 continue;
2331 }
2332
2333 CreateLogicalPartitionParams params = {
2334 .block_device = super_device,
2335 .metadata = metadata.get(),
2336 .partition = &partition,
2337 .timeout_ms = timeout_ms,
2338 .partition_opener = &opener,
2339 };
2340 if (!MapPartitionWithSnapshot(lock, std::move(params), SnapshotContext::Mount, nullptr)) {
2341 return false;
2342 }
2343 }
2344
2345 LOG(INFO) << "Created logical partitions with snapshot.";
2346 return true;
2347 }
2348
GetRemainingTime(const std::chrono::milliseconds & timeout,const std::chrono::time_point<std::chrono::steady_clock> & begin)2349 static std::chrono::milliseconds GetRemainingTime(
2350 const std::chrono::milliseconds& timeout,
2351 const std::chrono::time_point<std::chrono::steady_clock>& begin) {
2352 // If no timeout is specified, execute all commands without specifying any timeout.
2353 if (timeout.count() == 0) return std::chrono::milliseconds(0);
2354 auto passed_time = std::chrono::steady_clock::now() - begin;
2355 auto remaining_time = timeout - duration_cast<std::chrono::milliseconds>(passed_time);
2356 if (remaining_time.count() <= 0) {
2357 LOG(ERROR) << "MapPartitionWithSnapshot has reached timeout " << timeout.count() << "ms ("
2358 << remaining_time.count() << "ms remaining)";
2359 // Return min() instead of remaining_time here because 0 is treated as a special value for
2360 // no timeout, where the rest of the commands will still be executed.
2361 return std::chrono::milliseconds::min();
2362 }
2363 return remaining_time;
2364 }
2365
MapPartitionWithSnapshot(LockedFile * lock,CreateLogicalPartitionParams params,SnapshotContext context,SnapshotPaths * paths)2366 bool SnapshotManager::MapPartitionWithSnapshot(LockedFile* lock,
2367 CreateLogicalPartitionParams params,
2368 SnapshotContext context, SnapshotPaths* paths) {
2369 auto begin = std::chrono::steady_clock::now();
2370
2371 CHECK(lock);
2372
2373 if (params.GetPartitionName() != params.GetDeviceName()) {
2374 LOG(ERROR) << "Mapping snapshot with a different name is unsupported: partition_name = "
2375 << params.GetPartitionName() << ", device_name = " << params.GetDeviceName();
2376 return false;
2377 }
2378
2379 // Fill out fields in CreateLogicalPartitionParams so that we have more information (e.g. by
2380 // reading super partition metadata).
2381 CreateLogicalPartitionParams::OwnedData params_owned_data;
2382 if (!params.InitDefaults(¶ms_owned_data)) {
2383 return false;
2384 }
2385
2386 if (!params.partition->num_extents) {
2387 LOG(INFO) << "Skipping zero-length logical partition: " << params.GetPartitionName();
2388 return true; // leave path empty to indicate that nothing is mapped.
2389 }
2390
2391 // Determine if there is a live snapshot for the SnapshotStatus of the partition; i.e. if the
2392 // partition still has a snapshot that needs to be mapped. If no live snapshot or merge
2393 // completed, live_snapshot_status is set to nullopt.
2394 std::optional<SnapshotStatus> live_snapshot_status;
2395 do {
2396 if (!IsSnapshotWithoutSlotSwitch() &&
2397 !(params.partition->attributes & LP_PARTITION_ATTR_UPDATED)) {
2398 LOG(INFO) << "Detected re-flashing of partition, will skip snapshot: "
2399 << params.GetPartitionName();
2400 break;
2401 }
2402 auto file_path = GetSnapshotStatusFilePath(params.GetPartitionName());
2403 if (access(file_path.c_str(), F_OK) != 0) {
2404 if (errno != ENOENT) {
2405 PLOG(INFO) << "Can't map snapshot for " << params.GetPartitionName()
2406 << ": Can't access " << file_path;
2407 return false;
2408 }
2409 break;
2410 }
2411 live_snapshot_status = std::make_optional<SnapshotStatus>();
2412 if (!ReadSnapshotStatus(lock, params.GetPartitionName(), &*live_snapshot_status)) {
2413 return false;
2414 }
2415 // No live snapshot if merge is completed.
2416 if (live_snapshot_status->state() == SnapshotState::MERGE_COMPLETED) {
2417 live_snapshot_status.reset();
2418 }
2419
2420 if (live_snapshot_status->state() == SnapshotState::NONE ||
2421 live_snapshot_status->cow_partition_size() + live_snapshot_status->cow_file_size() ==
2422 0) {
2423 LOG(WARNING) << "Snapshot status for " << params.GetPartitionName()
2424 << " is invalid, ignoring: state = "
2425 << SnapshotState_Name(live_snapshot_status->state())
2426 << ", cow_partition_size = " << live_snapshot_status->cow_partition_size()
2427 << ", cow_file_size = " << live_snapshot_status->cow_file_size();
2428 live_snapshot_status.reset();
2429 }
2430 } while (0);
2431
2432 if (live_snapshot_status.has_value()) {
2433 // dm-snapshot requires the base device to be writable.
2434 params.force_writable = true;
2435 // Map the base device with a different name to avoid collision.
2436 params.device_name = GetBaseDeviceName(params.GetPartitionName());
2437 }
2438
2439 AutoDeviceList created_devices;
2440
2441 // Create the base device for the snapshot, or if there is no snapshot, the
2442 // device itself. This device consists of the real blocks in the super
2443 // partition that this logical partition occupies.
2444 std::string base_path;
2445 if (!CreateLogicalPartition(params, &base_path)) {
2446 LOG(ERROR) << "Could not create logical partition " << params.GetPartitionName()
2447 << " as device " << params.GetDeviceName();
2448 return false;
2449 }
2450 created_devices.EmplaceBack<AutoUnmapDevice>(&dm_, params.GetDeviceName());
2451
2452 if (paths) {
2453 paths->target_device = base_path;
2454 }
2455
2456 auto remaining_time = GetRemainingTime(params.timeout_ms, begin);
2457 if (remaining_time.count() < 0) {
2458 return false;
2459 }
2460
2461 // Wait for the base device to appear
2462 if (!WaitForDevice(base_path, remaining_time)) {
2463 return false;
2464 }
2465
2466 if (!live_snapshot_status.has_value()) {
2467 created_devices.Release();
2468 return true;
2469 }
2470
2471 // We don't have ueventd in first-stage init, so use device major:minor
2472 // strings instead.
2473 std::string base_device;
2474 if (!dm_.GetDeviceString(params.GetDeviceName(), &base_device)) {
2475 LOG(ERROR) << "Could not determine major/minor for: " << params.GetDeviceName();
2476 return false;
2477 }
2478
2479 remaining_time = GetRemainingTime(params.timeout_ms, begin);
2480 if (remaining_time.count() < 0) return false;
2481
2482 std::string cow_name;
2483 CreateLogicalPartitionParams cow_params = params;
2484 cow_params.timeout_ms = remaining_time;
2485 if (!MapCowDevices(lock, cow_params, *live_snapshot_status, &created_devices, &cow_name)) {
2486 return false;
2487 }
2488 std::string cow_device;
2489 if (!GetMappedImageDeviceStringOrPath(cow_name, &cow_device)) {
2490 LOG(ERROR) << "Could not determine major/minor for: " << cow_name;
2491 return false;
2492 }
2493 if (paths) {
2494 paths->cow_device_name = cow_name;
2495 }
2496
2497 remaining_time = GetRemainingTime(params.timeout_ms, begin);
2498 if (remaining_time.count() < 0) return false;
2499
2500 if (context == SnapshotContext::Update && live_snapshot_status->using_snapuserd()) {
2501 // Stop here, we can't run dm-user yet, the COW isn't built.
2502 created_devices.Release();
2503 return true;
2504 }
2505
2506 if (live_snapshot_status->using_snapuserd()) {
2507 // Get the source device (eg the view of the partition from before it was resized).
2508 std::string source_device_path;
2509 if (live_snapshot_status->old_partition_size() > 0) {
2510 if (!MapSourceDevice(lock, params.GetPartitionName(), remaining_time,
2511 &source_device_path)) {
2512 LOG(ERROR) << "Could not map source device for: " << cow_name;
2513 return false;
2514 }
2515
2516 auto source_device = GetSourceDeviceName(params.GetPartitionName());
2517 created_devices.EmplaceBack<AutoUnmapDevice>(&dm_, source_device);
2518 } else {
2519 source_device_path = base_path;
2520 }
2521
2522 if (!WaitForDevice(source_device_path, remaining_time)) {
2523 return false;
2524 }
2525
2526 std::string cow_path;
2527 if (!GetMappedImageDevicePath(cow_name, &cow_path)) {
2528 LOG(ERROR) << "Could not determine path for: " << cow_name;
2529 return false;
2530 }
2531 if (!WaitForDevice(cow_path, remaining_time)) {
2532 return false;
2533 }
2534
2535 auto name = GetDmUserCowName(params.GetPartitionName(), GetSnapshotDriver(lock));
2536
2537 std::string new_cow_device;
2538 if (!MapDmUserCow(lock, name, cow_path, source_device_path, base_path, remaining_time,
2539 &new_cow_device)) {
2540 LOG(ERROR) << "Could not map dm-user device for partition "
2541 << params.GetPartitionName();
2542 return false;
2543 }
2544 created_devices.EmplaceBack<AutoUnmapDevice>(&dm_, name);
2545
2546 cow_device = new_cow_device;
2547 }
2548
2549 // For userspace snapshots, dm-user block device itself will act as a
2550 // snapshot device. There is one subtle difference - MapSnapshot will create
2551 // either snapshot target or snapshot-merge target based on the underlying
2552 // state of the snapshot device. If snapshot-merge target is created, merge
2553 // will immediately start in the kernel.
2554 //
2555 // This is no longer true with respect to userspace snapshots. When dm-user
2556 // block device is created, we just have the snapshots ready but daemon in
2557 // the user-space will not start the merge. We have to explicitly inform the
2558 // daemon to resume the merge. Check ProcessUpdateState() call stack.
2559 if (!UpdateUsesUserSnapshots(lock)) {
2560 remaining_time = GetRemainingTime(params.timeout_ms, begin);
2561 if (remaining_time.count() < 0) return false;
2562
2563 std::string path;
2564 if (!MapSnapshot(lock, params.GetPartitionName(), base_device, cow_device, remaining_time,
2565 &path)) {
2566 LOG(ERROR) << "Could not map snapshot for partition: " << params.GetPartitionName();
2567 return false;
2568 }
2569 // No need to add params.GetPartitionName() to created_devices since it is immediately
2570 // released.
2571
2572 if (paths) {
2573 paths->snapshot_device = path;
2574 }
2575 LOG(INFO) << "Mapped " << params.GetPartitionName() << " as snapshot device at " << path;
2576 } else {
2577 LOG(INFO) << "Mapped " << params.GetPartitionName() << " as snapshot device at "
2578 << cow_device;
2579 }
2580
2581 created_devices.Release();
2582
2583 return true;
2584 }
2585
UnmapPartitionWithSnapshot(LockedFile * lock,const std::string & target_partition_name)2586 bool SnapshotManager::UnmapPartitionWithSnapshot(LockedFile* lock,
2587 const std::string& target_partition_name) {
2588 CHECK(lock);
2589
2590 if (!UnmapSnapshot(lock, target_partition_name)) {
2591 return false;
2592 }
2593
2594 if (!UnmapCowDevices(lock, target_partition_name)) {
2595 return false;
2596 }
2597
2598 auto base_name = GetBaseDeviceName(target_partition_name);
2599 if (!DeleteDeviceIfExists(base_name)) {
2600 LOG(ERROR) << "Cannot delete base device: " << base_name;
2601 return false;
2602 }
2603
2604 auto source_name = GetSourceDeviceName(target_partition_name);
2605 if (!DeleteDeviceIfExists(source_name)) {
2606 LOG(ERROR) << "Cannot delete source device: " << source_name;
2607 return false;
2608 }
2609
2610 LOG(INFO) << "Successfully unmapped snapshot " << target_partition_name;
2611
2612 return true;
2613 }
2614
MapCowDevices(LockedFile * lock,const CreateLogicalPartitionParams & params,const SnapshotStatus & snapshot_status,AutoDeviceList * created_devices,std::string * cow_name)2615 bool SnapshotManager::MapCowDevices(LockedFile* lock, const CreateLogicalPartitionParams& params,
2616 const SnapshotStatus& snapshot_status,
2617 AutoDeviceList* created_devices, std::string* cow_name) {
2618 CHECK(lock);
2619 CHECK(snapshot_status.cow_partition_size() + snapshot_status.cow_file_size() > 0);
2620 auto begin = std::chrono::steady_clock::now();
2621
2622 std::string partition_name = params.GetPartitionName();
2623 std::string cow_image_name = GetCowImageDeviceName(partition_name);
2624 *cow_name = GetCowName(partition_name);
2625
2626 // Map COW image if necessary.
2627 if (snapshot_status.cow_file_size() > 0) {
2628 if (!EnsureImageManager()) return false;
2629 auto remaining_time = GetRemainingTime(params.timeout_ms, begin);
2630 if (remaining_time.count() < 0) return false;
2631
2632 if (!MapCowImage(partition_name, remaining_time).has_value()) {
2633 LOG(ERROR) << "Could not map cow image for partition: " << partition_name;
2634 return false;
2635 }
2636 created_devices->EmplaceBack<AutoUnmapImage>(images_.get(), cow_image_name);
2637
2638 // If no COW partition exists, just return the image alone.
2639 if (snapshot_status.cow_partition_size() == 0) {
2640 *cow_name = std::move(cow_image_name);
2641 LOG(INFO) << "Mapped COW image for " << partition_name << " at " << *cow_name;
2642 return true;
2643 }
2644 }
2645
2646 auto remaining_time = GetRemainingTime(params.timeout_ms, begin);
2647 if (remaining_time.count() < 0) return false;
2648
2649 CHECK(snapshot_status.cow_partition_size() > 0);
2650
2651 // Create the DmTable for the COW device. It is the DmTable of the COW partition plus
2652 // COW image device as the last extent.
2653 CreateLogicalPartitionParams cow_partition_params = params;
2654 cow_partition_params.partition = nullptr;
2655 cow_partition_params.partition_name = *cow_name;
2656 cow_partition_params.device_name.clear();
2657 DmTable table;
2658 if (!CreateDmTable(cow_partition_params, &table)) {
2659 return false;
2660 }
2661 // If the COW image exists, append it as the last extent.
2662 if (snapshot_status.cow_file_size() > 0) {
2663 std::string cow_image_device;
2664 if (!GetMappedImageDeviceStringOrPath(cow_image_name, &cow_image_device)) {
2665 LOG(ERROR) << "Cannot determine major/minor for: " << cow_image_name;
2666 return false;
2667 }
2668 auto cow_partition_sectors = snapshot_status.cow_partition_size() / kSectorSize;
2669 auto cow_image_sectors = snapshot_status.cow_file_size() / kSectorSize;
2670 table.Emplace<DmTargetLinear>(cow_partition_sectors, cow_image_sectors, cow_image_device,
2671 0);
2672 }
2673
2674 // We have created the DmTable now. Map it.
2675 std::string cow_path;
2676 if (!dm_.CreateDevice(*cow_name, table, &cow_path, remaining_time)) {
2677 LOG(ERROR) << "Could not create COW device: " << *cow_name;
2678 return false;
2679 }
2680 created_devices->EmplaceBack<AutoUnmapDevice>(&dm_, *cow_name);
2681 LOG(INFO) << "Mapped COW device for " << params.GetPartitionName() << " at " << cow_path;
2682 return true;
2683 }
2684
UnmapCowDevices(LockedFile * lock,const std::string & name)2685 bool SnapshotManager::UnmapCowDevices(LockedFile* lock, const std::string& name) {
2686 CHECK(lock);
2687 if (!EnsureImageManager()) return false;
2688
2689 if (UpdateUsesCompression(lock) && !UpdateUsesUserSnapshots(lock)) {
2690 auto dm_user_name = GetDmUserCowName(name, GetSnapshotDriver(lock));
2691 if (!UnmapDmUserDevice(dm_user_name)) {
2692 return false;
2693 }
2694 }
2695
2696 if (!DeleteDeviceIfExists(GetCowName(name), 4000ms)) {
2697 LOG(ERROR) << "Cannot unmap: " << GetCowName(name);
2698 return false;
2699 }
2700
2701 std::string cow_image_name = GetCowImageDeviceName(name);
2702 if (!images_->UnmapImageIfExists(cow_image_name)) {
2703 LOG(ERROR) << "Cannot unmap image " << cow_image_name;
2704 return false;
2705 }
2706 return true;
2707 }
2708
UnmapDmUserDevice(const std::string & dm_user_name)2709 bool SnapshotManager::UnmapDmUserDevice(const std::string& dm_user_name) {
2710 if (dm_.GetState(dm_user_name) == DmDeviceState::INVALID) {
2711 return true;
2712 }
2713
2714 if (!DeleteDeviceIfExists(dm_user_name)) {
2715 LOG(ERROR) << "Cannot unmap " << dm_user_name;
2716 return false;
2717 }
2718
2719 if (EnsureSnapuserdConnected()) {
2720 if (!snapuserd_client_->WaitForDeviceDelete(dm_user_name)) {
2721 LOG(ERROR) << "Failed to wait for " << dm_user_name << " control device to delete";
2722 return false;
2723 }
2724 }
2725
2726 // Ensure the control device is gone so we don't run into ABA problems.
2727 auto control_device = "/dev/dm-user/" + dm_user_name;
2728 if (!android::fs_mgr::WaitForFileDeleted(control_device, 10s)) {
2729 LOG(ERROR) << "Timed out waiting for " << control_device << " to unlink";
2730 return false;
2731 }
2732 return true;
2733 }
2734
UnmapUserspaceSnapshotDevice(LockedFile * lock,const std::string & snapshot_name)2735 bool SnapshotManager::UnmapUserspaceSnapshotDevice(LockedFile* lock,
2736 const std::string& snapshot_name) {
2737 auto dm_user_name = GetDmUserCowName(snapshot_name, GetSnapshotDriver(lock));
2738 if (dm_.GetState(dm_user_name) == DmDeviceState::INVALID) {
2739 return true;
2740 }
2741
2742 CHECK(lock);
2743
2744 SnapshotStatus snapshot_status;
2745
2746 if (!ReadSnapshotStatus(lock, snapshot_name, &snapshot_status)) {
2747 return false;
2748 }
2749 // If the merge is complete, then we switch dm tables which is equivalent
2750 // to unmap; hence, we can't be deleting the device
2751 // as the table would be mounted off partitions and will fail.
2752 if (snapshot_status.state() != SnapshotState::MERGE_COMPLETED) {
2753 if (!DeleteDeviceIfExists(dm_user_name, 4000ms)) {
2754 LOG(ERROR) << "Cannot unmap " << dm_user_name;
2755 return false;
2756 }
2757 }
2758
2759 if (EnsureSnapuserdConnected()) {
2760 if (!snapuserd_client_->WaitForDeviceDelete(dm_user_name)) {
2761 LOG(ERROR) << "Failed to wait for " << dm_user_name << " control device to delete";
2762 return false;
2763 }
2764 }
2765
2766 // Ensure the control device is gone so we don't run into ABA problems.
2767 auto control_device = "/dev/dm-user/" + dm_user_name;
2768 if (!android::fs_mgr::WaitForFileDeleted(control_device, 10s)) {
2769 LOG(ERROR) << "Timed out waiting for " << control_device << " to unlink";
2770 return false;
2771 }
2772 return true;
2773 }
2774
MapAllSnapshots(const std::chrono::milliseconds & timeout_ms)2775 bool SnapshotManager::MapAllSnapshots(const std::chrono::milliseconds& timeout_ms) {
2776 auto lock = LockExclusive();
2777 if (!lock) return false;
2778
2779 auto state = ReadUpdateState(lock.get());
2780 if (state == UpdateState::Unverified) {
2781 if (GetCurrentSlot() == Slot::Target) {
2782 LOG(ERROR) << "Cannot call MapAllSnapshots when booting from the target slot.";
2783 return false;
2784 }
2785 } else if (state != UpdateState::Initiated) {
2786 LOG(ERROR) << "Cannot call MapAllSnapshots from update state: " << state;
2787 return false;
2788 }
2789
2790 std::vector<std::string> snapshots;
2791 if (!ListSnapshots(lock.get(), &snapshots)) {
2792 return false;
2793 }
2794
2795 const auto& opener = device_->GetPartitionOpener();
2796 auto slot_suffix = device_->GetOtherSlotSuffix();
2797 auto slot_number = SlotNumberForSlotSuffix(slot_suffix);
2798 auto super_device = device_->GetSuperDevice(slot_number);
2799 auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot_number);
2800 if (!metadata) {
2801 LOG(ERROR) << "MapAllSnapshots could not read dynamic partition metadata for device: "
2802 << super_device;
2803 return false;
2804 }
2805
2806 for (const auto& snapshot : snapshots) {
2807 if (!UnmapPartitionWithSnapshot(lock.get(), snapshot)) {
2808 LOG(ERROR) << "MapAllSnapshots could not unmap snapshot: " << snapshot;
2809 return false;
2810 }
2811
2812 CreateLogicalPartitionParams params = {
2813 .block_device = super_device,
2814 .metadata = metadata.get(),
2815 .partition_name = snapshot,
2816 .timeout_ms = timeout_ms,
2817 .partition_opener = &opener,
2818 };
2819 if (!MapPartitionWithSnapshot(lock.get(), std::move(params), SnapshotContext::Mount,
2820 nullptr)) {
2821 LOG(ERROR) << "MapAllSnapshots failed to map: " << snapshot;
2822 return false;
2823 }
2824 }
2825
2826 LOG(INFO) << "MapAllSnapshots succeeded.";
2827 return true;
2828 }
2829
UnmapAllSnapshots()2830 bool SnapshotManager::UnmapAllSnapshots() {
2831 auto lock = LockExclusive();
2832 if (!lock) return false;
2833
2834 return UnmapAllSnapshots(lock.get());
2835 }
2836
UnmapAllSnapshots(LockedFile * lock)2837 bool SnapshotManager::UnmapAllSnapshots(LockedFile* lock) {
2838 std::vector<std::string> snapshots;
2839 if (!ListSnapshots(lock, &snapshots)) {
2840 return false;
2841 }
2842
2843 for (const auto& snapshot : snapshots) {
2844 if (!UnmapPartitionWithSnapshot(lock, snapshot)) {
2845 LOG(ERROR) << "Failed to unmap snapshot: " << snapshot;
2846 return false;
2847 }
2848 }
2849
2850 // Terminate the daemon and release the snapuserd_client_ object.
2851 // If we need to re-connect with the daemon, EnsureSnapuserdConnected()
2852 // will re-create the object and establish the socket connection.
2853 if (snapuserd_client_) {
2854 LOG(INFO) << "Shutdown snapuserd daemon";
2855 snapuserd_client_->DetachSnapuserd();
2856 snapuserd_client_ = nullptr;
2857 }
2858
2859 return true;
2860 }
2861
OpenFile(const std::string & file,int lock_flags)2862 auto SnapshotManager::OpenFile(const std::string& file,
2863 int lock_flags) -> std::unique_ptr<LockedFile> {
2864 unique_fd fd(open(file.c_str(), O_RDONLY | O_CLOEXEC | O_NOFOLLOW));
2865 if (fd < 0) {
2866 PLOG(ERROR) << "Open failed: " << file;
2867 return nullptr;
2868 }
2869 if (lock_flags != 0 && TEMP_FAILURE_RETRY(flock(fd, lock_flags)) < 0) {
2870 PLOG(ERROR) << "Acquire flock failed: " << file;
2871 return nullptr;
2872 }
2873 // For simplicity, we want to CHECK that lock_mode == LOCK_EX, in some
2874 // calls, so strip extra flags.
2875 int lock_mode = lock_flags & (LOCK_EX | LOCK_SH);
2876 return std::make_unique<LockedFile>(file, std::move(fd), lock_mode);
2877 }
2878
~LockedFile()2879 SnapshotManager::LockedFile::~LockedFile() {
2880 if (TEMP_FAILURE_RETRY(flock(fd_, LOCK_UN)) < 0) {
2881 PLOG(ERROR) << "Failed to unlock file: " << path_;
2882 }
2883 }
2884
GetStateFilePath() const2885 std::string SnapshotManager::GetStateFilePath() const {
2886 return metadata_dir_ + "/state"s;
2887 }
2888
GetMergeStateFilePath() const2889 std::string SnapshotManager::GetMergeStateFilePath() const {
2890 return metadata_dir_ + "/merge_state"s;
2891 }
2892
GetLockPath() const2893 std::string SnapshotManager::GetLockPath() const {
2894 return metadata_dir_;
2895 }
2896
OpenLock(int lock_flags)2897 std::unique_ptr<SnapshotManager::LockedFile> SnapshotManager::OpenLock(int lock_flags) {
2898 auto lock_file = GetLockPath();
2899 return OpenFile(lock_file, lock_flags);
2900 }
2901
LockShared()2902 std::unique_ptr<SnapshotManager::LockedFile> SnapshotManager::LockShared() {
2903 return OpenLock(LOCK_SH);
2904 }
2905
LockExclusive()2906 std::unique_ptr<SnapshotManager::LockedFile> SnapshotManager::LockExclusive() {
2907 return OpenLock(LOCK_EX);
2908 }
2909
UpdateStateFromString(const std::string & contents)2910 static UpdateState UpdateStateFromString(const std::string& contents) {
2911 if (contents.empty() || contents == "none") {
2912 return UpdateState::None;
2913 } else if (contents == "initiated") {
2914 return UpdateState::Initiated;
2915 } else if (contents == "unverified") {
2916 return UpdateState::Unverified;
2917 } else if (contents == "merging") {
2918 return UpdateState::Merging;
2919 } else if (contents == "merge-completed") {
2920 return UpdateState::MergeCompleted;
2921 } else if (contents == "merge-needs-reboot") {
2922 return UpdateState::MergeNeedsReboot;
2923 } else if (contents == "merge-failed") {
2924 return UpdateState::MergeFailed;
2925 } else if (contents == "cancelled") {
2926 return UpdateState::Cancelled;
2927 } else {
2928 LOG(ERROR) << "Unknown merge state in update state file: \"" << contents << "\"";
2929 return UpdateState::None;
2930 }
2931 }
2932
operator <<(std::ostream & os,UpdateState state)2933 std::ostream& operator<<(std::ostream& os, UpdateState state) {
2934 switch (state) {
2935 case UpdateState::None:
2936 return os << "none";
2937 case UpdateState::Initiated:
2938 return os << "initiated";
2939 case UpdateState::Unverified:
2940 return os << "unverified";
2941 case UpdateState::Merging:
2942 return os << "merging";
2943 case UpdateState::MergeCompleted:
2944 return os << "merge-completed";
2945 case UpdateState::MergeNeedsReboot:
2946 return os << "merge-needs-reboot";
2947 case UpdateState::MergeFailed:
2948 return os << "merge-failed";
2949 case UpdateState::Cancelled:
2950 return os << "cancelled";
2951 default:
2952 LOG(ERROR) << "Unknown update state: " << static_cast<uint32_t>(state);
2953 return os;
2954 }
2955 }
2956
operator <<(std::ostream & os,MergePhase phase)2957 std::ostream& operator<<(std::ostream& os, MergePhase phase) {
2958 switch (phase) {
2959 case MergePhase::NO_MERGE:
2960 return os << "none";
2961 case MergePhase::FIRST_PHASE:
2962 return os << "first";
2963 case MergePhase::SECOND_PHASE:
2964 return os << "second";
2965 default:
2966 LOG(ERROR) << "Unknown merge phase: " << static_cast<uint32_t>(phase);
2967 return os << "unknown(" << static_cast<uint32_t>(phase) << ")";
2968 }
2969 }
2970
ReadUpdateState(LockedFile * lock)2971 UpdateState SnapshotManager::ReadUpdateState(LockedFile* lock) {
2972 SnapshotUpdateStatus status = ReadSnapshotUpdateStatus(lock);
2973 return status.state();
2974 }
2975
ReadSnapshotUpdateStatus(LockedFile * lock)2976 SnapshotUpdateStatus SnapshotManager::ReadSnapshotUpdateStatus(LockedFile* lock) {
2977 CHECK(lock);
2978
2979 SnapshotUpdateStatus status = {};
2980 std::string contents;
2981 if (!android::base::ReadFileToString(GetStateFilePath(), &contents)) {
2982 PLOG(ERROR) << "Read state file failed";
2983 status.set_state(UpdateState::None);
2984 return status;
2985 }
2986
2987 if (!status.ParseFromString(contents)) {
2988 LOG(WARNING) << "Unable to parse state file as SnapshotUpdateStatus, using the old format";
2989
2990 // Try to rollback to legacy file to support devices that are
2991 // currently using the old file format.
2992 // TODO(b/147409432)
2993 status.set_state(UpdateStateFromString(contents));
2994 }
2995
2996 return status;
2997 }
2998
WriteUpdateState(LockedFile * lock,UpdateState state,MergeFailureCode failure_code)2999 bool SnapshotManager::WriteUpdateState(LockedFile* lock, UpdateState state,
3000 MergeFailureCode failure_code) {
3001 SnapshotUpdateStatus status;
3002 status.set_state(state);
3003
3004 switch (state) {
3005 case UpdateState::MergeFailed:
3006 status.set_merge_failure_code(failure_code);
3007 break;
3008 case UpdateState::Initiated:
3009 status.set_source_build_fingerprint(
3010 android::base::GetProperty("ro.build.fingerprint", ""));
3011 break;
3012 default:
3013 break;
3014 }
3015
3016 // If we're transitioning between two valid states (eg, we're not beginning
3017 // or ending an OTA), then make sure to propagate the compression bit and
3018 // build fingerprint.
3019 if (!(state == UpdateState::Initiated || state == UpdateState::None)) {
3020 SnapshotUpdateStatus old_status = ReadSnapshotUpdateStatus(lock);
3021 status.set_using_snapuserd(old_status.using_snapuserd());
3022 status.set_source_build_fingerprint(old_status.source_build_fingerprint());
3023 status.set_merge_phase(old_status.merge_phase());
3024 status.set_userspace_snapshots(old_status.userspace_snapshots());
3025 status.set_io_uring_enabled(old_status.io_uring_enabled());
3026 status.set_legacy_snapuserd(old_status.legacy_snapuserd());
3027 status.set_o_direct(old_status.o_direct());
3028 }
3029 return WriteSnapshotUpdateStatus(lock, status);
3030 }
3031
WriteSnapshotUpdateStatus(LockedFile * lock,const SnapshotUpdateStatus & status)3032 bool SnapshotManager::WriteSnapshotUpdateStatus(LockedFile* lock,
3033 const SnapshotUpdateStatus& status) {
3034 CHECK(lock);
3035 CHECK(lock->lock_mode() == LOCK_EX);
3036
3037 std::string contents;
3038 if (!status.SerializeToString(&contents)) {
3039 LOG(ERROR) << "Unable to serialize SnapshotUpdateStatus.";
3040 return false;
3041 }
3042
3043 #ifdef LIBSNAPSHOT_USE_HAL
3044 auto merge_status = MergeStatus::UNKNOWN;
3045 switch (status.state()) {
3046 // The needs-reboot and completed cases imply that /data and /metadata
3047 // can be safely wiped, so we don't report a merge status.
3048 case UpdateState::None:
3049 case UpdateState::MergeNeedsReboot:
3050 case UpdateState::MergeCompleted:
3051 case UpdateState::Initiated:
3052 merge_status = MergeStatus::NONE;
3053 break;
3054 case UpdateState::Unverified:
3055 merge_status = MergeStatus::SNAPSHOTTED;
3056 break;
3057 case UpdateState::Merging:
3058 case UpdateState::MergeFailed:
3059 merge_status = MergeStatus::MERGING;
3060 break;
3061 default:
3062 // Note that Cancelled flows to here - it is never written, since
3063 // it only communicates a transient state to the caller.
3064 LOG(ERROR) << "Unexpected update status: " << status.state();
3065 break;
3066 }
3067
3068 bool set_before_write =
3069 merge_status == MergeStatus::SNAPSHOTTED || merge_status == MergeStatus::MERGING;
3070 if (set_before_write && !device_->SetBootControlMergeStatus(merge_status)) {
3071 return false;
3072 }
3073 #endif
3074
3075 if (!WriteStringToFileAtomic(contents, GetStateFilePath())) {
3076 PLOG(ERROR) << "Could not write to state file";
3077 return false;
3078 }
3079
3080 #ifdef LIBSNAPSHOT_USE_HAL
3081 if (!set_before_write && !device_->SetBootControlMergeStatus(merge_status)) {
3082 return false;
3083 }
3084 #endif
3085 return true;
3086 }
3087
GetSnapshotStatusFilePath(const std::string & name)3088 std::string SnapshotManager::GetSnapshotStatusFilePath(const std::string& name) {
3089 auto file = metadata_dir_ + "/snapshots/"s + name;
3090 return file;
3091 }
3092
ReadSnapshotStatus(LockedFile * lock,const std::string & name,SnapshotStatus * status)3093 bool SnapshotManager::ReadSnapshotStatus(LockedFile* lock, const std::string& name,
3094 SnapshotStatus* status) {
3095 CHECK(lock);
3096 auto path = GetSnapshotStatusFilePath(name);
3097
3098 unique_fd fd(open(path.c_str(), O_RDONLY | O_CLOEXEC | O_NOFOLLOW));
3099 if (fd < 0) {
3100 PLOG(ERROR) << "Open failed: " << path;
3101 return false;
3102 }
3103
3104 if (!status->ParseFromFileDescriptor(fd.get())) {
3105 PLOG(ERROR) << "Unable to parse " << path << " as SnapshotStatus";
3106 return false;
3107 }
3108
3109 if (status->name() != name) {
3110 LOG(WARNING) << "Found snapshot status named " << status->name() << " in " << path;
3111 status->set_name(name);
3112 }
3113
3114 return true;
3115 }
3116
WriteSnapshotStatus(LockedFile * lock,const SnapshotStatus & status)3117 bool SnapshotManager::WriteSnapshotStatus(LockedFile* lock, const SnapshotStatus& status) {
3118 // The caller must take an exclusive lock to modify snapshots.
3119 CHECK(lock);
3120 CHECK(lock->lock_mode() == LOCK_EX);
3121 CHECK(!status.name().empty());
3122
3123 auto path = GetSnapshotStatusFilePath(status.name());
3124
3125 std::string content;
3126 if (!status.SerializeToString(&content)) {
3127 LOG(ERROR) << "Unable to serialize SnapshotStatus for " << status.name();
3128 return false;
3129 }
3130
3131 if (!WriteStringToFileAtomic(content, path)) {
3132 PLOG(ERROR) << "Unable to write SnapshotStatus to " << path;
3133 return false;
3134 }
3135
3136 return true;
3137 }
3138
EnsureImageManager()3139 bool SnapshotManager::EnsureImageManager() {
3140 if (images_) return true;
3141
3142 images_ = device_->OpenImageManager();
3143 if (!images_) {
3144 LOG(ERROR) << "Could not open ImageManager";
3145 return false;
3146 }
3147 return true;
3148 }
3149
EnsureSnapuserdConnected(std::chrono::milliseconds timeout_ms)3150 bool SnapshotManager::EnsureSnapuserdConnected(std::chrono::milliseconds timeout_ms) {
3151 if (snapuserd_client_) {
3152 return true;
3153 }
3154
3155 if (!use_first_stage_snapuserd_ && !EnsureSnapuserdStarted()) {
3156 return false;
3157 }
3158
3159 snapuserd_client_ = SnapuserdClient::Connect(kSnapuserdSocket, timeout_ms);
3160 if (!snapuserd_client_) {
3161 LOG(ERROR) << "Unable to connect to snapuserd";
3162 return false;
3163 }
3164 return true;
3165 }
3166
UnmapAndDeleteCowPartition(MetadataBuilder * current_metadata)3167 void SnapshotManager::UnmapAndDeleteCowPartition(MetadataBuilder* current_metadata) {
3168 std::vector<std::string> to_delete;
3169 for (auto* existing_cow_partition : current_metadata->ListPartitionsInGroup(kCowGroupName)) {
3170 if (!DeleteDeviceIfExists(existing_cow_partition->name())) {
3171 LOG(WARNING) << existing_cow_partition->name()
3172 << " cannot be unmapped and its space cannot be reclaimed";
3173 continue;
3174 }
3175 to_delete.push_back(existing_cow_partition->name());
3176 }
3177 for (const auto& name : to_delete) {
3178 current_metadata->RemovePartition(name);
3179 }
3180 }
3181
AddRequiredSpace(Return orig,const std::map<std::string,SnapshotStatus> & all_snapshot_status)3182 static Return AddRequiredSpace(Return orig,
3183 const std::map<std::string, SnapshotStatus>& all_snapshot_status) {
3184 if (orig.error_code() != Return::ErrorCode::NO_SPACE) {
3185 return orig;
3186 }
3187 uint64_t sum = 0;
3188 for (auto&& [name, status] : all_snapshot_status) {
3189 sum += status.cow_file_size();
3190 }
3191 LOG(INFO) << "Calculated needed COW space: " << sum << " bytes";
3192 return Return::NoSpace(sum);
3193 }
3194
CreateUpdateSnapshots(const DeltaArchiveManifest & manifest)3195 Return SnapshotManager::CreateUpdateSnapshots(const DeltaArchiveManifest& manifest) {
3196 auto lock = LockExclusive();
3197 if (!lock) return Return::Error();
3198
3199 auto update_state = ReadUpdateState(lock.get());
3200 if (update_state != UpdateState::Initiated) {
3201 LOG(ERROR) << "Cannot create update snapshots in state " << update_state;
3202 return Return::Error();
3203 }
3204
3205 // TODO(b/134949511): remove this check. Right now, with overlayfs mounted, the scratch
3206 // partition takes up a big chunk of space in super, causing COW images to be created on
3207 // retrofit Virtual A/B devices.
3208 if (device_->IsOverlayfsSetup()) {
3209 LOG(ERROR) << "Cannot create update snapshots with overlayfs setup. Run `adb enable-verity`"
3210 << ", reboot, then try again.";
3211 return Return::Error();
3212 }
3213
3214 const auto& opener = device_->GetPartitionOpener();
3215 auto current_suffix = device_->GetSlotSuffix();
3216 uint32_t current_slot = SlotNumberForSlotSuffix(current_suffix);
3217 auto target_suffix = device_->GetOtherSlotSuffix();
3218 uint32_t target_slot = SlotNumberForSlotSuffix(target_suffix);
3219 auto current_super = device_->GetSuperDevice(current_slot);
3220
3221 auto current_metadata = MetadataBuilder::New(opener, current_super, current_slot);
3222 if (current_metadata == nullptr) {
3223 LOG(ERROR) << "Cannot create metadata builder.";
3224 return Return::Error();
3225 }
3226
3227 auto target_metadata =
3228 MetadataBuilder::NewForUpdate(opener, current_super, current_slot, target_slot);
3229 if (target_metadata == nullptr) {
3230 LOG(ERROR) << "Cannot create target metadata builder.";
3231 return Return::Error();
3232 }
3233
3234 // Delete partitions with target suffix in |current_metadata|. Otherwise,
3235 // partition_cow_creator recognizes these left-over partitions as used space.
3236 for (const auto& group_name : current_metadata->ListGroups()) {
3237 if (android::base::EndsWith(group_name, target_suffix)) {
3238 current_metadata->RemoveGroupAndPartitions(group_name);
3239 }
3240 }
3241
3242 SnapshotMetadataUpdater metadata_updater(target_metadata.get(), target_slot, manifest);
3243 if (!metadata_updater.Update()) {
3244 LOG(ERROR) << "Cannot calculate new metadata.";
3245 return Return::Error();
3246 }
3247
3248 // Delete previous COW partitions in current_metadata so that PartitionCowCreator marks those as
3249 // free regions.
3250 UnmapAndDeleteCowPartition(current_metadata.get());
3251
3252 // Check that all these metadata is not retrofit dynamic partitions. Snapshots on
3253 // devices with retrofit dynamic partitions does not make sense.
3254 // This ensures that current_metadata->GetFreeRegions() uses the same device
3255 // indices as target_metadata (i.e. 0 -> "super").
3256 // This is also assumed in MapCowDevices() call below.
3257 CHECK(current_metadata->GetBlockDevicePartitionName(0) == LP_METADATA_DEFAULT_PARTITION_NAME &&
3258 target_metadata->GetBlockDevicePartitionName(0) == LP_METADATA_DEFAULT_PARTITION_NAME);
3259
3260 const auto& dap_metadata = manifest.dynamic_partition_metadata();
3261
3262 std::string vabc_disable_reason;
3263 if (!dap_metadata.vabc_enabled()) {
3264 vabc_disable_reason = "not enabled metadata";
3265 } else if (device_->IsRecovery()) {
3266 vabc_disable_reason = "recovery";
3267 } else if (!KernelSupportsCompressedSnapshots()) {
3268 vabc_disable_reason = "kernel missing userspace block device support";
3269 }
3270
3271 // Deduce supported features.
3272 bool userspace_snapshots = CanUseUserspaceSnapshots();
3273 bool legacy_compression = GetLegacyCompressionEnabledProperty();
3274 bool is_legacy_snapuserd = IsVendorFromAndroid12();
3275
3276 if (!vabc_disable_reason.empty()) {
3277 if (userspace_snapshots) {
3278 LOG(INFO) << "Userspace snapshots disabled: " << vabc_disable_reason;
3279 }
3280 if (legacy_compression) {
3281 LOG(INFO) << "Compression disabled: " << vabc_disable_reason;
3282 }
3283 userspace_snapshots = false;
3284 legacy_compression = false;
3285 is_legacy_snapuserd = false;
3286 }
3287
3288 if (legacy_compression || userspace_snapshots) {
3289 if (dap_metadata.cow_version() < kMinCowVersion ||
3290 dap_metadata.cow_version() > kMaxCowVersion) {
3291 LOG(ERROR) << "Manifest cow version is out of bounds (got: "
3292 << dap_metadata.cow_version() << ", min: " << kMinCowVersion
3293 << ", max: " << kMaxCowVersion << ")";
3294 return Return::Error();
3295 }
3296 }
3297
3298 if (!userspace_snapshots && is_legacy_snapuserd && legacy_compression) {
3299 userspace_snapshots = true;
3300 LOG(INFO) << "Vendor from Android 12. Enabling userspace snapshot for OTA install";
3301 }
3302
3303 const bool using_snapuserd = userspace_snapshots || legacy_compression;
3304 if (!using_snapuserd) {
3305 LOG(INFO) << "Using legacy Virtual A/B (dm-snapshot)";
3306 }
3307
3308 std::string compression_algorithm;
3309 uint64_t compression_factor{};
3310 if (using_snapuserd) {
3311 compression_algorithm = dap_metadata.vabc_compression_param();
3312 compression_factor = dap_metadata.compression_factor();
3313 if (compression_algorithm.empty()) {
3314 // Older OTAs don't set an explicit compression type, so default to gz.
3315 compression_algorithm = "gz";
3316 }
3317 LOG(INFO) << "using compression algorithm: " << compression_algorithm
3318 << ", max compressible block size: " << compression_factor;
3319 }
3320 auto read_ahead_size =
3321 android::base::GetUintProperty<uint>("ro.virtual_ab.read_ahead_size", kReadAheadSizeKb);
3322 PartitionCowCreator cow_creator{
3323 .target_metadata = target_metadata.get(),
3324 .target_suffix = target_suffix,
3325 .target_partition = nullptr,
3326 .current_metadata = current_metadata.get(),
3327 .current_suffix = current_suffix,
3328 .update = nullptr,
3329 .extra_extents = {},
3330 .using_snapuserd = using_snapuserd,
3331 .compression_algorithm = compression_algorithm,
3332 .compression_factor = compression_factor,
3333 .read_ahead_size = read_ahead_size,
3334 };
3335
3336 if (dap_metadata.vabc_feature_set().has_threaded()) {
3337 cow_creator.enable_threading = dap_metadata.vabc_feature_set().threaded();
3338 }
3339 if (dap_metadata.vabc_feature_set().has_batch_writes()) {
3340 cow_creator.batched_writes = dap_metadata.vabc_feature_set().batch_writes();
3341 }
3342
3343 // In case of error, automatically delete devices that are created along the way.
3344 // Note that "lock" is destroyed after "created_devices", so it is safe to use |lock| for
3345 // these devices.
3346 AutoDeviceList created_devices;
3347 std::map<std::string, SnapshotStatus> all_snapshot_status;
3348 auto ret = CreateUpdateSnapshotsInternal(lock.get(), manifest, &cow_creator, &created_devices,
3349 &all_snapshot_status);
3350 if (!ret.is_ok()) {
3351 LOG(ERROR) << "CreateUpdateSnapshotsInternal failed: " << ret.string();
3352 return ret;
3353 }
3354
3355 auto exported_target_metadata = target_metadata->Export();
3356 if (exported_target_metadata == nullptr) {
3357 LOG(ERROR) << "Cannot export target metadata";
3358 return Return::Error();
3359 }
3360
3361 ret = InitializeUpdateSnapshots(lock.get(), dap_metadata.cow_version(), target_metadata.get(),
3362 exported_target_metadata.get(), target_suffix,
3363 all_snapshot_status);
3364 if (!ret.is_ok()) return ret;
3365
3366 if (!UpdatePartitionTable(opener, device_->GetSuperDevice(target_slot),
3367 *exported_target_metadata, target_slot)) {
3368 LOG(ERROR) << "Cannot write target metadata";
3369 return Return::Error();
3370 }
3371
3372 // If snapuserd is enabled, we need to retain a copy of the old metadata
3373 // so we can access original blocks in case they are moved around. We do
3374 // not want to rely on the old super metadata slot because we don't
3375 // guarantee its validity after the slot switch is successful.
3376 if (using_snapuserd) {
3377 auto metadata = current_metadata->Export();
3378 if (!metadata) {
3379 LOG(ERROR) << "Could not export current metadata";
3380 return Return::Error();
3381 }
3382
3383 auto path = GetOldPartitionMetadataPath();
3384 if (!android::fs_mgr::WriteToImageFile(path, *metadata.get())) {
3385 LOG(ERROR) << "Cannot write old metadata to " << path;
3386 return Return::Error();
3387 }
3388 }
3389
3390 SnapshotUpdateStatus status = ReadSnapshotUpdateStatus(lock.get());
3391 status.set_state(update_state);
3392 status.set_using_snapuserd(using_snapuserd);
3393
3394 if (userspace_snapshots) {
3395 status.set_userspace_snapshots(true);
3396 LOG(INFO) << "Virtual A/B using userspace snapshots";
3397
3398 if (GetIouringEnabledProperty()) {
3399 status.set_io_uring_enabled(true);
3400 LOG(INFO) << "io_uring for snapshots enabled";
3401 }
3402 if (GetODirectEnabledProperty()) {
3403 status.set_o_direct(true);
3404 LOG(INFO) << "o_direct for source image enabled";
3405 }
3406 if (is_legacy_snapuserd) {
3407 status.set_legacy_snapuserd(true);
3408 LOG(INFO) << "Setting legacy_snapuserd to true";
3409 }
3410 } else if (legacy_compression) {
3411 LOG(INFO) << "Virtual A/B using legacy snapuserd";
3412 } else {
3413 LOG(INFO) << "Virtual A/B using dm-snapshot";
3414 }
3415
3416 is_snapshot_userspace_.emplace(userspace_snapshots);
3417 is_legacy_snapuserd_.emplace(is_legacy_snapuserd);
3418
3419 if (!device()->IsTestDevice() && using_snapuserd) {
3420 // Terminate stale daemon if any
3421 std::unique_ptr<SnapuserdClient> snapuserd_client = std::move(snapuserd_client_);
3422 if (!snapuserd_client) {
3423 snapuserd_client = SnapuserdClient::TryConnect(kSnapuserdSocket, 5s);
3424 }
3425 if (snapuserd_client) {
3426 snapuserd_client->DetachSnapuserd();
3427 snapuserd_client = nullptr;
3428 }
3429 }
3430
3431 if (!WriteSnapshotUpdateStatus(lock.get(), status)) {
3432 LOG(ERROR) << "Unable to write new update state";
3433 return Return::Error();
3434 }
3435
3436 created_devices.Release();
3437 LOG(INFO) << "Successfully created all snapshots for target slot " << target_suffix;
3438
3439 return Return::Ok();
3440 }
3441
CreateUpdateSnapshotsInternal(LockedFile * lock,const DeltaArchiveManifest & manifest,PartitionCowCreator * cow_creator,AutoDeviceList * created_devices,std::map<std::string,SnapshotStatus> * all_snapshot_status)3442 Return SnapshotManager::CreateUpdateSnapshotsInternal(
3443 LockedFile* lock, const DeltaArchiveManifest& manifest, PartitionCowCreator* cow_creator,
3444 AutoDeviceList* created_devices,
3445 std::map<std::string, SnapshotStatus>* all_snapshot_status) {
3446 CHECK(lock);
3447
3448 auto* target_metadata = cow_creator->target_metadata;
3449 const auto& target_suffix = cow_creator->target_suffix;
3450
3451 if (!target_metadata->AddGroup(kCowGroupName, 0)) {
3452 LOG(ERROR) << "Cannot add group " << kCowGroupName;
3453 return Return::Error();
3454 }
3455
3456 std::map<std::string, const PartitionUpdate*> partition_map;
3457 std::map<std::string, std::vector<Extent>> extra_extents_map;
3458 for (const auto& partition_update : manifest.partitions()) {
3459 auto suffixed_name = partition_update.partition_name() + target_suffix;
3460 auto&& [it, inserted] = partition_map.emplace(suffixed_name, &partition_update);
3461 if (!inserted) {
3462 LOG(ERROR) << "Duplicated partition " << partition_update.partition_name()
3463 << " in update manifest.";
3464 return Return::Error();
3465 }
3466
3467 auto& extra_extents = extra_extents_map[suffixed_name];
3468 if (partition_update.has_hash_tree_extent()) {
3469 extra_extents.push_back(partition_update.hash_tree_extent());
3470 }
3471 if (partition_update.has_fec_extent()) {
3472 extra_extents.push_back(partition_update.fec_extent());
3473 }
3474 }
3475
3476 for (auto* target_partition : ListPartitionsWithSuffix(target_metadata, target_suffix)) {
3477 cow_creator->target_partition = target_partition;
3478 cow_creator->update = nullptr;
3479 auto iter = partition_map.find(target_partition->name());
3480 if (iter != partition_map.end()) {
3481 cow_creator->update = iter->second;
3482 } else {
3483 LOG(INFO) << target_partition->name()
3484 << " isn't included in the payload, skipping the cow creation.";
3485 continue;
3486 }
3487
3488 cow_creator->extra_extents.clear();
3489 auto extra_extents_it = extra_extents_map.find(target_partition->name());
3490 if (extra_extents_it != extra_extents_map.end()) {
3491 cow_creator->extra_extents = std::move(extra_extents_it->second);
3492 }
3493
3494 // Compute the device sizes for the partition.
3495 auto cow_creator_ret = cow_creator->Run();
3496 if (!cow_creator_ret.has_value()) {
3497 LOG(ERROR) << "PartitionCowCreator returned no value for " << target_partition->name();
3498 return Return::Error();
3499 }
3500
3501 LOG(INFO) << "For partition " << target_partition->name()
3502 << ", device size = " << cow_creator_ret->snapshot_status.device_size()
3503 << ", snapshot size = " << cow_creator_ret->snapshot_status.snapshot_size()
3504 << ", cow partition size = "
3505 << cow_creator_ret->snapshot_status.cow_partition_size()
3506 << ", cow file size = " << cow_creator_ret->snapshot_status.cow_file_size();
3507
3508 // Delete any existing snapshot before re-creating one.
3509 if (!DeleteSnapshot(lock, target_partition->name())) {
3510 LOG(ERROR) << "Cannot delete existing snapshot before creating a new one for partition "
3511 << target_partition->name();
3512 return Return::Error();
3513 }
3514
3515 // It is possible that the whole partition uses free space in super, and snapshot / COW
3516 // would not be needed. In this case, skip the partition.
3517 bool needs_snapshot = cow_creator_ret->snapshot_status.snapshot_size() > 0;
3518 bool needs_cow = (cow_creator_ret->snapshot_status.cow_partition_size() +
3519 cow_creator_ret->snapshot_status.cow_file_size()) > 0;
3520 CHECK(needs_snapshot == needs_cow);
3521
3522 if (!needs_snapshot) {
3523 LOG(INFO) << "Skip creating snapshot for partition " << target_partition->name()
3524 << "because nothing needs to be snapshotted.";
3525 continue;
3526 }
3527
3528 // Find the original partition size.
3529 auto name = target_partition->name();
3530 auto old_partition_name =
3531 name.substr(0, name.size() - target_suffix.size()) + cow_creator->current_suffix;
3532 auto old_partition = cow_creator->current_metadata->FindPartition(old_partition_name);
3533 if (old_partition) {
3534 cow_creator_ret->snapshot_status.set_old_partition_size(old_partition->size());
3535 }
3536
3537 // Store these device sizes to snapshot status file.
3538 if (!CreateSnapshot(lock, cow_creator, &cow_creator_ret->snapshot_status)) {
3539 return Return::Error();
3540 }
3541 created_devices->EmplaceBack<AutoDeleteSnapshot>(this, lock, target_partition->name());
3542
3543 // Create the COW partition. That is, use any remaining free space in super partition before
3544 // creating the COW images.
3545 if (cow_creator_ret->snapshot_status.cow_partition_size() > 0) {
3546 CHECK(cow_creator_ret->snapshot_status.cow_partition_size() % kSectorSize == 0)
3547 << "cow_partition_size == "
3548 << cow_creator_ret->snapshot_status.cow_partition_size()
3549 << " is not a multiple of sector size " << kSectorSize;
3550 auto cow_partition = target_metadata->AddPartition(GetCowName(target_partition->name()),
3551 kCowGroupName, 0 /* flags */);
3552 if (cow_partition == nullptr) {
3553 return Return::Error();
3554 }
3555
3556 if (!target_metadata->ResizePartition(
3557 cow_partition, cow_creator_ret->snapshot_status.cow_partition_size(),
3558 cow_creator_ret->cow_partition_usable_regions)) {
3559 LOG(ERROR) << "Cannot create COW partition on metadata with size "
3560 << cow_creator_ret->snapshot_status.cow_partition_size();
3561 return Return::Error();
3562 }
3563 // Only the in-memory target_metadata is modified; nothing to clean up if there is an
3564 // error in the future.
3565 }
3566
3567 all_snapshot_status->emplace(target_partition->name(),
3568 std::move(cow_creator_ret->snapshot_status));
3569
3570 LOG(INFO) << "Successfully created snapshot partition for " << target_partition->name();
3571 }
3572
3573 LOG(INFO) << "Allocating CoW images.";
3574
3575 for (auto&& [name, snapshot_status] : *all_snapshot_status) {
3576 // Create the backing COW image if necessary.
3577 if (snapshot_status.cow_file_size() > 0) {
3578 auto ret = CreateCowImage(lock, name);
3579 if (!ret.is_ok()) {
3580 LOG(ERROR) << "CreateCowImage failed: " << ret.string();
3581 return AddRequiredSpace(ret, *all_snapshot_status);
3582 }
3583 }
3584
3585 LOG(INFO) << "Successfully created snapshot for " << name;
3586 }
3587
3588 return Return::Ok();
3589 }
3590
InitializeUpdateSnapshots(LockedFile * lock,uint32_t cow_version,MetadataBuilder * target_metadata,const LpMetadata * exported_target_metadata,const std::string & target_suffix,const std::map<std::string,SnapshotStatus> & all_snapshot_status)3591 Return SnapshotManager::InitializeUpdateSnapshots(
3592 LockedFile* lock, uint32_t cow_version, MetadataBuilder* target_metadata,
3593 const LpMetadata* exported_target_metadata, const std::string& target_suffix,
3594 const std::map<std::string, SnapshotStatus>& all_snapshot_status) {
3595 CHECK(lock);
3596
3597 CreateLogicalPartitionParams cow_params{
3598 .block_device = LP_METADATA_DEFAULT_PARTITION_NAME,
3599 .metadata = exported_target_metadata,
3600 .timeout_ms = std::chrono::milliseconds::max(),
3601 .partition_opener = &device_->GetPartitionOpener(),
3602 };
3603 for (auto* target_partition : ListPartitionsWithSuffix(target_metadata, target_suffix)) {
3604 AutoDeviceList created_devices_for_cow;
3605
3606 if (!UnmapPartitionWithSnapshot(lock, target_partition->name())) {
3607 LOG(ERROR) << "Cannot unmap existing COW devices before re-mapping them for zero-fill: "
3608 << target_partition->name();
3609 return Return::Error();
3610 }
3611
3612 auto it = all_snapshot_status.find(target_partition->name());
3613 if (it == all_snapshot_status.end()) continue;
3614 cow_params.partition_name = target_partition->name();
3615 std::string cow_name;
3616 if (!MapCowDevices(lock, cow_params, it->second, &created_devices_for_cow, &cow_name)) {
3617 return Return::Error();
3618 }
3619
3620 std::string cow_path;
3621 if (!images_->GetMappedImageDevice(cow_name, &cow_path)) {
3622 LOG(ERROR) << "Cannot determine path for " << cow_name;
3623 return Return::Error();
3624 }
3625
3626 if (!android::fs_mgr::WaitForFile(cow_path, 6s)) {
3627 LOG(ERROR) << "Timed out waiting for device to appear: " << cow_path;
3628 return Return::Error();
3629 }
3630
3631 if (it->second.using_snapuserd()) {
3632 unique_fd fd(open(cow_path.c_str(), O_RDWR | O_CLOEXEC));
3633 if (fd < 0) {
3634 PLOG(ERROR) << "open " << cow_path << " failed for snapshot "
3635 << cow_params.partition_name;
3636 return Return::Error();
3637 }
3638
3639 CowOptions options;
3640 if (device()->IsTestDevice()) {
3641 options.scratch_space = false;
3642 }
3643 options.compression = it->second.compression_algorithm();
3644 if (cow_version >= 3) {
3645 options.op_count_max = it->second.estimated_ops_buffer_size();
3646 options.max_blocks = {it->second.device_size() / options.block_size};
3647 }
3648
3649 auto writer = CreateCowWriter(cow_version, options, std::move(fd));
3650 if (!writer->Finalize()) {
3651 LOG(ERROR) << "Could not initialize COW device for " << target_partition->name();
3652 return Return::Error();
3653 }
3654 } else {
3655 auto ret = InitializeKernelCow(cow_path);
3656 if (!ret.is_ok()) {
3657 LOG(ERROR) << "Can't zero-fill COW device for " << target_partition->name() << ": "
3658 << cow_path;
3659 return AddRequiredSpace(ret, all_snapshot_status);
3660 }
3661 }
3662 // Let destructor of created_devices_for_cow to unmap the COW devices.
3663 };
3664 return Return::Ok();
3665 }
3666
MapUpdateSnapshot(const CreateLogicalPartitionParams & params,std::string * snapshot_path)3667 bool SnapshotManager::MapUpdateSnapshot(const CreateLogicalPartitionParams& params,
3668 std::string* snapshot_path) {
3669 auto lock = LockShared();
3670 if (!lock) return false;
3671 if (!UnmapPartitionWithSnapshot(lock.get(), params.GetPartitionName())) {
3672 LOG(ERROR) << "Cannot unmap existing snapshot before re-mapping it: "
3673 << params.GetPartitionName();
3674 return false;
3675 }
3676
3677 SnapshotStatus status;
3678 if (!ReadSnapshotStatus(lock.get(), params.GetPartitionName(), &status)) {
3679 return false;
3680 }
3681 if (status.using_snapuserd()) {
3682 LOG(ERROR) << "Cannot use MapUpdateSnapshot with snapuserd";
3683 return false;
3684 }
3685
3686 SnapshotPaths paths;
3687 if (!MapPartitionWithSnapshot(lock.get(), params, SnapshotContext::Update, &paths)) {
3688 return false;
3689 }
3690
3691 if (!paths.snapshot_device.empty()) {
3692 *snapshot_path = paths.snapshot_device;
3693 } else {
3694 *snapshot_path = paths.target_device;
3695 }
3696 DCHECK(!snapshot_path->empty());
3697 return true;
3698 }
3699
OpenSnapshotWriter(const android::fs_mgr::CreateLogicalPartitionParams & params,std::optional<uint64_t> label)3700 std::unique_ptr<ICowWriter> SnapshotManager::OpenSnapshotWriter(
3701 const android::fs_mgr::CreateLogicalPartitionParams& params,
3702 std::optional<uint64_t> label) {
3703 #if defined(LIBSNAPSHOT_NO_COW_WRITE)
3704 (void)params;
3705 (void)label;
3706
3707 LOG(ERROR) << "Snapshots cannot be written in first-stage init or recovery";
3708 return nullptr;
3709 #else
3710 // First unmap any existing mapping.
3711 auto lock = LockShared();
3712 if (!lock) return nullptr;
3713 if (!UnmapPartitionWithSnapshot(lock.get(), params.GetPartitionName())) {
3714 LOG(ERROR) << "Cannot unmap existing snapshot before re-mapping it: "
3715 << params.GetPartitionName();
3716 return nullptr;
3717 }
3718
3719 SnapshotPaths paths;
3720 if (!MapPartitionWithSnapshot(lock.get(), params, SnapshotContext::Update, &paths)) {
3721 return nullptr;
3722 }
3723
3724 SnapshotStatus status;
3725 if (!paths.cow_device_name.empty()) {
3726 if (!ReadSnapshotStatus(lock.get(), params.GetPartitionName(), &status)) {
3727 return nullptr;
3728 }
3729 } else {
3730 // Currently, partition_cow_creator always creates snapshots. The
3731 // reason is that if partition X shrinks while partition Y grows, we
3732 // cannot bindly write to the newly freed extents in X. This would
3733 // make the old slot unusable. So, the entire size of the target
3734 // partition is currently considered snapshottable.
3735 LOG(ERROR) << "No snapshot available for partition " << params.GetPartitionName();
3736 return nullptr;
3737 }
3738
3739 if (!status.using_snapuserd()) {
3740 LOG(ERROR) << "Can only create snapshot writers with userspace or compressed snapshots";
3741 return nullptr;
3742 }
3743
3744 return OpenCompressedSnapshotWriter(lock.get(), status, paths, label);
3745 #endif
3746 }
3747
3748 #if !defined(LIBSNAPSHOT_NO_COW_WRITE)
OpenCompressedSnapshotWriter(LockedFile * lock,const SnapshotStatus & status,const SnapshotPaths & paths,std::optional<uint64_t> label)3749 std::unique_ptr<ICowWriter> SnapshotManager::OpenCompressedSnapshotWriter(
3750 LockedFile* lock, const SnapshotStatus& status, const SnapshotPaths& paths,
3751 std::optional<uint64_t> label) {
3752 CHECK(lock);
3753
3754 CowOptions cow_options;
3755 cow_options.compression = status.compression_algorithm();
3756 cow_options.max_blocks = {status.device_size() / cow_options.block_size};
3757 cow_options.batch_write = status.batched_writes();
3758 cow_options.num_compress_threads = status.enable_threading() ? 2 : 1;
3759 cow_options.op_count_max = status.estimated_ops_buffer_size();
3760 cow_options.compression_factor = status.compression_factor();
3761 // Disable scratch space for vts tests
3762 if (device()->IsTestDevice()) {
3763 cow_options.scratch_space = false;
3764 }
3765
3766 // Currently we don't support partial snapshots, since partition_cow_creator
3767 // never creates this scenario.
3768 CHECK(status.snapshot_size() == status.device_size());
3769
3770 std::string cow_path;
3771 if (!GetMappedImageDevicePath(paths.cow_device_name, &cow_path)) {
3772 LOG(ERROR) << "Could not determine path for " << paths.cow_device_name;
3773 return nullptr;
3774 }
3775
3776 unique_fd cow_fd(open(cow_path.c_str(), O_RDWR | O_CLOEXEC));
3777 if (cow_fd < 0) {
3778 PLOG(ERROR) << "OpenCompressedSnapshotWriter: open " << cow_path;
3779 return nullptr;
3780 }
3781
3782 CowHeaderV3 header;
3783 if (!ReadCowHeader(cow_fd, &header)) {
3784 LOG(ERROR) << "OpenCompressedSnapshotWriter: read header failed";
3785 return nullptr;
3786 }
3787
3788 return CreateCowWriter(header.prefix.major_version, cow_options, std::move(cow_fd), label);
3789 }
3790 #endif // !defined(LIBSNAPSHOT_NO_COW_WRITE)
3791
UnmapUpdateSnapshot(const std::string & target_partition_name)3792 bool SnapshotManager::UnmapUpdateSnapshot(const std::string& target_partition_name) {
3793 auto lock = LockShared();
3794 if (!lock) return false;
3795 return UnmapPartitionWithSnapshot(lock.get(), target_partition_name);
3796 }
3797
UnmapAllPartitionsInRecovery()3798 bool SnapshotManager::UnmapAllPartitionsInRecovery() {
3799 auto lock = LockExclusive();
3800 if (!lock) return false;
3801
3802 const auto& opener = device_->GetPartitionOpener();
3803 uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
3804 auto super_device = device_->GetSuperDevice(slot);
3805 auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot);
3806 if (!metadata) {
3807 LOG(ERROR) << "Could not read dynamic partition metadata for device: " << super_device;
3808 return false;
3809 }
3810
3811 bool ok = true;
3812 for (const auto& partition : metadata->partitions) {
3813 auto partition_name = GetPartitionName(partition);
3814 ok &= UnmapPartitionWithSnapshot(lock.get(), partition_name);
3815 }
3816 return ok;
3817 }
3818
operator <<(std::ostream & os,SnapshotManager::Slot slot)3819 std::ostream& operator<<(std::ostream& os, SnapshotManager::Slot slot) {
3820 switch (slot) {
3821 case SnapshotManager::Slot::Unknown:
3822 return os << "unknown";
3823 case SnapshotManager::Slot::Source:
3824 return os << "source";
3825 case SnapshotManager::Slot::Target:
3826 return os << "target";
3827 }
3828 }
3829
Dump(std::ostream & os)3830 bool SnapshotManager::Dump(std::ostream& os) {
3831 // Don't actually lock. Dump() is for debugging purposes only, so it is okay
3832 // if it is racy.
3833 auto file = OpenLock(0 /* lock flag */);
3834 if (!file) return false;
3835
3836 std::stringstream ss;
3837
3838 auto update_status = ReadSnapshotUpdateStatus(file.get());
3839
3840 ss << "Update state: " << update_status.state() << std::endl;
3841 ss << "Using snapuserd: " << update_status.using_snapuserd() << std::endl;
3842 ss << "Using userspace snapshots: " << update_status.userspace_snapshots() << std::endl;
3843 ss << "Using io_uring: " << update_status.io_uring_enabled() << std::endl;
3844 ss << "Using o_direct: " << update_status.o_direct() << std::endl;
3845 ss << "Using XOR compression: " << GetXorCompressionEnabledProperty() << std::endl;
3846 ss << "Current slot: " << device_->GetSlotSuffix() << std::endl;
3847 ss << "Boot indicator: booting from " << GetCurrentSlot() << " slot" << std::endl;
3848 ss << "Rollback indicator: "
3849 << (access(GetRollbackIndicatorPath().c_str(), F_OK) == 0 ? "exists" : strerror(errno))
3850 << std::endl;
3851 ss << "Forward merge indicator: "
3852 << (access(GetForwardMergeIndicatorPath().c_str(), F_OK) == 0 ? "exists" : strerror(errno))
3853 << std::endl;
3854 ss << "Source build fingerprint: " << update_status.source_build_fingerprint() << std::endl;
3855
3856 if (update_status.state() == UpdateState::Merging) {
3857 ss << "Merge completion: ";
3858 if (!EnsureSnapuserdConnected()) {
3859 ss << "N/A";
3860 } else {
3861 ss << snapuserd_client_->GetMergePercent() << "%";
3862 }
3863 ss << std::endl;
3864 ss << "Merge phase: " << update_status.merge_phase() << std::endl;
3865 }
3866
3867 bool ok = true;
3868 std::vector<std::string> snapshots;
3869 if (!ListSnapshots(file.get(), &snapshots)) {
3870 LOG(ERROR) << "Could not list snapshots";
3871 snapshots.clear();
3872 ok = false;
3873 }
3874 for (const auto& name : snapshots) {
3875 ss << "Snapshot: " << name << std::endl;
3876 SnapshotStatus status;
3877 if (!ReadSnapshotStatus(file.get(), name, &status)) {
3878 ok = false;
3879 continue;
3880 }
3881 ss << " state: " << SnapshotState_Name(status.state()) << std::endl;
3882 ss << " device size (bytes): " << status.device_size() << std::endl;
3883 ss << " snapshot size (bytes): " << status.snapshot_size() << std::endl;
3884 ss << " cow partition size (bytes): " << status.cow_partition_size() << std::endl;
3885 ss << " cow file size (bytes): " << status.cow_file_size() << std::endl;
3886 ss << " allocated sectors: " << status.sectors_allocated() << std::endl;
3887 ss << " metadata sectors: " << status.metadata_sectors() << std::endl;
3888 ss << " compression: " << status.compression_algorithm() << std::endl;
3889 ss << " compression factor: " << status.compression_factor() << std::endl;
3890 ss << " merge phase: " << DecideMergePhase(status) << std::endl;
3891 }
3892 os << ss.rdbuf();
3893 return ok;
3894 }
3895
EnsureMetadataMounted()3896 std::unique_ptr<AutoDevice> SnapshotManager::EnsureMetadataMounted() {
3897 if (!device_->IsRecovery()) {
3898 // No need to mount anything in recovery.
3899 LOG(INFO) << "EnsureMetadataMounted does nothing in Android mode.";
3900 return std::unique_ptr<AutoUnmountDevice>(new AutoUnmountDevice());
3901 }
3902 auto ret = AutoUnmountDevice::New(device_->GetMetadataDir());
3903 if (ret == nullptr) return nullptr;
3904
3905 // In rescue mode, it is possible to erase and format metadata, but /metadata/ota is not
3906 // created to execute snapshot updates. Hence, subsequent calls is likely to fail because
3907 // Lock*() fails. By failing early and returning nullptr here, update_engine_sideload can
3908 // treat this case as if /metadata is not mounted.
3909 if (!LockShared()) {
3910 LOG(WARNING) << "/metadata is mounted, but errors occur when acquiring a shared lock. "
3911 "Subsequent calls to SnapshotManager will fail. Unmounting /metadata now.";
3912 return nullptr;
3913 }
3914 return ret;
3915 }
3916
HandleImminentDataWipe(const std::function<void ()> & callback)3917 bool SnapshotManager::HandleImminentDataWipe(const std::function<void()>& callback) {
3918 if (!device_->IsRecovery()) {
3919 LOG(ERROR) << "Data wipes are only allowed in recovery.";
3920 return false;
3921 }
3922
3923 auto mount = EnsureMetadataMounted();
3924 if (!mount || !mount->HasDevice()) {
3925 // We allow the wipe to continue, because if we can't mount /metadata,
3926 // it is unlikely the device would have booted anyway. If there is no
3927 // metadata partition, then the device predates Virtual A/B.
3928 return true;
3929 }
3930
3931 // Check this early, so we don't accidentally start trying to populate
3932 // the state file in recovery. Note we don't call GetUpdateState since
3933 // we want errors in acquiring the lock to be propagated, instead of
3934 // returning UpdateState::None.
3935 auto state_file = GetStateFilePath();
3936 if (access(state_file.c_str(), F_OK) != 0 && errno == ENOENT) {
3937 return true;
3938 }
3939
3940 auto slot_number = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
3941 auto super_path = device_->GetSuperDevice(slot_number);
3942 if (!CreateLogicalAndSnapshotPartitions(super_path, 20s)) {
3943 LOG(ERROR) << "Unable to map partitions to complete merge.";
3944 return false;
3945 }
3946
3947 auto process_callback = [&]() -> bool {
3948 if (callback) {
3949 callback();
3950 }
3951 return true;
3952 };
3953
3954 in_factory_data_reset_ = true;
3955 UpdateState state =
3956 ProcessUpdateStateOnDataWipe(true /* allow_forward_merge */, process_callback);
3957 in_factory_data_reset_ = false;
3958
3959 if (state == UpdateState::MergeFailed) {
3960 return false;
3961 }
3962
3963 // Nothing should be depending on partitions now, so unmap them all.
3964 if (!UnmapAllPartitionsInRecovery()) {
3965 LOG(ERROR) << "Unable to unmap all partitions; fastboot may fail to flash.";
3966 }
3967
3968 if (state != UpdateState::None) {
3969 auto lock = LockExclusive();
3970 if (!lock) return false;
3971
3972 // Zap the update state so the bootloader doesn't think we're still
3973 // merging. It's okay if this fails, it's informative only at this
3974 // point.
3975 WriteUpdateState(lock.get(), UpdateState::None);
3976 }
3977 return true;
3978 }
3979
FinishMergeInRecovery()3980 bool SnapshotManager::FinishMergeInRecovery() {
3981 if (!device_->IsRecovery()) {
3982 LOG(ERROR) << "Data wipes are only allowed in recovery.";
3983 return false;
3984 }
3985
3986 auto mount = EnsureMetadataMounted();
3987 if (!mount || !mount->HasDevice()) {
3988 return false;
3989 }
3990
3991 auto slot_number = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
3992 auto super_path = device_->GetSuperDevice(slot_number);
3993 if (!CreateLogicalAndSnapshotPartitions(super_path, 20s)) {
3994 LOG(ERROR) << "Unable to map partitions to complete merge.";
3995 return false;
3996 }
3997
3998 UpdateState state = ProcessUpdateState();
3999 if (state != UpdateState::MergeCompleted) {
4000 LOG(ERROR) << "Merge returned unexpected status: " << state;
4001 return false;
4002 }
4003
4004 // Nothing should be depending on partitions now, so unmap them all.
4005 if (!UnmapAllPartitionsInRecovery()) {
4006 LOG(ERROR) << "Unable to unmap all partitions; fastboot may fail to flash.";
4007 }
4008 return true;
4009 }
4010
ProcessUpdateStateOnDataWipe(bool allow_forward_merge,const std::function<bool ()> & callback)4011 UpdateState SnapshotManager::ProcessUpdateStateOnDataWipe(bool allow_forward_merge,
4012 const std::function<bool()>& callback) {
4013 auto slot_number = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
4014 UpdateState state = ProcessUpdateState(callback);
4015 LOG(INFO) << "Update state in recovery: " << state;
4016 switch (state) {
4017 case UpdateState::MergeFailed:
4018 LOG(ERROR) << "Unrecoverable merge failure detected.";
4019 return state;
4020 case UpdateState::Unverified: {
4021 // If an OTA was just applied but has not yet started merging:
4022 //
4023 // - if forward merge is allowed, initiate merge and call
4024 // ProcessUpdateState again.
4025 //
4026 // - if forward merge is not allowed, we
4027 // have no choice but to revert slots, because the current slot will
4028 // immediately become unbootable. Rather than wait for the device
4029 // to reboot N times until a rollback, we proactively disable the
4030 // new slot instead.
4031 //
4032 // Since the rollback is inevitable, we don't treat a HAL failure
4033 // as an error here.
4034 auto slot = GetCurrentSlot();
4035 if (slot == Slot::Target) {
4036 if (allow_forward_merge &&
4037 access(GetForwardMergeIndicatorPath().c_str(), F_OK) == 0) {
4038 LOG(INFO) << "Forward merge allowed, initiating merge now.";
4039
4040 if (!InitiateMerge()) {
4041 LOG(ERROR) << "Failed to initiate merge on data wipe.";
4042 return UpdateState::MergeFailed;
4043 }
4044 return ProcessUpdateStateOnDataWipe(false /* allow_forward_merge */, callback);
4045 }
4046
4047 LOG(ERROR) << "Reverting to old slot since update will be deleted.";
4048 device_->SetSlotAsUnbootable(slot_number);
4049 } else {
4050 LOG(INFO) << "Booting from " << slot << " slot, no action is taken.";
4051 }
4052 break;
4053 }
4054 case UpdateState::MergeNeedsReboot:
4055 // We shouldn't get here, because nothing is depending on
4056 // logical partitions.
4057 LOG(ERROR) << "Unexpected merge-needs-reboot state in recovery.";
4058 break;
4059 default:
4060 break;
4061 }
4062 return state;
4063 }
4064
EnsureNoOverflowSnapshot(LockedFile * lock)4065 bool SnapshotManager::EnsureNoOverflowSnapshot(LockedFile* lock) {
4066 CHECK(lock);
4067
4068 std::vector<std::string> snapshots;
4069 if (!ListSnapshots(lock, &snapshots)) {
4070 LOG(ERROR) << "Could not list snapshots.";
4071 return false;
4072 }
4073
4074 for (const auto& snapshot : snapshots) {
4075 SnapshotStatus status;
4076 if (!ReadSnapshotStatus(lock, snapshot, &status)) {
4077 return false;
4078 }
4079 if (status.using_snapuserd()) {
4080 continue;
4081 }
4082
4083 std::vector<DeviceMapper::TargetInfo> targets;
4084 if (!dm_.GetTableStatus(snapshot, &targets)) {
4085 LOG(ERROR) << "Could not read snapshot device table: " << snapshot;
4086 return false;
4087 }
4088 if (targets.size() != 1) {
4089 LOG(ERROR) << "Unexpected device-mapper table for snapshot: " << snapshot
4090 << ", size = " << targets.size();
4091 return false;
4092 }
4093 if (targets[0].IsOverflowSnapshot()) {
4094 LOG(ERROR) << "Detected overflow in snapshot " << snapshot
4095 << ", CoW device size computation is wrong!";
4096 return false;
4097 }
4098 }
4099
4100 return true;
4101 }
4102
RecoveryCreateSnapshotDevices()4103 CreateResult SnapshotManager::RecoveryCreateSnapshotDevices() {
4104 if (!device_->IsRecovery()) {
4105 LOG(ERROR) << __func__ << " is only allowed in recovery.";
4106 return CreateResult::NOT_CREATED;
4107 }
4108
4109 auto mount = EnsureMetadataMounted();
4110 if (!mount || !mount->HasDevice()) {
4111 LOG(ERROR) << "Couldn't mount Metadata.";
4112 return CreateResult::NOT_CREATED;
4113 }
4114 return RecoveryCreateSnapshotDevices(mount);
4115 }
4116
RecoveryCreateSnapshotDevices(const std::unique_ptr<AutoDevice> & metadata_device)4117 CreateResult SnapshotManager::RecoveryCreateSnapshotDevices(
4118 const std::unique_ptr<AutoDevice>& metadata_device) {
4119 if (!device_->IsRecovery()) {
4120 LOG(ERROR) << __func__ << " is only allowed in recovery.";
4121 return CreateResult::NOT_CREATED;
4122 }
4123
4124 if (metadata_device == nullptr || !metadata_device->HasDevice()) {
4125 LOG(ERROR) << "Metadata not mounted.";
4126 return CreateResult::NOT_CREATED;
4127 }
4128
4129 auto state_file = GetStateFilePath();
4130 if (access(state_file.c_str(), F_OK) != 0 && errno == ENOENT) {
4131 LOG(ERROR) << "Couldn't access state file.";
4132 return CreateResult::NOT_CREATED;
4133 }
4134
4135 if (!NeedSnapshotsInFirstStageMount()) {
4136 return CreateResult::NOT_CREATED;
4137 }
4138
4139 auto slot_suffix = device_->GetOtherSlotSuffix();
4140 auto slot_number = SlotNumberForSlotSuffix(slot_suffix);
4141 auto super_path = device_->GetSuperDevice(slot_number);
4142 if (!CreateLogicalAndSnapshotPartitions(super_path, 20s)) {
4143 LOG(ERROR) << "Unable to map partitions.";
4144 return CreateResult::ERROR;
4145 }
4146 return CreateResult::CREATED;
4147 }
4148
UpdateForwardMergeIndicator(bool wipe)4149 bool SnapshotManager::UpdateForwardMergeIndicator(bool wipe) {
4150 auto path = GetForwardMergeIndicatorPath();
4151
4152 if (!wipe) {
4153 LOG(INFO) << "Wipe is not scheduled. Deleting forward merge indicator.";
4154 return RemoveFileIfExists(path);
4155 }
4156
4157 // TODO(b/152094219): Don't forward merge if no CoW file is allocated.
4158
4159 LOG(INFO) << "Wipe will be scheduled. Allowing forward merge of snapshots.";
4160 if (!android::base::WriteStringToFile("1", path)) {
4161 PLOG(ERROR) << "Unable to write forward merge indicator: " << path;
4162 return false;
4163 }
4164
4165 return true;
4166 }
4167
GetSnapshotMergeStatsInstance()4168 ISnapshotMergeStats* SnapshotManager::GetSnapshotMergeStatsInstance() {
4169 return SnapshotMergeStats::GetInstance(*this);
4170 }
4171
4172 // This is only to be used in recovery or normal Android (not first-stage init).
4173 // We don't guarantee dm paths are available in first-stage init, because ueventd
4174 // isn't running yet.
GetMappedImageDevicePath(const std::string & device_name,std::string * device_path)4175 bool SnapshotManager::GetMappedImageDevicePath(const std::string& device_name,
4176 std::string* device_path) {
4177 // Try getting the device string if it is a device mapper device.
4178 if (dm_.GetState(device_name) != DmDeviceState::INVALID) {
4179 return dm_.GetDmDevicePathByName(device_name, device_path);
4180 }
4181
4182 // Otherwise, get path from IImageManager.
4183 return images_->GetMappedImageDevice(device_name, device_path);
4184 }
4185
GetMappedImageDeviceStringOrPath(const std::string & device_name,std::string * device_string_or_mapped_path)4186 bool SnapshotManager::GetMappedImageDeviceStringOrPath(const std::string& device_name,
4187 std::string* device_string_or_mapped_path) {
4188 // Try getting the device string if it is a device mapper device.
4189 if (dm_.GetState(device_name) != DmDeviceState::INVALID) {
4190 return dm_.GetDeviceString(device_name, device_string_or_mapped_path);
4191 }
4192
4193 // Otherwise, get path from IImageManager.
4194 if (!images_->GetMappedImageDevice(device_name, device_string_or_mapped_path)) {
4195 return false;
4196 }
4197
4198 LOG(WARNING) << "Calling GetMappedImageDevice with local image manager; device "
4199 << (device_string_or_mapped_path ? *device_string_or_mapped_path : "(nullptr)")
4200 << "may not be available in first stage init! ";
4201 return true;
4202 }
4203
WaitForDevice(const std::string & device,std::chrono::milliseconds timeout_ms)4204 bool SnapshotManager::WaitForDevice(const std::string& device,
4205 std::chrono::milliseconds timeout_ms) {
4206 if (!android::base::StartsWith(device, "/")) {
4207 return true;
4208 }
4209
4210 // In first-stage init, we rely on init setting a callback which can
4211 // regenerate uevents and populate /dev for us.
4212 if (uevent_regen_callback_) {
4213 if (!uevent_regen_callback_(device)) {
4214 LOG(ERROR) << "Failed to find device after regenerating uevents: " << device;
4215 return false;
4216 }
4217 return true;
4218 }
4219
4220 // Otherwise, the only kind of device we need to wait for is a dm-user
4221 // misc device. Normal calls to DeviceMapper::CreateDevice() guarantee
4222 // the path has been created.
4223 if (!android::base::StartsWith(device, "/dev/dm-user/")) {
4224 return true;
4225 }
4226
4227 if (timeout_ms.count() == 0) {
4228 LOG(ERROR) << "No timeout was specified to wait for device: " << device;
4229 return false;
4230 }
4231 if (!android::fs_mgr::WaitForFile(device, timeout_ms)) {
4232 LOG(ERROR) << "Timed out waiting for device to appear: " << device;
4233 return false;
4234 }
4235 return true;
4236 }
4237
IsSnapuserdRequired()4238 bool SnapshotManager::IsSnapuserdRequired() {
4239 auto lock = LockExclusive();
4240 if (!lock) return false;
4241
4242 auto status = ReadSnapshotUpdateStatus(lock.get());
4243 return status.state() != UpdateState::None && status.using_snapuserd();
4244 }
4245
PrepareSnapuserdArgsForSelinux(std::vector<std::string> * snapuserd_argv)4246 bool SnapshotManager::PrepareSnapuserdArgsForSelinux(std::vector<std::string>* snapuserd_argv) {
4247 return PerformInitTransition(InitTransition::SELINUX_DETACH, snapuserd_argv);
4248 }
4249
DetachFirstStageSnapuserdForSelinux()4250 bool SnapshotManager::DetachFirstStageSnapuserdForSelinux() {
4251 LOG(INFO) << "Detaching first stage snapuserd";
4252
4253 auto lock = LockExclusive();
4254 if (!lock) return false;
4255
4256 std::vector<std::string> snapshots;
4257 if (!ListSnapshots(lock.get(), &snapshots)) {
4258 LOG(ERROR) << "Failed to list snapshots.";
4259 return false;
4260 }
4261
4262 size_t num_cows = 0;
4263 size_t ok_cows = 0;
4264 for (const auto& snapshot : snapshots) {
4265 std::string user_cow_name = GetDmUserCowName(snapshot, GetSnapshotDriver(lock.get()));
4266
4267 if (dm_.GetState(user_cow_name) == DmDeviceState::INVALID) {
4268 continue;
4269 }
4270
4271 DeviceMapper::TargetInfo target;
4272 if (!GetSingleTarget(user_cow_name, TableQuery::Table, &target)) {
4273 continue;
4274 }
4275
4276 auto target_type = DeviceMapper::GetTargetType(target.spec);
4277 if (target_type != "user") {
4278 LOG(ERROR) << "Unexpected target type for " << user_cow_name << ": " << target_type;
4279 continue;
4280 }
4281
4282 num_cows++;
4283 auto misc_name = user_cow_name;
4284
4285 DmTable table;
4286 table.Emplace<DmTargetUser>(0, target.spec.length, misc_name);
4287 if (!dm_.LoadTableAndActivate(user_cow_name, table)) {
4288 LOG(ERROR) << "Unable to swap tables for " << misc_name;
4289 continue;
4290 }
4291
4292 // Wait for ueventd to acknowledge and create the control device node.
4293 std::string control_device = "/dev/dm-user/" + misc_name;
4294 if (!WaitForDevice(control_device, 10s)) {
4295 LOG(ERROR) << "dm-user control device no found: " << misc_name;
4296 continue;
4297 }
4298
4299 ok_cows++;
4300 LOG(INFO) << "control device is ready: " << control_device;
4301 }
4302
4303 if (ok_cows != num_cows) {
4304 LOG(ERROR) << "Could not transition all snapuserd consumers.";
4305 return false;
4306 }
4307
4308 return true;
4309 }
4310
PerformSecondStageInitTransition()4311 bool SnapshotManager::PerformSecondStageInitTransition() {
4312 return PerformInitTransition(InitTransition::SECOND_STAGE);
4313 }
4314
ReadOldPartitionMetadata(LockedFile * lock)4315 const LpMetadata* SnapshotManager::ReadOldPartitionMetadata(LockedFile* lock) {
4316 CHECK(lock);
4317
4318 if (!old_partition_metadata_) {
4319 auto path = GetOldPartitionMetadataPath();
4320 old_partition_metadata_ = android::fs_mgr::ReadFromImageFile(path);
4321 if (!old_partition_metadata_) {
4322 LOG(ERROR) << "Could not read old partition metadata from " << path;
4323 return nullptr;
4324 }
4325 }
4326 return old_partition_metadata_.get();
4327 }
4328
DecideMergePhase(const SnapshotStatus & status)4329 MergePhase SnapshotManager::DecideMergePhase(const SnapshotStatus& status) {
4330 if (status.using_snapuserd() && status.device_size() < status.old_partition_size()) {
4331 return MergePhase::FIRST_PHASE;
4332 }
4333 return MergePhase::SECOND_PHASE;
4334 }
4335
UpdateCowStats(ISnapshotMergeStats * stats)4336 void SnapshotManager::UpdateCowStats(ISnapshotMergeStats* stats) {
4337 auto lock = LockExclusive();
4338 if (!lock) return;
4339
4340 std::vector<std::string> snapshots;
4341 if (!ListSnapshots(lock.get(), &snapshots, GetSnapshotSlotSuffix())) {
4342 LOG(ERROR) << "Could not list snapshots";
4343 return;
4344 }
4345
4346 uint64_t cow_file_size = 0;
4347 uint64_t total_cow_size = 0;
4348 uint64_t estimated_cow_size = 0;
4349 for (const auto& snapshot : snapshots) {
4350 SnapshotStatus status;
4351 if (!ReadSnapshotStatus(lock.get(), snapshot, &status)) {
4352 return;
4353 }
4354
4355 cow_file_size += status.cow_file_size();
4356 total_cow_size += status.cow_file_size() + status.cow_partition_size();
4357 estimated_cow_size += status.estimated_cow_size();
4358 }
4359
4360 stats->report()->set_cow_file_size(cow_file_size);
4361 stats->report()->set_total_cow_size_bytes(total_cow_size);
4362 stats->report()->set_estimated_cow_size_bytes(estimated_cow_size);
4363 }
4364
SetMergeStatsFeatures(ISnapshotMergeStats * stats)4365 void SnapshotManager::SetMergeStatsFeatures(ISnapshotMergeStats* stats) {
4366 auto lock = LockExclusive();
4367 if (!lock) return;
4368
4369 SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock.get());
4370 stats->report()->set_iouring_used(update_status.io_uring_enabled());
4371 stats->report()->set_userspace_snapshots_used(update_status.userspace_snapshots());
4372 stats->report()->set_xor_compression_used(GetXorCompressionEnabledProperty());
4373 }
4374
DeleteDeviceIfExists(const std::string & name,const std::chrono::milliseconds & timeout_ms)4375 bool SnapshotManager::DeleteDeviceIfExists(const std::string& name,
4376 const std::chrono::milliseconds& timeout_ms) {
4377 auto start = std::chrono::steady_clock::now();
4378 while (true) {
4379 if (dm_.DeleteDeviceIfExists(name)) {
4380 return true;
4381 }
4382 auto now = std::chrono::steady_clock::now();
4383 auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(now - start);
4384 if (elapsed >= timeout_ms) {
4385 break;
4386 }
4387 std::this_thread::sleep_for(400ms);
4388 }
4389
4390 // Try to diagnose why this failed. First get the actual device path.
4391 std::string full_path;
4392 if (!dm_.GetDmDevicePathByName(name, &full_path)) {
4393 LOG(ERROR) << "Unable to diagnose DM_DEV_REMOVE failure.";
4394 return false;
4395 }
4396
4397 // Check for child dm-devices.
4398 std::string block_name = android::base::Basename(full_path);
4399 std::string sysfs_holders = "/sys/class/block/" + block_name + "/holders";
4400
4401 std::error_code ec;
4402 std::filesystem::directory_iterator dir_iter(sysfs_holders, ec);
4403 if (auto begin = std::filesystem::begin(dir_iter); begin != std::filesystem::end(dir_iter)) {
4404 LOG(ERROR) << "Child device-mapper device still mapped: " << begin->path();
4405 return false;
4406 }
4407
4408 // Check for mounted partitions.
4409 android::fs_mgr::Fstab fstab;
4410 android::fs_mgr::ReadFstabFromFile("/proc/mounts", &fstab);
4411 for (const auto& entry : fstab) {
4412 if (android::base::Basename(entry.blk_device) == block_name) {
4413 LOG(ERROR) << "Partition still mounted: " << entry.mount_point;
4414 return false;
4415 }
4416 }
4417
4418 // Check for detached mounted partitions.
4419 for (const auto& fs : std::filesystem::directory_iterator("/sys/fs", ec)) {
4420 std::string fs_type = android::base::Basename(fs.path().c_str());
4421 if (!(fs_type == "ext4" || fs_type == "f2fs")) {
4422 continue;
4423 }
4424
4425 std::string path = fs.path().c_str() + "/"s + block_name;
4426 if (access(path.c_str(), F_OK) == 0) {
4427 LOG(ERROR) << "Block device was lazily unmounted and is still in-use: " << full_path
4428 << "; possibly open file descriptor or attached loop device.";
4429 return false;
4430 }
4431 }
4432
4433 LOG(ERROR) << "Device-mapper device " << name << "(" << full_path << ")"
4434 << " still in use."
4435 << " Probably a file descriptor was leaked or held open, or a loop device is"
4436 << " attached.";
4437 return false;
4438 }
4439
ReadMergeFailureCode()4440 MergeFailureCode SnapshotManager::ReadMergeFailureCode() {
4441 auto lock = LockExclusive();
4442 if (!lock) return MergeFailureCode::AcquireLock;
4443
4444 SnapshotUpdateStatus status = ReadSnapshotUpdateStatus(lock.get());
4445 if (status.state() != UpdateState::MergeFailed) {
4446 return MergeFailureCode::Ok;
4447 }
4448 return status.merge_failure_code();
4449 }
4450
ReadSourceBuildFingerprint()4451 std::string SnapshotManager::ReadSourceBuildFingerprint() {
4452 auto lock = LockExclusive();
4453 if (!lock) return {};
4454
4455 SnapshotUpdateStatus status = ReadSnapshotUpdateStatus(lock.get());
4456 return status.source_build_fingerprint();
4457 }
4458
IsUserspaceSnapshotUpdateInProgress()4459 bool SnapshotManager::IsUserspaceSnapshotUpdateInProgress() {
4460 // We cannot grab /metadata/ota lock here as this
4461 // is in reboot path. See b/308900853
4462 //
4463 // Check if any of the partitions are mounted
4464 // off dm-user block device. If so, then we are certain
4465 // that OTA update in progress.
4466 auto current_suffix = device_->GetSlotSuffix();
4467 auto& dm = DeviceMapper::Instance();
4468 auto dm_block_devices = dm.FindDmPartitions();
4469 if (dm_block_devices.empty()) {
4470 LOG(ERROR) << "No dm-enabled block device is found.";
4471 return false;
4472 }
4473 for (auto& partition : dm_block_devices) {
4474 std::string partition_name = partition.first + current_suffix;
4475 DeviceMapper::TargetInfo snap_target;
4476 if (!GetSingleTarget(partition_name, TableQuery::Status, &snap_target)) {
4477 return false;
4478 }
4479 auto type = DeviceMapper::GetTargetType(snap_target.spec);
4480 if (type == "user") {
4481 return true;
4482 }
4483 }
4484 return false;
4485 }
4486
BootFromSnapshotsWithoutSlotSwitch()4487 bool SnapshotManager::BootFromSnapshotsWithoutSlotSwitch() {
4488 auto lock = LockExclusive();
4489 if (!lock) return false;
4490
4491 auto contents = device_->GetSlotSuffix();
4492 // This is the indicator which tells first-stage init
4493 // to boot from snapshots even though there was no slot-switch
4494 auto boot_file = GetBootSnapshotsWithoutSlotSwitchPath();
4495 if (!WriteStringToFileAtomic(contents, boot_file)) {
4496 PLOG(ERROR) << "write failed: " << boot_file;
4497 return false;
4498 }
4499
4500 SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock.get());
4501 update_status.set_state(UpdateState::Initiated);
4502 update_status.set_userspace_snapshots(true);
4503 update_status.set_using_snapuserd(true);
4504 if (!WriteSnapshotUpdateStatus(lock.get(), update_status)) {
4505 return false;
4506 }
4507 return true;
4508 }
4509
PrepareDeviceToBootWithoutSnapshot()4510 bool SnapshotManager::PrepareDeviceToBootWithoutSnapshot() {
4511 auto lock = LockExclusive();
4512 if (!lock) return false;
4513
4514 android::base::RemoveFileIfExists(GetSnapshotBootIndicatorPath());
4515 android::base::RemoveFileIfExists(GetBootSnapshotsWithoutSlotSwitchPath());
4516
4517 SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock.get());
4518 update_status.set_state(UpdateState::Cancelled);
4519 if (!WriteSnapshotUpdateStatus(lock.get(), update_status)) {
4520 return false;
4521 }
4522 return true;
4523 }
4524
SetReadAheadSize(const std::string & entry_block_device,off64_t size_kb)4525 void SnapshotManager::SetReadAheadSize(const std::string& entry_block_device, off64_t size_kb) {
4526 std::string block_device;
4527 if (!Realpath(entry_block_device, &block_device)) {
4528 PLOG(ERROR) << "Failed to realpath " << entry_block_device;
4529 return;
4530 }
4531
4532 static constexpr std::string_view kDevBlockPrefix("/dev/block/");
4533 if (!android::base::StartsWith(block_device, kDevBlockPrefix)) {
4534 LOG(ERROR) << block_device << " is not a block device";
4535 return;
4536 }
4537
4538 std::string block_name = block_device.substr(kDevBlockPrefix.length());
4539 std::string sys_partition =
4540 android::base::StringPrintf("/sys/class/block/%s/partition", block_name.c_str());
4541 struct stat info;
4542 if (lstat(sys_partition.c_str(), &info) == 0) {
4543 block_name += "/..";
4544 }
4545 std::string sys_ra = android::base::StringPrintf("/sys/class/block/%s/queue/read_ahead_kb",
4546 block_name.c_str());
4547 std::string size = std::to_string(size_kb);
4548 android::base::WriteStringToFile(size, sys_ra.c_str());
4549 }
4550
4551 } // namespace snapshot
4552 } // namespace android
4553