1 // Copyright (C) 2019 The Android Open Source Project
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include <libsnapshot/snapshot.h>
16 
17 #include <dirent.h>
18 #include <fcntl.h>
19 #include <math.h>
20 #include <sys/file.h>
21 #include <sys/types.h>
22 #include <sys/unistd.h>
23 
24 #include <filesystem>
25 #include <optional>
26 #include <thread>
27 
28 #include <android-base/file.h>
29 #include <android-base/logging.h>
30 #include <android-base/parseint.h>
31 #include <android-base/properties.h>
32 #include <android-base/stringprintf.h>
33 #include <android-base/strings.h>
34 #include <android-base/unique_fd.h>
35 #include <cutils/sockets.h>
36 #include <ext4_utils/ext4_utils.h>
37 #include <fs_mgr.h>
38 #include <fs_mgr/file_wait.h>
39 #include <fs_mgr_dm_linear.h>
40 #include <fstab/fstab.h>
41 #include <libdm/dm.h>
42 #include <libfiemap/image_manager.h>
43 #include <liblp/liblp.h>
44 
45 #include <android/snapshot/snapshot.pb.h>
46 #include <libsnapshot/snapshot_stats.h>
47 #include "device_info.h"
48 #include "partition_cow_creator.h"
49 #include "snapshot_metadata_updater.h"
50 #include "utility.h"
51 
52 namespace android {
53 namespace snapshot {
54 
55 using aidl::android::hardware::boot::MergeStatus;
56 using android::base::unique_fd;
57 using android::dm::DeviceMapper;
58 using android::dm::DmDeviceState;
59 using android::dm::DmTable;
60 using android::dm::DmTargetLinear;
61 using android::dm::DmTargetSnapshot;
62 using android::dm::DmTargetUser;
63 using android::dm::kSectorSize;
64 using android::dm::SnapshotStorageMode;
65 using android::fiemap::FiemapStatus;
66 using android::fiemap::IImageManager;
67 using android::fs_mgr::CreateDmTable;
68 using android::fs_mgr::CreateLogicalPartition;
69 using android::fs_mgr::CreateLogicalPartitionParams;
70 using android::fs_mgr::GetPartitionGroupName;
71 using android::fs_mgr::GetPartitionName;
72 using android::fs_mgr::LpMetadata;
73 using android::fs_mgr::MetadataBuilder;
74 using android::fs_mgr::SlotNumberForSlotSuffix;
75 using chromeos_update_engine::DeltaArchiveManifest;
76 using chromeos_update_engine::Extent;
77 using chromeos_update_engine::FileDescriptor;
78 using chromeos_update_engine::PartitionUpdate;
79 template <typename T>
80 using RepeatedPtrField = google::protobuf::RepeatedPtrField<T>;
81 using std::chrono::duration_cast;
82 using namespace std::chrono_literals;
83 using namespace std::string_literals;
84 using android::base::Realpath;
85 using android::base::StringPrintf;
86 
87 static constexpr char kBootSnapshotsWithoutSlotSwitch[] =
88         "/metadata/ota/snapshot-boot-without-slot-switch";
89 static constexpr char kBootIndicatorPath[] = "/metadata/ota/snapshot-boot";
90 static constexpr char kRollbackIndicatorPath[] = "/metadata/ota/rollback-indicator";
91 static constexpr auto kUpdateStateCheckInterval = 2s;
/*
 * The readahead size is set to 32kb so that there is no significant memory
 * pressure (/proc/pressure/memory) during boot.
 * After an OTA, partitions are scanned during boot before the slot is marked
 * as successful. This scan triggers readahead on both the source and the COW
 * block devices, which drives the number of Inactive(file) pages very high.
 *
 * A lower value may help reduce memory pressure further, but it will increase
 * the boot time. Devices that don't care about OTA boot time could instead
 * use the O_DIRECT functionality, wherein the I/O to the source block device
 * is O_DIRECT.
 */
104 static constexpr auto kReadAheadSizeKb = 32;
105 
106 // Note: IImageManager is an incomplete type in the header, so the default
107 // destructor doesn't work.
~SnapshotManager()108 SnapshotManager::~SnapshotManager() {}
109 
New(IDeviceInfo * info)110 std::unique_ptr<SnapshotManager> SnapshotManager::New(IDeviceInfo* info) {
111     if (!info) {
112         info = new DeviceInfo();
113     }
114 
115     return std::unique_ptr<SnapshotManager>(new SnapshotManager(info));
116 }
117 
NewForFirstStageMount(IDeviceInfo * info)118 std::unique_ptr<SnapshotManager> SnapshotManager::NewForFirstStageMount(IDeviceInfo* info) {
119     if (!info) {
120         DeviceInfo* impl = new DeviceInfo();
121         impl->set_first_stage_init(true);
122         info = impl;
123     }
124     auto sm = New(info);
125 
126     // The first-stage version of snapuserd is explicitly started by init. Do
127     // not attempt to using it during tests (which run in normal AOSP).
128     if (!sm->device()->IsTestDevice()) {
129         sm->use_first_stage_snapuserd_ = true;
130     }
131     return sm;
132 }
133 
SnapshotManager(IDeviceInfo * device)134 SnapshotManager::SnapshotManager(IDeviceInfo* device)
135     : dm_(device->GetDeviceMapper()), device_(device), metadata_dir_(device_->GetMetadataDir()) {}
136 
// Returns |snapshot_name| with the "-cow" suffix appended.
static std::string GetCowName(const std::string& snapshot_name) {
    std::string cow_name(snapshot_name);
    cow_name.append("-cow");
    return cow_name;
}
140 
GetSnapshotDriver(LockedFile * lock)141 SnapshotManager::SnapshotDriver SnapshotManager::GetSnapshotDriver(LockedFile* lock) {
142     if (UpdateUsesUserSnapshots(lock)) {
143         return SnapshotManager::SnapshotDriver::DM_USER;
144     } else {
145         return SnapshotManager::SnapshotDriver::DM_SNAPSHOT;
146     }
147 }
148 
GetDmUserCowName(const std::string & snapshot_name,SnapshotManager::SnapshotDriver driver)149 static std::string GetDmUserCowName(const std::string& snapshot_name,
150                                     SnapshotManager::SnapshotDriver driver) {
151     // dm-user block device will act as a snapshot device. We identify it with
152     // the same partition name so that when partitions can be mounted off
153     // dm-user.
154 
155     switch (driver) {
156         case SnapshotManager::SnapshotDriver::DM_USER: {
157             return snapshot_name;
158         }
159 
160         case SnapshotManager::SnapshotDriver::DM_SNAPSHOT: {
161             return snapshot_name + "-user-cow";
162         }
163 
164         default: {
165             LOG(ERROR) << "Invalid snapshot driver";
166             return "";
167         }
168     }
169 }
170 
// Returns |snapshot_name| with the "-cow-img" suffix appended.
static std::string GetCowImageDeviceName(const std::string& snapshot_name) {
    std::string image_name(snapshot_name);
    image_name.append("-cow-img");
    return image_name;
}
174 
// Returns |partition_name| with the "-base" suffix appended.
static std::string GetBaseDeviceName(const std::string& partition_name) {
    std::string base_name(partition_name);
    base_name.append("-base");
    return base_name;
}
178 
// Returns |partition_name| with the "-src" suffix appended.
static std::string GetSourceDeviceName(const std::string& partition_name) {
    std::string source_name(partition_name);
    source_name.append("-src");
    return source_name;
}
182 
BeginUpdate()183 bool SnapshotManager::BeginUpdate() {
184     bool needs_merge = false;
185     if (!TryCancelUpdate(&needs_merge)) {
186         return false;
187     }
188     if (needs_merge) {
189         LOG(INFO) << "Wait for merge (if any) before beginning a new update.";
190         auto state = ProcessUpdateState();
191         LOG(INFO) << "Merged with state = " << state;
192     }
193 
194     auto file = LockExclusive();
195     if (!file) return false;
196 
197     // Purge the ImageManager just in case there is a corrupt lp_metadata file
198     // lying around. (NB: no need to return false on an error, we can let the
199     // update try to progress.)
200     if (EnsureImageManager()) {
201         images_->RemoveAllImages();
202     }
203 
204     // Clear any cached metadata (this allows re-using one manager across tests).
205     old_partition_metadata_ = nullptr;
206 
207     auto state = ReadUpdateState(file.get());
208     if (state != UpdateState::None) {
209         LOG(ERROR) << "An update is already in progress, cannot begin a new update";
210         return false;
211     }
212     return WriteUpdateState(file.get(), UpdateState::Initiated);
213 }
214 
CancelUpdate()215 bool SnapshotManager::CancelUpdate() {
216     bool needs_merge = false;
217     if (!TryCancelUpdate(&needs_merge)) {
218         return false;
219     }
220     if (needs_merge) {
221         LOG(ERROR) << "Cannot cancel update after it has completed or started merging";
222     }
223     return !needs_merge;
224 }
225 
TryCancelUpdate(bool * needs_merge)226 bool SnapshotManager::TryCancelUpdate(bool* needs_merge) {
227     *needs_merge = false;
228 
229     auto file = LockExclusive();
230     if (!file) return false;
231 
232     if (IsSnapshotWithoutSlotSwitch()) {
233         LOG(ERROR) << "Cannot cancel the snapshots as partitions are mounted off the snapshots on "
234                       "current slot.";
235         return false;
236     }
237 
238     UpdateState state = ReadUpdateState(file.get());
239     if (state == UpdateState::None) {
240         RemoveInvalidSnapshots(file.get());
241         return true;
242     }
243 
244     if (state == UpdateState::Initiated) {
245         LOG(INFO) << "Update has been initiated, now canceling";
246         return RemoveAllUpdateState(file.get());
247     }
248 
249     if (state == UpdateState::Unverified) {
250         // We completed an update, but it can still be canceled if we haven't booted into it.
251         auto slot = GetCurrentSlot();
252         if (slot != Slot::Target) {
253             LOG(INFO) << "Canceling previously completed updates (if any)";
254             return RemoveAllUpdateState(file.get());
255         }
256     }
257     *needs_merge = true;
258     return true;
259 }
260 
ReadUpdateSourceSlotSuffix()261 std::string SnapshotManager::ReadUpdateSourceSlotSuffix() {
262     auto boot_file = GetSnapshotBootIndicatorPath();
263     std::string contents;
264     if (!android::base::ReadFileToString(boot_file, &contents)) {
265         return {};
266     }
267     return contents;
268 }
269 
GetCurrentSlot()270 SnapshotManager::Slot SnapshotManager::GetCurrentSlot() {
271     auto contents = ReadUpdateSourceSlotSuffix();
272     if (contents.empty()) {
273         return Slot::Unknown;
274     }
275     if (device_->GetSlotSuffix() == contents) {
276         return Slot::Source;
277     }
278     return Slot::Target;
279 }
280 
GetSnapshotSlotSuffix()281 std::string SnapshotManager::GetSnapshotSlotSuffix() {
282     switch (GetCurrentSlot()) {
283         case Slot::Target:
284             return device_->GetSlotSuffix();
285         default:
286             return device_->GetOtherSlotSuffix();
287     }
288 }
289 
RemoveFileIfExists(const std::string & path)290 static bool RemoveFileIfExists(const std::string& path) {
291     std::string message;
292     if (!android::base::RemoveFileIfExists(path, &message)) {
293         LOG(ERROR) << "Remove failed: " << path << ": " << message;
294         return false;
295     }
296     return true;
297 }
298 
RemoveAllUpdateState(LockedFile * lock,const std::function<bool ()> & prolog)299 bool SnapshotManager::RemoveAllUpdateState(LockedFile* lock, const std::function<bool()>& prolog) {
300     if (prolog && !prolog()) {
301         LOG(WARNING) << "Can't RemoveAllUpdateState: prolog failed.";
302         return false;
303     }
304 
305     LOG(INFO) << "Removing all update state.";
306 
307     if (!RemoveAllSnapshots(lock)) {
308         LOG(ERROR) << "Could not remove all snapshots";
309         return false;
310     }
311 
312     // It's okay if these fail:
313     // - For SnapshotBoot and Rollback, first-stage init performs a deeper check after
314     // reading the indicator file, so it's not a problem if it still exists
315     // after the update completes.
316     // - For ForwardMerge, FinishedSnapshotWrites asserts that the existence of the indicator
317     // matches the incoming update.
318     std::vector<std::string> files = {
319             GetSnapshotBootIndicatorPath(),          GetRollbackIndicatorPath(),
320             GetForwardMergeIndicatorPath(),          GetOldPartitionMetadataPath(),
321             GetBootSnapshotsWithoutSlotSwitchPath(),
322     };
323     for (const auto& file : files) {
324         RemoveFileIfExists(file);
325     }
326 
327     // If this fails, we'll keep trying to remove the update state (as the
328     // device reboots or starts a new update) until it finally succeeds.
329     return WriteUpdateState(lock, UpdateState::None);
330 }
331 
FinishedSnapshotWrites(bool wipe)332 bool SnapshotManager::FinishedSnapshotWrites(bool wipe) {
333     auto lock = LockExclusive();
334     if (!lock) return false;
335 
336     auto update_state = ReadUpdateState(lock.get());
337     if (update_state == UpdateState::Unverified) {
338         LOG(INFO) << "FinishedSnapshotWrites already called before. Ignored.";
339         return true;
340     }
341 
342     if (update_state != UpdateState::Initiated) {
343         LOG(ERROR) << "Can only transition to the Unverified state from the Initiated state.";
344         return false;
345     }
346 
347     if (!EnsureNoOverflowSnapshot(lock.get())) {
348         LOG(ERROR) << "Cannot ensure there are no overflow snapshots.";
349         return false;
350     }
351 
352     if (!UpdateForwardMergeIndicator(wipe)) {
353         return false;
354     }
355 
356     // This file is written on boot to detect whether a rollback occurred. It
357     // MUST NOT exist before rebooting, otherwise, we're at risk of deleting
358     // snapshots too early.
359     if (!RemoveFileIfExists(GetRollbackIndicatorPath())) {
360         return false;
361     }
362 
363     // This file acts as both a quick indicator for init (it can use access(2)
364     // to decide how to do first-stage mounts), and it stores the old slot, so
365     // we can tell whether or not we performed a rollback.
366     auto contents = device_->GetSlotSuffix();
367     auto boot_file = GetSnapshotBootIndicatorPath();
368     if (!WriteStringToFileAtomic(contents, boot_file)) {
369         PLOG(ERROR) << "write failed: " << boot_file;
370         return false;
371     }
372     return WriteUpdateState(lock.get(), UpdateState::Unverified);
373 }
374 
CreateSnapshot(LockedFile * lock,PartitionCowCreator * cow_creator,SnapshotStatus * status)375 bool SnapshotManager::CreateSnapshot(LockedFile* lock, PartitionCowCreator* cow_creator,
376                                      SnapshotStatus* status) {
377     CHECK(lock);
378     CHECK(lock->lock_mode() == LOCK_EX);
379     CHECK(status);
380 
381     if (status->name().empty()) {
382         LOG(ERROR) << "SnapshotStatus has no name.";
383         return false;
384     }
385     // Check these sizes. Like liblp, we guarantee the partition size is
386     // respected, which means it has to be sector-aligned. (This guarantee is
387     // useful for locating avb footers correctly). The COW file size, however,
388     // can be arbitrarily larger than specified, so we can safely round it up.
389     if (status->device_size() % kSectorSize != 0) {
390         LOG(ERROR) << "Snapshot " << status->name()
391                    << " device size is not a multiple of the sector size: "
392                    << status->device_size();
393         return false;
394     }
395     if (status->snapshot_size() % kSectorSize != 0) {
396         LOG(ERROR) << "Snapshot " << status->name()
397                    << " snapshot size is not a multiple of the sector size: "
398                    << status->snapshot_size();
399         return false;
400     }
401     if (status->cow_partition_size() % kSectorSize != 0) {
402         LOG(ERROR) << "Snapshot " << status->name()
403                    << " cow partition size is not a multiple of the sector size: "
404                    << status->cow_partition_size();
405         return false;
406     }
407     if (status->cow_file_size() % kSectorSize != 0) {
408         LOG(ERROR) << "Snapshot " << status->name()
409                    << " cow file size is not a multiple of the sector size: "
410                    << status->cow_file_size();
411         return false;
412     }
413 
414     status->set_state(SnapshotState::CREATED);
415     status->set_sectors_allocated(0);
416     status->set_metadata_sectors(0);
417     status->set_using_snapuserd(cow_creator->using_snapuserd);
418     status->set_compression_algorithm(cow_creator->compression_algorithm);
419     status->set_compression_factor(cow_creator->compression_factor);
420     status->set_read_ahead_size(cow_creator->read_ahead_size);
421     if (cow_creator->enable_threading) {
422         status->set_enable_threading(cow_creator->enable_threading);
423     }
424     if (cow_creator->batched_writes) {
425         status->set_batched_writes(cow_creator->batched_writes);
426     }
427 
428     if (!WriteSnapshotStatus(lock, *status)) {
429         PLOG(ERROR) << "Could not write snapshot status: " << status->name();
430         return false;
431     }
432     return true;
433 }
434 
CreateCowImage(LockedFile * lock,const std::string & name)435 Return SnapshotManager::CreateCowImage(LockedFile* lock, const std::string& name) {
436     CHECK(lock);
437     CHECK(lock->lock_mode() == LOCK_EX);
438     if (!EnsureImageManager()) return Return::Error();
439 
440     SnapshotStatus status;
441     if (!ReadSnapshotStatus(lock, name, &status)) {
442         return Return::Error();
443     }
444 
445     // The COW file size should have been rounded up to the nearest sector in CreateSnapshot.
446     if (status.cow_file_size() % kSectorSize != 0) {
447         LOG(ERROR) << "Snapshot " << name << " COW file size is not a multiple of the sector size: "
448                    << status.cow_file_size();
449         return Return::Error();
450     }
451 
452     std::string cow_image_name = GetCowImageDeviceName(name);
453     int cow_flags = IImageManager::CREATE_IMAGE_DEFAULT;
454     return Return(images_->CreateBackingImage(cow_image_name, status.cow_file_size(), cow_flags));
455 }
456 
MapDmUserCow(LockedFile * lock,const std::string & name,const std::string & cow_file,const std::string & base_device,const std::string & base_path_merge,const std::chrono::milliseconds & timeout_ms,std::string * path)457 bool SnapshotManager::MapDmUserCow(LockedFile* lock, const std::string& name,
458                                    const std::string& cow_file, const std::string& base_device,
459                                    const std::string& base_path_merge,
460                                    const std::chrono::milliseconds& timeout_ms, std::string* path) {
461     CHECK(lock);
462 
463     if (UpdateUsesUserSnapshots(lock)) {
464         SnapshotStatus status;
465         if (!ReadSnapshotStatus(lock, name, &status)) {
466             LOG(ERROR) << "MapDmUserCow: ReadSnapshotStatus failed...";
467             return false;
468         }
469 
470         if (status.state() == SnapshotState::NONE ||
471             status.state() == SnapshotState::MERGE_COMPLETED) {
472             LOG(ERROR) << "Should not create a snapshot device for " << name
473                        << " after merging has completed.";
474             return false;
475         }
476 
477         SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
478         if (update_status.state() == UpdateState::MergeCompleted ||
479             update_status.state() == UpdateState::MergeNeedsReboot) {
480             LOG(ERROR) << "Should not create a snapshot device for " << name
481                        << " after global merging has completed.";
482             return false;
483         }
484     }
485 
486     // Use an extra decoration for first-stage init, so we can transition
487     // to a new table entry in second-stage.
488     std::string misc_name = name;
489     if (use_first_stage_snapuserd_) {
490         misc_name += "-init";
491     }
492 
493     if (!EnsureSnapuserdConnected()) {
494         return false;
495     }
496 
497     uint64_t base_sectors = 0;
498     if (!UpdateUsesUserSnapshots(lock)) {
499         base_sectors = snapuserd_client_->InitDmUserCow(misc_name, cow_file, base_device);
500         if (base_sectors == 0) {
501             LOG(ERROR) << "Failed to retrieve base_sectors from Snapuserd";
502             return false;
503         }
504     } else if (IsSnapshotWithoutSlotSwitch()) {
505         // When snapshots are on current slot, we determine the size
506         // of block device based on the number of COW operations. We cannot
507         // use base device as it will be from older image.
508         unique_fd fd(open(cow_file.c_str(), O_RDONLY | O_CLOEXEC));
509         if (fd < 0) {
510             PLOG(ERROR) << "Failed to open " << cow_file;
511             return false;
512         }
513 
514         CowReader reader;
515         if (!reader.Parse(std::move(fd))) {
516             LOG(ERROR) << "Failed to parse cow " << cow_file;
517             return false;
518         }
519 
520         uint64_t dev_sz = 0;
521         const auto& header = reader.GetHeader();
522         if (header.prefix.major_version == 2) {
523             const size_t num_ops = reader.get_num_total_data_ops();
524             dev_sz = (num_ops * header.block_size);
525         } else {
526             // create_snapshot will skip in-place copy ops. Hence, fetch this
527             // information directly from v3 header.
528             const auto& v3_header = reader.header_v3();
529             dev_sz = v3_header.op_count_max * v3_header.block_size;
530         }
531 
532         base_sectors = dev_sz >> 9;
533     } else {
534         // For userspace snapshots, the size of the base device is taken as the
535         // size of the dm-user block device. Since there is no pseudo mapping
536         // created in the daemon, we no longer need to rely on the daemon for
537         // sizing the dm-user block device.
538         unique_fd fd(TEMP_FAILURE_RETRY(open(base_path_merge.c_str(), O_RDONLY | O_CLOEXEC)));
539         if (fd < 0) {
540             LOG(ERROR) << "Cannot open block device: " << base_path_merge;
541             return false;
542         }
543 
544         uint64_t dev_sz = get_block_device_size(fd.get());
545         if (!dev_sz) {
546             LOG(ERROR) << "Failed to find block device size: " << base_path_merge;
547             return false;
548         }
549 
550         base_sectors = dev_sz >> 9;
551     }
552 
553     DmTable table;
554     table.Emplace<DmTargetUser>(0, base_sectors, misc_name);
555     if (!dm_.CreateDevice(name, table, path, timeout_ms)) {
556         LOG(ERROR) << " dm-user: CreateDevice failed... ";
557         return false;
558     }
559     if (!WaitForDevice(*path, timeout_ms)) {
560         LOG(ERROR) << " dm-user: timeout: Failed to create block device for: " << name;
561         return false;
562     }
563 
564     auto control_device = "/dev/dm-user/" + misc_name;
565     if (!WaitForDevice(control_device, timeout_ms)) {
566         return false;
567     }
568 
569     if (UpdateUsesUserSnapshots(lock)) {
570         // Now that the dm-user device is created, initialize the daemon and
571         // spin up the worker threads.
572         if (!snapuserd_client_->InitDmUserCow(misc_name, cow_file, base_device, base_path_merge)) {
573             LOG(ERROR) << "InitDmUserCow failed";
574             return false;
575         }
576     }
577 
578     return snapuserd_client_->AttachDmUser(misc_name);
579 }
580 
MapSnapshot(LockedFile * lock,const std::string & name,const std::string & base_device,const std::string & cow_device,const std::chrono::milliseconds & timeout_ms,std::string * dev_path)581 bool SnapshotManager::MapSnapshot(LockedFile* lock, const std::string& name,
582                                   const std::string& base_device, const std::string& cow_device,
583                                   const std::chrono::milliseconds& timeout_ms,
584                                   std::string* dev_path) {
585     CHECK(lock);
586 
587     SnapshotStatus status;
588     if (!ReadSnapshotStatus(lock, name, &status)) {
589         return false;
590     }
591     if (status.state() == SnapshotState::NONE || status.state() == SnapshotState::MERGE_COMPLETED) {
592         LOG(ERROR) << "Should not create a snapshot device for " << name
593                    << " after merging has completed.";
594         return false;
595     }
596 
597     // Validate the block device size, as well as the requested snapshot size.
598     // Note that during first-stage init, we don't have the device paths.
599     if (android::base::StartsWith(base_device, "/")) {
600         unique_fd fd(open(base_device.c_str(), O_RDONLY | O_CLOEXEC));
601         if (fd < 0) {
602             PLOG(ERROR) << "open failed: " << base_device;
603             return false;
604         }
605         auto dev_size = get_block_device_size(fd);
606         if (!dev_size) {
607             PLOG(ERROR) << "Could not determine block device size: " << base_device;
608             return false;
609         }
610         if (status.device_size() != dev_size) {
611             LOG(ERROR) << "Block device size for " << base_device << " does not match"
612                        << "(expected " << status.device_size() << ", got " << dev_size << ")";
613             return false;
614         }
615     }
616     if (status.device_size() % kSectorSize != 0) {
617         LOG(ERROR) << "invalid blockdev size for " << base_device << ": " << status.device_size();
618         return false;
619     }
620     if (status.snapshot_size() % kSectorSize != 0 ||
621         status.snapshot_size() > status.device_size()) {
622         LOG(ERROR) << "Invalid snapshot size for " << base_device << ": " << status.snapshot_size();
623         return false;
624     }
625     if (status.device_size() != status.snapshot_size()) {
626         LOG(ERROR) << "Device size and snapshot size must be the same (device size = "
627                    << status.device_size() << ", snapshot size = " << status.snapshot_size();
628         return false;
629     }
630 
631     uint64_t snapshot_sectors = status.snapshot_size() / kSectorSize;
632 
633     // Note that merging is a global state. We do track whether individual devices
634     // have completed merging, but the start of the merge process is considered
635     // atomic.
636     SnapshotStorageMode mode;
637     SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
638     switch (update_status.state()) {
639         case UpdateState::MergeCompleted:
640         case UpdateState::MergeNeedsReboot:
641             LOG(ERROR) << "Should not create a snapshot device for " << name
642                        << " after global merging has completed.";
643             return false;
644         case UpdateState::Merging:
645         case UpdateState::MergeFailed:
646             // Note: MergeFailed indicates that a merge is in progress, but
647             // is possibly stalled. We still have to honor the merge.
648             if (DecideMergePhase(status) == update_status.merge_phase()) {
649                 mode = SnapshotStorageMode::Merge;
650             } else {
651                 mode = SnapshotStorageMode::Persistent;
652             }
653             break;
654         default:
655             mode = SnapshotStorageMode::Persistent;
656             break;
657     }
658 
659     if (mode == SnapshotStorageMode::Persistent && status.state() == SnapshotState::MERGING) {
660         LOG(ERROR) << "Snapshot: " << name
661                    << " has snapshot status Merging but mode set to Persistent."
662                    << " Changing mode to Snapshot-Merge.";
663         mode = SnapshotStorageMode::Merge;
664     }
665 
666     DmTable table;
667     table.Emplace<DmTargetSnapshot>(0, snapshot_sectors, base_device, cow_device, mode,
668                                     kSnapshotChunkSize);
669     if (!dm_.CreateDevice(name, table, dev_path, timeout_ms)) {
670         LOG(ERROR) << "Could not create snapshot device: " << name;
671         return false;
672     }
673     return true;
674 }
675 
MapCowImage(const std::string & name,const std::chrono::milliseconds & timeout_ms)676 std::optional<std::string> SnapshotManager::MapCowImage(
677         const std::string& name, const std::chrono::milliseconds& timeout_ms) {
678     if (!EnsureImageManager()) return std::nullopt;
679     auto cow_image_name = GetCowImageDeviceName(name);
680 
681     bool ok;
682     std::string cow_dev;
683     if (device_->IsRecovery() || device_->IsFirstStageInit()) {
684         const auto& opener = device_->GetPartitionOpener();
685         ok = images_->MapImageWithDeviceMapper(opener, cow_image_name, &cow_dev);
686     } else {
687         ok = images_->MapImageDevice(cow_image_name, timeout_ms, &cow_dev);
688     }
689 
690     if (ok) {
691         LOG(INFO) << "Mapped " << cow_image_name << " to " << cow_dev;
692         return cow_dev;
693     }
694     LOG(ERROR) << "Could not map image device: " << cow_image_name;
695     return std::nullopt;
696 }
697 
MapSourceDevice(LockedFile * lock,const std::string & name,const std::chrono::milliseconds & timeout_ms,std::string * path)698 bool SnapshotManager::MapSourceDevice(LockedFile* lock, const std::string& name,
699                                       const std::chrono::milliseconds& timeout_ms,
700                                       std::string* path) {
701     CHECK(lock);
702 
703     auto metadata = ReadOldPartitionMetadata(lock);
704     if (!metadata) {
705         LOG(ERROR) << "Could not map source device due to missing or corrupt metadata";
706         return false;
707     }
708 
709     auto old_name = GetOtherPartitionName(name);
710     auto slot_suffix = device_->GetSlotSuffix();
711     auto slot = SlotNumberForSlotSuffix(slot_suffix);
712 
713     CreateLogicalPartitionParams params = {
714             .block_device = device_->GetSuperDevice(slot),
715             .metadata = metadata,
716             .partition_name = old_name,
717             .timeout_ms = timeout_ms,
718             .device_name = GetSourceDeviceName(name),
719             .partition_opener = &device_->GetPartitionOpener(),
720     };
721     if (!CreateLogicalPartition(std::move(params), path)) {
722         LOG(ERROR) << "Could not create source device for snapshot " << name;
723         return false;
724     }
725     return true;
726 }
727 
UnmapSnapshot(LockedFile * lock,const std::string & name)728 bool SnapshotManager::UnmapSnapshot(LockedFile* lock, const std::string& name) {
729     CHECK(lock);
730 
731     if (UpdateUsesUserSnapshots(lock)) {
732         if (!UnmapUserspaceSnapshotDevice(lock, name)) {
733             return false;
734         }
735     } else {
736         if (!DeleteDeviceIfExists(name)) {
737             LOG(ERROR) << "Could not delete snapshot device: " << name;
738             return false;
739         }
740     }
741     return true;
742 }
743 
UnmapCowImage(const std::string & name)744 bool SnapshotManager::UnmapCowImage(const std::string& name) {
745     if (!EnsureImageManager()) return false;
746     return images_->UnmapImageIfExists(GetCowImageDeviceName(name));
747 }
748 
DeleteSnapshot(LockedFile * lock,const std::string & name)749 bool SnapshotManager::DeleteSnapshot(LockedFile* lock, const std::string& name) {
750     CHECK(lock);
751     CHECK(lock->lock_mode() == LOCK_EX);
752     if (!EnsureImageManager()) return false;
753 
754     if (!UnmapCowDevices(lock, name)) {
755         return false;
756     }
757 
758     // We can't delete snapshots in recovery. The only way we'd try is it we're
759     // completing or canceling a merge in preparation for a data wipe, in which
760     // case, we don't care if the file sticks around.
761     if (device_->IsRecovery()) {
762         LOG(INFO) << "Skipping delete of snapshot " << name << " in recovery.";
763         return true;
764     }
765 
766     auto cow_image_name = GetCowImageDeviceName(name);
767     if (images_->BackingImageExists(cow_image_name)) {
768         if (!images_->DeleteBackingImage(cow_image_name)) {
769             return false;
770         }
771     }
772 
773     std::string error;
774     auto file_path = GetSnapshotStatusFilePath(name);
775     if (!android::base::RemoveFileIfExists(file_path, &error)) {
776         LOG(ERROR) << "Failed to remove status file " << file_path << ": " << error;
777         return false;
778     }
779 
780     // This path may never exist. If it is present, then it's a stale
781     // snapshot status file. Just remove the file and log the message.
782     const std::string tmp_path = file_path + ".tmp";
783     if (!android::base::RemoveFileIfExists(tmp_path, &error)) {
784         LOG(ERROR) << "Failed to remove stale snapshot file " << tmp_path;
785     }
786 
787     return true;
788 }
789 
InitiateMerge()790 bool SnapshotManager::InitiateMerge() {
791     auto lock = LockExclusive();
792     if (!lock) return false;
793 
794     UpdateState state = ReadUpdateState(lock.get());
795     if (state != UpdateState::Unverified) {
796         LOG(ERROR) << "Cannot begin a merge if an update has not been verified";
797         return false;
798     }
799 
800     auto slot = GetCurrentSlot();
801     if (slot != Slot::Target) {
802         LOG(ERROR) << "Device cannot merge while not booting from new slot";
803         return false;
804     }
805 
806     std::vector<std::string> snapshots;
807     if (!ListSnapshots(lock.get(), &snapshots)) {
808         LOG(ERROR) << "Could not list snapshots";
809         return false;
810     }
811 
812     auto current_slot_suffix = device_->GetSlotSuffix();
813 
814     for (const auto& snapshot : snapshots) {
815         if (!android::base::EndsWith(snapshot, current_slot_suffix)) {
816             // Allow the merge to continue, but log this unexpected case.
817             LOG(ERROR) << "Unexpected snapshot found during merge: " << snapshot;
818             continue;
819         }
820 
821         // The device has to be mapped, since everything should be merged at
822         // the same time. This is a fairly serious error. We could forcefully
823         // map everything here, but it should have been mapped during first-
824         // stage init.
825         if (dm_.GetState(snapshot) == DmDeviceState::INVALID) {
826             LOG(ERROR) << "Cannot begin merge; device " << snapshot << " is not mapped.";
827             return false;
828         }
829     }
830 
831     auto metadata = ReadCurrentMetadata();
832     for (auto it = snapshots.begin(); it != snapshots.end();) {
833         switch (GetMetadataPartitionState(*metadata, *it)) {
834             case MetadataPartitionState::Flashed:
835                 LOG(WARNING) << "Detected re-flashing for partition " << *it
836                              << ". Skip merging it.";
837                 [[fallthrough]];
838             case MetadataPartitionState::None: {
839                 LOG(WARNING) << "Deleting snapshot for partition " << *it;
840                 if (!DeleteSnapshot(lock.get(), *it)) {
841                     LOG(WARNING) << "Cannot delete snapshot for partition " << *it
842                                  << ". Skip merging it anyways.";
843                 }
844                 it = snapshots.erase(it);
845             } break;
846             case MetadataPartitionState::Updated: {
847                 ++it;
848             } break;
849         }
850     }
851 
852     bool using_snapuserd = false;
853 
854     std::vector<std::string> first_merge_group;
855 
856     DmTargetSnapshot::Status initial_target_values = {};
857     for (const auto& snapshot : snapshots) {
858         if (!UpdateUsesUserSnapshots(lock.get())) {
859             DmTargetSnapshot::Status current_status;
860             if (!QuerySnapshotStatus(snapshot, nullptr, &current_status)) {
861                 return false;
862             }
863             initial_target_values.sectors_allocated += current_status.sectors_allocated;
864             initial_target_values.total_sectors += current_status.total_sectors;
865             initial_target_values.metadata_sectors += current_status.metadata_sectors;
866         }
867 
868         SnapshotStatus snapshot_status;
869         if (!ReadSnapshotStatus(lock.get(), snapshot, &snapshot_status)) {
870             return false;
871         }
872 
873         using_snapuserd |= snapshot_status.using_snapuserd();
874         if (DecideMergePhase(snapshot_status) == MergePhase::FIRST_PHASE) {
875             first_merge_group.emplace_back(snapshot);
876         }
877     }
878 
879     SnapshotUpdateStatus initial_status = ReadSnapshotUpdateStatus(lock.get());
880     initial_status.set_state(UpdateState::Merging);
881     initial_status.set_using_snapuserd(using_snapuserd);
882 
883     if (!UpdateUsesUserSnapshots(lock.get())) {
884         initial_status.set_sectors_allocated(initial_target_values.sectors_allocated);
885         initial_status.set_total_sectors(initial_target_values.total_sectors);
886         initial_status.set_metadata_sectors(initial_target_values.metadata_sectors);
887     }
888 
889     // If any partitions shrunk, we need to merge them before we merge any other
890     // partitions (see b/177935716). Otherwise, a merge from another partition
891     // may overwrite the source block of a copy operation.
892     const std::vector<std::string>* merge_group;
893     if (first_merge_group.empty()) {
894         merge_group = &snapshots;
895         initial_status.set_merge_phase(MergePhase::SECOND_PHASE);
896     } else {
897         merge_group = &first_merge_group;
898         initial_status.set_merge_phase(MergePhase::FIRST_PHASE);
899     }
900 
901     // Point of no return - mark that we're starting a merge. From now on every
902     // eligible snapshot must be a merge target.
903     if (!WriteSnapshotUpdateStatus(lock.get(), initial_status)) {
904         return false;
905     }
906 
907     auto reported_code = MergeFailureCode::Ok;
908     for (const auto& snapshot : *merge_group) {
909         // If this fails, we have no choice but to continue. Everything must
910         // be merged. This is not an ideal state to be in, but it is safe,
911         // because we the next boot will try again.
912         auto code = SwitchSnapshotToMerge(lock.get(), snapshot);
913         if (code != MergeFailureCode::Ok) {
914             LOG(ERROR) << "Failed to switch snapshot to a merge target: " << snapshot;
915             if (reported_code == MergeFailureCode::Ok) {
916                 reported_code = code;
917             }
918         }
919     }
920 
921     // If we couldn't switch everything to a merge target, pre-emptively mark
922     // this merge as failed. It will get acknowledged when WaitForMerge() is
923     // called.
924     if (reported_code != MergeFailureCode::Ok) {
925         WriteUpdateState(lock.get(), UpdateState::MergeFailed, reported_code);
926     }
927 
928     // Return true no matter what, because a merge was initiated.
929     return true;
930 }
931 
SwitchSnapshotToMerge(LockedFile * lock,const std::string & name)932 MergeFailureCode SnapshotManager::SwitchSnapshotToMerge(LockedFile* lock, const std::string& name) {
933     SnapshotStatus status;
934     if (!ReadSnapshotStatus(lock, name, &status)) {
935         return MergeFailureCode::ReadStatus;
936     }
937     if (status.state() != SnapshotState::CREATED) {
938         LOG(WARNING) << "Snapshot " << name
939                      << " has unexpected state: " << SnapshotState_Name(status.state());
940     }
941 
942     if (UpdateUsesUserSnapshots(lock)) {
943         if (EnsureSnapuserdConnected()) {
944             // This is the point where we inform the daemon to initiate/resume
945             // the merge
946             if (!snapuserd_client_->InitiateMerge(name)) {
947                 return MergeFailureCode::UnknownTable;
948             }
949         } else {
950             LOG(ERROR) << "Failed to connect to snapuserd daemon to initiate merge";
951             return MergeFailureCode::UnknownTable;
952         }
953     } else {
954         // After this, we return true because we technically did switch to a merge
955         // target. Everything else we do here is just informational.
956         if (auto code = RewriteSnapshotDeviceTable(name); code != MergeFailureCode::Ok) {
957             return code;
958         }
959     }
960 
961     status.set_state(SnapshotState::MERGING);
962 
963     if (!UpdateUsesUserSnapshots(lock)) {
964         DmTargetSnapshot::Status dm_status;
965         if (!QuerySnapshotStatus(name, nullptr, &dm_status)) {
966             LOG(ERROR) << "Could not query merge status for snapshot: " << name;
967         }
968         status.set_sectors_allocated(dm_status.sectors_allocated);
969         status.set_metadata_sectors(dm_status.metadata_sectors);
970     }
971 
972     if (!WriteSnapshotStatus(lock, status)) {
973         LOG(ERROR) << "Could not update status file for snapshot: " << name;
974     }
975     return MergeFailureCode::Ok;
976 }
977 
RewriteSnapshotDeviceTable(const std::string & name)978 MergeFailureCode SnapshotManager::RewriteSnapshotDeviceTable(const std::string& name) {
979     std::vector<DeviceMapper::TargetInfo> old_targets;
980     if (!dm_.GetTableInfo(name, &old_targets)) {
981         LOG(ERROR) << "Could not read snapshot device table: " << name;
982         return MergeFailureCode::GetTableInfo;
983     }
984     if (old_targets.size() != 1 || DeviceMapper::GetTargetType(old_targets[0].spec) != "snapshot") {
985         LOG(ERROR) << "Unexpected device-mapper table for snapshot: " << name;
986         return MergeFailureCode::UnknownTable;
987     }
988 
989     std::string base_device, cow_device;
990     if (!DmTargetSnapshot::GetDevicesFromParams(old_targets[0].data, &base_device, &cow_device)) {
991         LOG(ERROR) << "Could not derive underlying devices for snapshot: " << name;
992         return MergeFailureCode::GetTableParams;
993     }
994 
995     DmTable table;
996     table.Emplace<DmTargetSnapshot>(0, old_targets[0].spec.length, base_device, cow_device,
997                                     SnapshotStorageMode::Merge, kSnapshotChunkSize);
998     if (!dm_.LoadTableAndActivate(name, table)) {
999         LOG(ERROR) << "Could not swap device-mapper tables on snapshot device " << name;
1000         return MergeFailureCode::ActivateNewTable;
1001     }
1002     LOG(INFO) << "Successfully switched snapshot device to a merge target: " << name;
1003     return MergeFailureCode::Ok;
1004 }
1005 
GetSingleTarget(const std::string & dm_name,TableQuery query,DeviceMapper::TargetInfo * target)1006 bool SnapshotManager::GetSingleTarget(const std::string& dm_name, TableQuery query,
1007                                       DeviceMapper::TargetInfo* target) {
1008     if (dm_.GetState(dm_name) == DmDeviceState::INVALID) {
1009         return false;
1010     }
1011 
1012     std::vector<DeviceMapper::TargetInfo> targets;
1013     bool result;
1014     if (query == TableQuery::Status) {
1015         result = dm_.GetTableStatus(dm_name, &targets);
1016     } else {
1017         result = dm_.GetTableInfo(dm_name, &targets);
1018     }
1019     if (!result) {
1020         LOG(ERROR) << "Could not query device: " << dm_name;
1021         return false;
1022     }
1023     if (targets.size() != 1) {
1024         return false;
1025     }
1026 
1027     *target = std::move(targets[0]);
1028     return true;
1029 }
1030 
IsSnapshotDevice(const std::string & dm_name,TargetInfo * target)1031 bool SnapshotManager::IsSnapshotDevice(const std::string& dm_name, TargetInfo* target) {
1032     DeviceMapper::TargetInfo snap_target;
1033     if (!GetSingleTarget(dm_name, TableQuery::Status, &snap_target)) {
1034         return false;
1035     }
1036     auto type = DeviceMapper::GetTargetType(snap_target.spec);
1037 
1038     // If this is not a user-snapshot device then it should either
1039     // be a dm-snapshot or dm-snapshot-merge target
1040     if (type != "user") {
1041         if (type != "snapshot" && type != "snapshot-merge") {
1042             return false;
1043         }
1044     }
1045 
1046     if (target) {
1047         *target = std::move(snap_target);
1048     }
1049     return true;
1050 }
1051 
UpdateStateToStr(const enum UpdateState state)1052 auto SnapshotManager::UpdateStateToStr(const enum UpdateState state) {
1053     switch (state) {
1054         case None:
1055             return "None";
1056         case Initiated:
1057             return "Initiated";
1058         case Unverified:
1059             return "Unverified";
1060         case Merging:
1061             return "Merging";
1062         case MergeNeedsReboot:
1063             return "MergeNeedsReboot";
1064         case MergeCompleted:
1065             return "MergeCompleted";
1066         case MergeFailed:
1067             return "MergeFailed";
1068         case Cancelled:
1069             return "Cancelled";
1070         default:
1071             return "Unknown";
1072     }
1073 }
1074 
QuerySnapshotStatus(const std::string & dm_name,std::string * target_type,DmTargetSnapshot::Status * status)1075 bool SnapshotManager::QuerySnapshotStatus(const std::string& dm_name, std::string* target_type,
1076                                           DmTargetSnapshot::Status* status) {
1077     DeviceMapper::TargetInfo target;
1078     if (!IsSnapshotDevice(dm_name, &target)) {
1079         LOG(ERROR) << "Device " << dm_name << " is not a snapshot or snapshot-merge device";
1080         return false;
1081     }
1082     if (!DmTargetSnapshot::ParseStatusText(target.data, status)) {
1083         LOG(ERROR) << "Could not parse snapshot status text: " << dm_name;
1084         return false;
1085     }
1086     if (target_type) {
1087         *target_type = DeviceMapper::GetTargetType(target.spec);
1088     }
1089     if (!status->error.empty()) {
1090         LOG(ERROR) << "Snapshot: " << dm_name << " returned error code: " << status->error;
1091         return false;
1092     }
1093     return true;
1094 }
1095 
1096 // Note that when a merge fails, we will *always* try again to complete the
1097 // merge each time the device boots. There is no harm in doing so, and if
1098 // the problem was transient, we might manage to get a new outcome.
ProcessUpdateState(const std::function<bool ()> & callback,const std::function<bool ()> & before_cancel)1099 UpdateState SnapshotManager::ProcessUpdateState(const std::function<bool()>& callback,
1100                                                 const std::function<bool()>& before_cancel) {
1101     while (true) {
1102         auto result = CheckMergeState(before_cancel);
1103         LOG(INFO) << "ProcessUpdateState handling state: " << UpdateStateToStr(result.state);
1104 
1105         if (result.state == UpdateState::MergeFailed) {
1106             AcknowledgeMergeFailure(result.failure_code);
1107         }
1108         if (result.state != UpdateState::Merging) {
1109             // Either there is no merge, or the merge was finished, so no need
1110             // to keep waiting.
1111             return result.state;
1112         }
1113 
1114         if (callback && !callback()) {
1115             return result.state;
1116         }
1117 
1118         // This wait is not super time sensitive, so we have a relatively
1119         // low polling frequency.
1120         std::this_thread::sleep_for(kUpdateStateCheckInterval);
1121     }
1122 }
1123 
CheckMergeState(const std::function<bool ()> & before_cancel)1124 auto SnapshotManager::CheckMergeState(const std::function<bool()>& before_cancel) -> MergeResult {
1125     auto lock = LockExclusive();
1126     if (!lock) {
1127         return MergeResult(UpdateState::MergeFailed, MergeFailureCode::AcquireLock);
1128     }
1129 
1130     auto result = CheckMergeState(lock.get(), before_cancel);
1131     LOG(INFO) << "CheckMergeState for snapshots returned: " << UpdateStateToStr(result.state);
1132 
1133     if (result.state == UpdateState::MergeCompleted) {
1134         // Do this inside the same lock. Failures get acknowledged without the
1135         // lock, because flock() might have failed.
1136         AcknowledgeMergeSuccess(lock.get());
1137     } else if (result.state == UpdateState::Cancelled) {
1138         if (!device_->IsRecovery() && !RemoveAllUpdateState(lock.get(), before_cancel)) {
1139             LOG(ERROR) << "Failed to remove all update state after acknowleding cancelled update.";
1140         }
1141     }
1142     return result;
1143 }
1144 
// Evaluates the overall merge state while |lock| is held.
//
// The persisted update state is read first. None and MergeCompleted are
// returned as-is; Unverified is returned as-is unless HandleCancelledUpdate()
// reports a cancellation; any state other than Merging, MergeNeedsReboot or
// MergeFailed is likewise returned unchanged. For those three states every
// snapshot for the current slot is polled via CheckTargetMergeState() and the
// per-snapshot results are combined in this priority order:
//   1. any snapshot still merging        -> Merging
//   2. any failure                       -> MergeFailed (first failure code)
//   3. any snapshot waiting on its phase -> start the second phase, Merging
//   4. any snapshot needing a reboot     -> MergeNeedsReboot (persisted)
//   5. any snapshot cancelled            -> Cancelled
//   6. otherwise                         -> MergeCompleted
auto SnapshotManager::CheckMergeState(LockedFile* lock,
                                      const std::function<bool()>& before_cancel) -> MergeResult {
    SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
    switch (update_status.state()) {
        case UpdateState::None:
        case UpdateState::MergeCompleted:
            // Harmless races are allowed between two callers of WaitForMerge,
            // so in both of these cases we just propagate the state.
            return MergeResult(update_status.state());

        case UpdateState::Merging:
        case UpdateState::MergeNeedsReboot:
        case UpdateState::MergeFailed:
            // We'll poll each snapshot below. Note that for the NeedsReboot
            // case, we always poll once to give cleanup another opportunity to
            // run.
            break;

        case UpdateState::Unverified:
            // This is an edge case. Normally cancelled updates are detected
            // via the merge poll below, but if we never started a merge, we
            // need to also check here.
            if (HandleCancelledUpdate(lock, before_cancel)) {
                return MergeResult(UpdateState::Cancelled);
            }
            return MergeResult(update_status.state());

        default:
            return MergeResult(update_status.state());
    }

    std::vector<std::string> snapshots;
    if (!ListSnapshots(lock, &snapshots)) {
        return MergeResult(UpdateState::MergeFailed, MergeFailureCode::ListSnapshots);
    }

    auto current_slot_suffix = device_->GetSlotSuffix();

    // Flags aggregated over all polled snapshots; they are resolved in
    // priority order after the loop.
    bool cancelled = false;
    bool merging = false;
    bool needs_reboot = false;
    bool wrong_phase = false;
    MergeFailureCode failure_code = MergeFailureCode::Ok;
    for (const auto& snapshot : snapshots) {
        if (!android::base::EndsWith(snapshot, current_slot_suffix)) {
            // This will have triggered an error message in InitiateMerge already.
            LOG(ERROR) << "Skipping merge validation of unexpected snapshot: " << snapshot;
            continue;
        }

        auto result = CheckTargetMergeState(lock, snapshot, update_status);
        LOG(INFO) << "CheckTargetMergeState for " << snapshot
                  << " returned: " << UpdateStateToStr(result.state);

        switch (result.state) {
            case UpdateState::MergeFailed:
                // Take the first failure code in case other failures compound.
                if (failure_code == MergeFailureCode::Ok) {
                    failure_code = result.failure_code;
                }
                break;
            case UpdateState::Merging:
                merging = true;
                break;
            case UpdateState::MergeNeedsReboot:
                needs_reboot = true;
                break;
            case UpdateState::MergeCompleted:
                break;
            case UpdateState::Cancelled:
                cancelled = true;
                break;
            case UpdateState::None:
                // CheckTargetMergeState() returns None for a snapshot that is
                // not being merged because it belongs to a later merge phase.
                wrong_phase = true;
                break;
            default:
                LOG(ERROR) << "Unknown merge status for \"" << snapshot << "\": "
                           << "\"" << result.state << "\"";
                if (failure_code == MergeFailureCode::Ok) {
                    failure_code = MergeFailureCode::UnexpectedMergeState;
                }
                break;
        }
    }

    if (merging) {
        // Note that we handle "Merging" before we handle anything else. We
        // want to poll until *nothing* is merging if we can, so everything has
        // a chance to get marked as completed or failed.
        return MergeResult(UpdateState::Merging);
    }
    if (failure_code != MergeFailureCode::Ok) {
        // Note: since there are many drop-out cases for failure, we acknowledge
        // it in WaitForMerge rather than here and elsewhere.
        return MergeResult(UpdateState::MergeFailed, failure_code);
    }
    if (wrong_phase) {
        // If we got here, no other partitions are being merged, and nothing
        // failed to merge. It's safe to move to the next merge phase.
        auto code = MergeSecondPhaseSnapshots(lock);
        if (code != MergeFailureCode::Ok) {
            return MergeResult(UpdateState::MergeFailed, code);
        }
        return MergeResult(UpdateState::Merging);
    }
    if (needs_reboot) {
        // Persist the state so it is still MergeNeedsReboot when read again.
        WriteUpdateState(lock, UpdateState::MergeNeedsReboot);
        return MergeResult(UpdateState::MergeNeedsReboot);
    }
    if (cancelled) {
        // This is an edge case, that we handle as correctly as we sensibly can.
        // The underlying partition has changed behind update_engine, and we've
        // removed the snapshot as a result. The exact state of the update is
        // undefined now, but this can only happen on an unlocked device where
        // partitions can be flashed without wiping userdata.
        return MergeResult(UpdateState::Cancelled);
    }
    return MergeResult(UpdateState::MergeCompleted);
}
1264 
// Determines the merge state of the single snapshot |name|.
//
// |update_status| supplies the merge phase of the overall update. The result
// state is one of:
//   - MergeFailed (with a failure code) if a status cannot be read, queried
//     or written, or the device is in an unexpected state;
//   - Cancelled if the device is not a snapshot device and its partition is
//     not in the "Updated" metadata state (the snapshot is deleted);
//   - None if the snapshot is a second-phase snapshot while the update is
//     still in the first phase;
//   - Merging if the merge is still in progress (or was just resumed);
//   - MergeNeedsReboot if the merge finished but post-merge cleanup failed;
//   - MergeCompleted if the merge finished (cleanup is best-effort on the
//     retry path for an already-completed snapshot).
auto SnapshotManager::CheckTargetMergeState(LockedFile* lock, const std::string& name,
                                            const SnapshotUpdateStatus& update_status)
        -> MergeResult {
    SnapshotStatus snapshot_status;
    if (!ReadSnapshotStatus(lock, name, &snapshot_status)) {
        return MergeResult(UpdateState::MergeFailed, MergeFailureCode::ReadStatus);
    }

    std::unique_ptr<LpMetadata> current_metadata;

    // The device is not (or no longer) a snapshot device: work out whether
    // that means cancellation, a pending cleanup retry, or an error.
    if (!IsSnapshotDevice(name)) {
        if (!current_metadata) {
            current_metadata = ReadCurrentMetadata();
        }

        // Metadata is unreadable or the partition is not marked "Updated":
        // drop the snapshot and report the update as cancelled.
        if (!current_metadata ||
            GetMetadataPartitionState(*current_metadata, name) != MetadataPartitionState::Updated) {
            DeleteSnapshot(lock, name);
            return MergeResult(UpdateState::Cancelled);
        }

        // During a check, we decided the merge was complete, but we were unable to
        // collapse the device-mapper stack and perform COW cleanup. If we haven't
        // rebooted after this check, the device will still be a snapshot-merge
        // target. If we have rebooted, the device will now be a linear target,
        // and we can try cleanup again.
        if (snapshot_status.state() == SnapshotState::MERGE_COMPLETED) {
            // NB: It's okay if this fails now, we gave cleanup our best effort.
            OnSnapshotMergeComplete(lock, name, snapshot_status);
            return MergeResult(UpdateState::MergeCompleted);
        }

        LOG(ERROR) << "Expected snapshot or snapshot-merge for device: " << name;
        return MergeResult(UpdateState::MergeFailed, MergeFailureCode::UnknownTargetType);
    }

    // This check is expensive so it is only enabled for debugging.
    DCHECK((current_metadata = ReadCurrentMetadata()) &&
           GetMetadataPartitionState(*current_metadata, name) == MetadataPartitionState::Updated);

    if (UpdateUsesUserSnapshots(lock)) {
        // Userspace snapshots: the merge status string comes from snapuserd.
        if (!EnsureSnapuserdConnected()) {
            return MergeResult(UpdateState::MergeFailed, MergeFailureCode::QuerySnapshotStatus);
        }

        // Query the snapshot status from the daemon
        const auto merge_status = snapuserd_client_->QuerySnapshotStatus(name);
        if (merge_status == "snapshot-merge-failed") {
            return MergeResult(UpdateState::MergeFailed, MergeFailureCode::UnknownTargetType);
        }

        // This is the case when device reboots during merge. Once the device boots,
        // snapuserd daemon will not resume merge immediately in first stage init.
        // This is slightly different as compared to dm-snapshot-merge; In this
        // case, metadata file will have "MERGING" state whereas the daemon will be
        // waiting to resume the merge. Thus, we resume the merge at this point.
        if (merge_status == "snapshot" && snapshot_status.state() == SnapshotState::MERGING) {
            if (!snapuserd_client_->InitiateMerge(name)) {
                return MergeResult(UpdateState::MergeFailed, MergeFailureCode::UnknownTargetType);
            }
            return MergeResult(UpdateState::Merging);
        }

        if (merge_status == "snapshot" &&
            DecideMergePhase(snapshot_status) == MergePhase::SECOND_PHASE &&
            update_status.merge_phase() == MergePhase::FIRST_PHASE) {
            // The snapshot is not being merged because it's in the wrong phase.
            return MergeResult(UpdateState::None);
        }

        if (merge_status == "snapshot-merge") {
            if (snapshot_status.state() == SnapshotState::MERGE_COMPLETED) {
                LOG(ERROR) << "Snapshot " << name
                           << " is merging after being marked merge-complete.";
                return MergeResult(UpdateState::MergeFailed,
                                   MergeFailureCode::UnmergedSectorsAfterCompletion);
            }
            return MergeResult(UpdateState::Merging);
        }

        // Any status other than "snapshot-merge-complete" at this point is
        // unexpected; only a completed merge falls through to the cleanup below.
        if (merge_status != "snapshot-merge-complete") {
            LOG(ERROR) << "Snapshot " << name << " has incorrect status: " << merge_status;
            return MergeResult(UpdateState::MergeFailed, MergeFailureCode::ExpectedMergeTarget);
        }
    } else {
        // dm-snapshot in the kernel
        std::string target_type;
        DmTargetSnapshot::Status status;
        if (!QuerySnapshotStatus(name, &target_type, &status)) {
            return MergeResult(UpdateState::MergeFailed, MergeFailureCode::QuerySnapshotStatus);
        }
        if (target_type == "snapshot" &&
            DecideMergePhase(snapshot_status) == MergePhase::SECOND_PHASE &&
            update_status.merge_phase() == MergePhase::FIRST_PHASE) {
            // The snapshot is not being merged because it's in the wrong phase.
            return MergeResult(UpdateState::None);
        }
        if (target_type != "snapshot-merge") {
            // We can get here if we failed to rewrite the target type in
            // InitiateMerge(). If we failed to create the target in first-stage
            // init, boot would not succeed.
            LOG(ERROR) << "Snapshot " << name << " has incorrect target type: " << target_type;
            return MergeResult(UpdateState::MergeFailed, MergeFailureCode::ExpectedMergeTarget);
        }

        // These two values are equal when merging is complete.
        if (status.sectors_allocated != status.metadata_sectors) {
            if (snapshot_status.state() == SnapshotState::MERGE_COMPLETED) {
                LOG(ERROR) << "Snapshot " << name
                           << " is merging after being marked merge-complete.";
                return MergeResult(UpdateState::MergeFailed,
                                   MergeFailureCode::UnmergedSectorsAfterCompletion);
            }
            return MergeResult(UpdateState::Merging);
        }
    }

    // Merging is done. First, update the status file to indicate the merge
    // is complete. We do this before calling OnSnapshotMergeComplete, even
    // though this means the write is potentially wasted work (since in the
    // ideal case we'll immediately delete the file).
    //
    // This makes it simpler to reason about the next reboot: no matter what
    // part of cleanup failed, first-stage init won't try to create another
    // snapshot device for this partition.
    snapshot_status.set_state(SnapshotState::MERGE_COMPLETED);
    if (!WriteSnapshotStatus(lock, snapshot_status)) {
        return MergeResult(UpdateState::MergeFailed, MergeFailureCode::WriteStatus);
    }
    // Cleanup failure is not a merge failure; it is reported as needing a
    // reboot.
    if (!OnSnapshotMergeComplete(lock, name, snapshot_status)) {
        return MergeResult(UpdateState::MergeNeedsReboot);
    }
    return MergeResult(UpdateState::MergeCompleted, MergeFailureCode::Ok);
}
1399 
1400 // This returns the backing device, not the dm-user layer.
GetMappedCowDeviceName(const std::string & snapshot,const SnapshotStatus & status)1401 static std::string GetMappedCowDeviceName(const std::string& snapshot,
1402                                           const SnapshotStatus& status) {
1403     // If no partition was created (the COW exists entirely on /data), the
1404     // device-mapper layering is different than if we had a partition.
1405     if (status.cow_partition_size() == 0) {
1406         return GetCowImageDeviceName(snapshot);
1407     }
1408     return GetCowName(snapshot);
1409 }
1410 
MergeSecondPhaseSnapshots(LockedFile * lock)1411 MergeFailureCode SnapshotManager::MergeSecondPhaseSnapshots(LockedFile* lock) {
1412     std::vector<std::string> snapshots;
1413     if (!ListSnapshots(lock, &snapshots)) {
1414         return MergeFailureCode::ListSnapshots;
1415     }
1416 
1417     SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
1418     CHECK(update_status.state() == UpdateState::Merging ||
1419           update_status.state() == UpdateState::MergeFailed);
1420     CHECK(update_status.merge_phase() == MergePhase::FIRST_PHASE);
1421 
1422     update_status.set_state(UpdateState::Merging);
1423     update_status.set_merge_phase(MergePhase::SECOND_PHASE);
1424     if (!WriteSnapshotUpdateStatus(lock, update_status)) {
1425         return MergeFailureCode::WriteStatus;
1426     }
1427 
1428     MergeFailureCode result = MergeFailureCode::Ok;
1429     for (const auto& snapshot : snapshots) {
1430         SnapshotStatus snapshot_status;
1431         if (!ReadSnapshotStatus(lock, snapshot, &snapshot_status)) {
1432             return MergeFailureCode::ReadStatus;
1433         }
1434         if (DecideMergePhase(snapshot_status) != MergePhase::SECOND_PHASE) {
1435             continue;
1436         }
1437         auto code = SwitchSnapshotToMerge(lock, snapshot);
1438         if (code != MergeFailureCode::Ok) {
1439             LOG(ERROR) << "Failed to switch snapshot to a second-phase merge target: " << snapshot;
1440             if (result == MergeFailureCode::Ok) {
1441                 result = code;
1442             }
1443         }
1444     }
1445     return result;
1446 }
1447 
GetBootSnapshotsWithoutSlotSwitchPath()1448 std::string SnapshotManager::GetBootSnapshotsWithoutSlotSwitchPath() {
1449     return metadata_dir_ + "/" + android::base::Basename(kBootSnapshotsWithoutSlotSwitch);
1450 }
1451 
GetSnapshotBootIndicatorPath()1452 std::string SnapshotManager::GetSnapshotBootIndicatorPath() {
1453     return metadata_dir_ + "/" + android::base::Basename(kBootIndicatorPath);
1454 }
1455 
GetRollbackIndicatorPath()1456 std::string SnapshotManager::GetRollbackIndicatorPath() {
1457     return metadata_dir_ + "/" + android::base::Basename(kRollbackIndicatorPath);
1458 }
1459 
GetForwardMergeIndicatorPath()1460 std::string SnapshotManager::GetForwardMergeIndicatorPath() {
1461     return metadata_dir_ + "/allow-forward-merge";
1462 }
1463 
GetOldPartitionMetadataPath()1464 std::string SnapshotManager::GetOldPartitionMetadataPath() {
1465     return metadata_dir_ + "/old-partition-metadata";
1466 }
1467 
AcknowledgeMergeSuccess(LockedFile * lock)1468 void SnapshotManager::AcknowledgeMergeSuccess(LockedFile* lock) {
1469     // It's not possible to remove update state in recovery, so write an
1470     // indicator that cleanup is needed on reboot. If a factory data reset
1471     // was requested, it doesn't matter, everything will get wiped anyway.
1472     // To make testing easier we consider a /data wipe as cleaned up.
1473     if (device_->IsRecovery()) {
1474         WriteUpdateState(lock, UpdateState::MergeCompleted);
1475         return;
1476     }
1477 
1478     RemoveAllUpdateState(lock);
1479 
1480     if (UpdateUsesUserSnapshots(lock) && !device()->IsTestDevice()) {
1481         if (snapuserd_client_) {
1482             snapuserd_client_->DetachSnapuserd();
1483             snapuserd_client_->RemoveTransitionedDaemonIndicator();
1484             snapuserd_client_ = nullptr;
1485         }
1486     }
1487 }
1488 
AcknowledgeMergeFailure(MergeFailureCode failure_code)1489 void SnapshotManager::AcknowledgeMergeFailure(MergeFailureCode failure_code) {
1490     // Log first, so worst case, we always have a record of why the calls below
1491     // were being made.
1492     LOG(ERROR) << "Merge could not be completed and will be marked as failed.";
1493 
1494     auto lock = LockExclusive();
1495     if (!lock) return;
1496 
1497     // Since we released the lock in between WaitForMerge and here, it's
1498     // possible (1) the merge successfully completed or (2) was already
1499     // marked as a failure. So make sure to check the state again, and
1500     // only mark as a failure if appropriate.
1501     UpdateState state = ReadUpdateState(lock.get());
1502     if (state != UpdateState::Merging && state != UpdateState::MergeNeedsReboot) {
1503         return;
1504     }
1505 
1506     WriteUpdateState(lock.get(), UpdateState::MergeFailed, failure_code);
1507 }
1508 
OnSnapshotMergeComplete(LockedFile * lock,const std::string & name,const SnapshotStatus & status)1509 bool SnapshotManager::OnSnapshotMergeComplete(LockedFile* lock, const std::string& name,
1510                                               const SnapshotStatus& status) {
1511     if (!UpdateUsesUserSnapshots(lock)) {
1512         if (IsSnapshotDevice(name)) {
1513             // We are extra-cautious here, to avoid deleting the wrong table.
1514             std::string target_type;
1515             DmTargetSnapshot::Status dm_status;
1516             if (!QuerySnapshotStatus(name, &target_type, &dm_status)) {
1517                 return false;
1518             }
1519             if (target_type != "snapshot-merge") {
1520                 LOG(ERROR) << "Unexpected target type " << target_type
1521                            << " for snapshot device: " << name;
1522                 return false;
1523             }
1524             if (dm_status.sectors_allocated != dm_status.metadata_sectors) {
1525                 LOG(ERROR) << "Merge is unexpectedly incomplete for device " << name;
1526                 return false;
1527             }
1528             if (!CollapseSnapshotDevice(lock, name, status)) {
1529                 LOG(ERROR) << "Unable to collapse snapshot: " << name;
1530                 return false;
1531             }
1532         }
1533     } else {
1534         // Just collapse the device - no need to query again as we just did
1535         // prior to calling this function
1536         if (!CollapseSnapshotDevice(lock, name, status)) {
1537             LOG(ERROR) << "Unable to collapse snapshot: " << name;
1538             return false;
1539         }
1540     }
1541 
1542     // Note that collapsing is implicitly an Unmap, so we don't need to
1543     // unmap the snapshot.
1544 
1545     if (!DeleteSnapshot(lock, name)) {
1546         LOG(ERROR) << "Could not delete snapshot: " << name;
1547         return false;
1548     }
1549     return true;
1550 }
1551 
// Replaces the active table of device |name| with a table built by
// CreateDmTable() for the partition of the same name on the current slot's
// super device, then attempts to remove the dm-user, base and source devices
// associated with the snapshot.
//
// |lock| is used to query whether the update uses userspace snapshots and to
// look up the snapshot driver. |status| supplies the snapshot size and the
// using_snapuserd flag.
//
// Returns false if validation fails, or if the new table cannot be created or
// activated. Failures while removing the leftover devices are only logged and
// do not change the return value.
bool SnapshotManager::CollapseSnapshotDevice(LockedFile* lock, const std::string& name,
                                             const SnapshotStatus& status) {
    if (!UpdateUsesUserSnapshots(lock)) {
        // Verify we have a snapshot-merge device.
        DeviceMapper::TargetInfo target;
        if (!GetSingleTarget(name, TableQuery::Table, &target)) {
            return false;
        }
        if (DeviceMapper::GetTargetType(target.spec) != "snapshot-merge") {
            // This should be impossible, it was checked earlier.
            LOG(ERROR) << "Snapshot device has invalid target type: " << name;
            return false;
        }

        // The parsed device names are not used further in this function; the
        // call only confirms that the target parameters can be parsed.
        std::string base_device, cow_device;
        if (!DmTargetSnapshot::GetDevicesFromParams(target.data, &base_device, &cow_device)) {
            LOG(ERROR) << "Could not parse snapshot device " << name
                       << " parameters: " << target.data;
            return false;
        }
    }

    // snapshot_sectors is only used to check that the snapshot size is a
    // whole number of sectors.
    uint64_t snapshot_sectors = status.snapshot_size() / kSectorSize;
    if (snapshot_sectors * kSectorSize != status.snapshot_size()) {
        LOG(ERROR) << "Snapshot " << name
                   << " size is not sector aligned: " << status.snapshot_size();
        return false;
    }

    uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
    // Create a DmTable that is identical to the base device.
    CreateLogicalPartitionParams base_device_params{
            .block_device = device_->GetSuperDevice(slot),
            .metadata_slot = slot,
            .partition_name = name,
            .partition_opener = &device_->GetPartitionOpener(),
    };
    DmTable table;
    if (!CreateDmTable(base_device_params, &table)) {
        LOG(ERROR) << "Could not create a DmTable for partition: " << name;
        return false;
    }

    // Swap the new table in under the existing device name. Note that a
    // failure here is returned without logging from this function.
    if (!dm_.LoadTableAndActivate(name, table)) {
        return false;
    }

    if (!UpdateUsesUserSnapshots(lock)) {
        // Attempt to delete the snapshot device if one still exists. Nothing
        // should be depending on the device, and device-mapper should have
        // flushed remaining I/O. We could in theory replace with dm-zero (or
        // re-use the table above), but for now it's better to know why this
        // would fail.
        //
        // Furthermore, we should not be trying to unmap for userspace snapshot
        // as unmap will fail since dm-user itself was a snapshot device prior
        // to switching of tables. Unmap will fail as the device will be mounted
        // by system partitions
        //
        // The result of UnmapDmUserDevice() is deliberately not checked here.
        if (status.using_snapuserd()) {
            auto dm_user_name = GetDmUserCowName(name, GetSnapshotDriver(lock));
            UnmapDmUserDevice(dm_user_name);
        }
    }

    // We can't delete base device immediately as daemon holds a reference.
    // Make sure we wait for all the worker threads to terminate and release
    // the reference
    if (UpdateUsesUserSnapshots(lock) && EnsureSnapuserdConnected()) {
        if (!snapuserd_client_->WaitForDeviceDelete(name)) {
            LOG(ERROR) << "Failed to wait for " << name << " control device to delete";
        }
    }

    // Best-effort cleanup: failures below are logged but do not fail the
    // collapse, since the new table is already active at this point.
    auto base_name = GetBaseDeviceName(name);
    if (!DeleteDeviceIfExists(base_name)) {
        LOG(ERROR) << "Unable to delete base device for snapshot: " << base_name;
    }

    if (!DeleteDeviceIfExists(GetSourceDeviceName(name), 4000ms)) {
        LOG(ERROR) << "Unable to delete source device for snapshot: " << GetSourceDeviceName(name);
    }

    return true;
}
1636 
HandleCancelledUpdate(LockedFile * lock,const std::function<bool ()> & before_cancel)1637 bool SnapshotManager::HandleCancelledUpdate(LockedFile* lock,
1638                                             const std::function<bool()>& before_cancel) {
1639     auto slot = GetCurrentSlot();
1640     if (slot == Slot::Unknown) {
1641         return false;
1642     }
1643 
1644     // If all snapshots were reflashed, then cancel the entire update.
1645     if (AreAllSnapshotsCancelled(lock)) {
1646         LOG(WARNING) << "Detected re-flashing, cancelling unverified update.";
1647         return RemoveAllUpdateState(lock, before_cancel);
1648     }
1649 
1650     // If update has been rolled back, then cancel the entire update.
1651     // Client (update_engine) is responsible for doing additional cleanup work on its own states
1652     // when ProcessUpdateState() returns UpdateState::Cancelled.
1653     auto current_slot = GetCurrentSlot();
1654     if (current_slot != Slot::Source) {
1655         LOG(INFO) << "Update state is being processed while booting at " << current_slot
1656                   << " slot, taking no action.";
1657         return false;
1658     }
1659 
1660     // current_slot == Source. Attempt to detect rollbacks.
1661     if (access(GetRollbackIndicatorPath().c_str(), F_OK) != 0) {
1662         // This unverified update is not attempted. Take no action.
1663         PLOG(INFO) << "Rollback indicator not detected. "
1664                    << "Update state is being processed before reboot, taking no action.";
1665         return false;
1666     }
1667 
1668     LOG(WARNING) << "Detected rollback, cancelling unverified update.";
1669     return RemoveAllUpdateState(lock, before_cancel);
1670 }
1671 
PerformInitTransition(InitTransition transition,std::vector<std::string> * snapuserd_argv)1672 bool SnapshotManager::PerformInitTransition(InitTransition transition,
1673                                             std::vector<std::string>* snapuserd_argv) {
1674     LOG(INFO) << "Performing transition for snapuserd.";
1675 
1676     // Don't use EnsureSnapuserdConnected() because this is called from init,
1677     // and attempting to do so will deadlock.
1678     if (!snapuserd_client_ && transition != InitTransition::SELINUX_DETACH) {
1679         snapuserd_client_ = SnapuserdClient::Connect(kSnapuserdSocket, 10s);
1680         if (!snapuserd_client_) {
1681             LOG(ERROR) << "Unable to connect to snapuserd";
1682             return false;
1683         }
1684     }
1685 
1686     auto lock = LockExclusive();
1687     if (!lock) return false;
1688 
1689     std::vector<std::string> snapshots;
1690     if (!ListSnapshots(lock.get(), &snapshots)) {
1691         LOG(ERROR) << "Failed to list snapshots.";
1692         return false;
1693     }
1694 
1695     if (UpdateUsesUserSnapshots(lock.get()) && transition == InitTransition::SELINUX_DETACH) {
1696         snapuserd_argv->emplace_back("-user_snapshot");
1697         if (UpdateUsesIouring(lock.get())) {
1698             snapuserd_argv->emplace_back("-io_uring");
1699         }
1700         if (UpdateUsesODirect(lock.get())) {
1701             snapuserd_argv->emplace_back("-o_direct");
1702         }
1703     }
1704 
1705     size_t num_cows = 0;
1706     size_t ok_cows = 0;
1707     for (const auto& snapshot : snapshots) {
1708         std::string user_cow_name = GetDmUserCowName(snapshot, GetSnapshotDriver(lock.get()));
1709 
1710         if (dm_.GetState(user_cow_name) == DmDeviceState::INVALID) {
1711             continue;
1712         }
1713 
1714         DeviceMapper::TargetInfo target;
1715         if (!GetSingleTarget(user_cow_name, TableQuery::Table, &target)) {
1716             continue;
1717         }
1718 
1719         auto target_type = DeviceMapper::GetTargetType(target.spec);
1720         if (target_type != "user") {
1721             LOG(ERROR) << "Unexpected target type for " << user_cow_name << ": " << target_type;
1722             continue;
1723         }
1724 
1725         num_cows++;
1726 
1727         SnapshotStatus snapshot_status;
1728         if (!ReadSnapshotStatus(lock.get(), snapshot, &snapshot_status)) {
1729             LOG(ERROR) << "Unable to read snapshot status: " << snapshot;
1730             continue;
1731         }
1732 
1733         auto misc_name = user_cow_name;
1734 
1735         std::string source_device_name;
1736         if (snapshot_status.old_partition_size() > 0) {
1737             source_device_name = GetSourceDeviceName(snapshot);
1738         } else {
1739             source_device_name = GetBaseDeviceName(snapshot);
1740         }
1741 
1742         std::string source_device;
1743         if (!dm_.GetDmDevicePathByName(source_device_name, &source_device)) {
1744             LOG(ERROR) << "Could not get device path for " << GetSourceDeviceName(snapshot);
1745             continue;
1746         }
1747 
1748         std::string base_path_merge;
1749         if (!dm_.GetDmDevicePathByName(GetBaseDeviceName(snapshot), &base_path_merge)) {
1750             LOG(ERROR) << "Could not get device path for " << GetSourceDeviceName(snapshot);
1751             continue;
1752         }
1753 
1754         std::string cow_image_name = GetMappedCowDeviceName(snapshot, snapshot_status);
1755 
1756         std::string cow_image_device;
1757         if (!dm_.GetDmDevicePathByName(cow_image_name, &cow_image_device)) {
1758             LOG(ERROR) << "Could not get device path for " << cow_image_name;
1759             continue;
1760         }
1761 
1762         if (transition == InitTransition::SELINUX_DETACH) {
1763             if (!UpdateUsesUserSnapshots(lock.get())) {
1764                 auto message = misc_name + "," + cow_image_device + "," + source_device;
1765                 snapuserd_argv->emplace_back(std::move(message));
1766             } else {
1767                 auto message = misc_name + "," + cow_image_device + "," + source_device + "," +
1768                                base_path_merge;
1769                 snapuserd_argv->emplace_back(std::move(message));
1770             }
1771             SetReadAheadSize(cow_image_device, snapshot_status.read_ahead_size());
1772             SetReadAheadSize(source_device, snapshot_status.read_ahead_size());
1773 
1774             // Do not attempt to connect to the new snapuserd yet, it hasn't
1775             // been started. We do however want to wait for the misc device
1776             // to have been created.
1777             ok_cows++;
1778             continue;
1779         }
1780 
1781         DmTable table;
1782         table.Emplace<DmTargetUser>(0, target.spec.length, misc_name);
1783         if (!dm_.LoadTableAndActivate(user_cow_name, table)) {
1784             LOG(ERROR) << "Unable to swap tables for " << misc_name;
1785             continue;
1786         }
1787 
1788         // Wait for ueventd to acknowledge and create the control device node.
1789         std::string control_device = "/dev/dm-user/" + misc_name;
1790         if (!WaitForDevice(control_device, 10s)) {
1791             LOG(ERROR) << "dm-user control device no found:  " << misc_name;
1792             continue;
1793         }
1794 
1795         uint64_t base_sectors;
1796         if (!UpdateUsesUserSnapshots(lock.get())) {
1797             base_sectors =
1798                     snapuserd_client_->InitDmUserCow(misc_name, cow_image_device, source_device);
1799         } else {
1800             base_sectors = snapuserd_client_->InitDmUserCow(misc_name, cow_image_device,
1801                                                             source_device, base_path_merge);
1802         }
1803 
1804         if (base_sectors == 0) {
1805             // Unrecoverable as metadata reads from cow device failed
1806             LOG(FATAL) << "Failed to retrieve base_sectors from Snapuserd";
1807             return false;
1808         }
1809 
1810         CHECK(base_sectors <= target.spec.length);
1811 
1812         if (!snapuserd_client_->AttachDmUser(misc_name)) {
1813             // This error is unrecoverable. We cannot proceed because reads to
1814             // the underlying device will fail.
1815             LOG(FATAL) << "Could not initialize snapuserd for " << user_cow_name;
1816             return false;
1817         }
1818 
1819         ok_cows++;
1820     }
1821 
1822     if (ok_cows != num_cows) {
1823         LOG(ERROR) << "Could not transition all snapuserd consumers.";
1824         return false;
1825     }
1826     return true;
1827 }
1828 
ReadCurrentMetadata()1829 std::unique_ptr<LpMetadata> SnapshotManager::ReadCurrentMetadata() {
1830     const auto& opener = device_->GetPartitionOpener();
1831     uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
1832     auto super_device = device_->GetSuperDevice(slot);
1833     auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot);
1834     if (!metadata) {
1835         LOG(ERROR) << "Could not read dynamic partition metadata for device: " << super_device;
1836         return nullptr;
1837     }
1838     return metadata;
1839 }
1840 
GetMetadataPartitionState(const LpMetadata & metadata,const std::string & name)1841 SnapshotManager::MetadataPartitionState SnapshotManager::GetMetadataPartitionState(
1842         const LpMetadata& metadata, const std::string& name) {
1843     auto partition = android::fs_mgr::FindPartition(metadata, name);
1844     if (!partition) return MetadataPartitionState::None;
1845     if (partition->attributes & LP_PARTITION_ATTR_UPDATED) {
1846         return MetadataPartitionState::Updated;
1847     }
1848     return MetadataPartitionState::Flashed;
1849 }
1850 
AreAllSnapshotsCancelled(LockedFile * lock)1851 bool SnapshotManager::AreAllSnapshotsCancelled(LockedFile* lock) {
1852     std::vector<std::string> snapshots;
1853     if (!ListSnapshots(lock, &snapshots)) {
1854         LOG(WARNING) << "Failed to list snapshots to determine whether device has been flashed "
1855                      << "after applying an update. Assuming no snapshots.";
1856         // Let HandleCancelledUpdate resets UpdateState.
1857         return true;
1858     }
1859 
1860     std::map<std::string, bool> flashing_status;
1861 
1862     if (!GetSnapshotFlashingStatus(lock, snapshots, &flashing_status)) {
1863         LOG(WARNING) << "Failed to determine whether partitions have been flashed. Not"
1864                      << "removing update states.";
1865         return false;
1866     }
1867 
1868     bool all_snapshots_cancelled = std::all_of(flashing_status.begin(), flashing_status.end(),
1869                                                [](const auto& pair) { return pair.second; });
1870 
1871     if (all_snapshots_cancelled) {
1872         LOG(WARNING) << "All partitions are re-flashed after update, removing all update states.";
1873     }
1874     return all_snapshots_cancelled;
1875 }
1876 
GetSnapshotFlashingStatus(LockedFile * lock,const std::vector<std::string> & snapshots,std::map<std::string,bool> * out)1877 bool SnapshotManager::GetSnapshotFlashingStatus(LockedFile* lock,
1878                                                 const std::vector<std::string>& snapshots,
1879                                                 std::map<std::string, bool>* out) {
1880     CHECK(lock);
1881 
1882     auto source_slot_suffix = ReadUpdateSourceSlotSuffix();
1883     if (source_slot_suffix.empty()) {
1884         return false;
1885     }
1886     uint32_t source_slot = SlotNumberForSlotSuffix(source_slot_suffix);
1887     uint32_t target_slot = (source_slot == 0) ? 1 : 0;
1888 
1889     // Attempt to detect re-flashing on each partition.
1890     // - If all partitions are re-flashed, we can proceed to cancel the whole update.
1891     // - If only some of the partitions are re-flashed, snapshots for re-flashed partitions are
1892     //   deleted. Caller is responsible for merging the rest of the snapshots.
1893     // - If none of the partitions are re-flashed, caller is responsible for merging the snapshots.
1894     //
1895     // Note that we use target slot metadata, since if an OTA has been applied
1896     // to the target slot, we can detect the UPDATED flag. Any kind of flash
1897     // operation against dynamic partitions ensures that all copies of the
1898     // metadata are in sync, so flashing all partitions on the source slot will
1899     // remove the UPDATED flag on the target slot as well.
1900     const auto& opener = device_->GetPartitionOpener();
1901     auto super_device = device_->GetSuperDevice(target_slot);
1902     auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, target_slot);
1903     if (!metadata) {
1904         return false;
1905     }
1906 
1907     for (const auto& snapshot_name : snapshots) {
1908         if (GetMetadataPartitionState(*metadata, snapshot_name) ==
1909             MetadataPartitionState::Updated) {
1910             out->emplace(snapshot_name, false);
1911         } else {
1912             // Delete snapshots for partitions that are re-flashed after the update.
1913             LOG(WARNING) << "Detected re-flashing of partition " << snapshot_name << ".";
1914             out->emplace(snapshot_name, true);
1915         }
1916     }
1917     return true;
1918 }
1919 
RemoveInvalidSnapshots(LockedFile * lock)1920 void SnapshotManager::RemoveInvalidSnapshots(LockedFile* lock) {
1921     std::vector<std::string> snapshots;
1922 
1923     // Remove the stale snapshot metadata
1924     //
1925     // We make sure that all the three cases
1926     // are valid before removing the snapshot metadata:
1927     //
1928     // 1: dm state is active
1929     // 2: Root fs is not mounted off as a snapshot device
1930     // 3: Snapshot slot suffix should match current device slot
1931     if (!ListSnapshots(lock, &snapshots, device_->GetSlotSuffix()) || snapshots.empty()) {
1932         return;
1933     }
1934 
1935     // We indeed have some invalid snapshots
1936     for (const auto& name : snapshots) {
1937         if (dm_.GetState(name) == DmDeviceState::ACTIVE && !IsSnapshotDevice(name)) {
1938             if (!DeleteSnapshot(lock, name)) {
1939                 LOG(ERROR) << "Failed to delete invalid snapshot: " << name;
1940             } else {
1941                 LOG(INFO) << "Invalid snapshot: " << name << " deleted";
1942             }
1943         }
1944     }
1945 }
1946 
RemoveAllSnapshots(LockedFile * lock)1947 bool SnapshotManager::RemoveAllSnapshots(LockedFile* lock) {
1948     std::vector<std::string> snapshots;
1949     if (!ListSnapshots(lock, &snapshots)) {
1950         LOG(ERROR) << "Could not list snapshots";
1951         return false;
1952     }
1953 
1954     std::map<std::string, bool> flashing_status;
1955     if (!GetSnapshotFlashingStatus(lock, snapshots, &flashing_status)) {
1956         LOG(WARNING) << "Failed to get flashing status";
1957     }
1958 
1959     auto current_slot = GetCurrentSlot();
1960     bool ok = true;
1961     bool has_mapped_cow_images = false;
1962     for (const auto& name : snapshots) {
1963         // If booting off source slot, it is okay to unmap and delete all the snapshots.
1964         // If boot indicator is missing, update state is None or Initiated, so
1965         //   it is also okay to unmap and delete all the snapshots.
1966         // If booting off target slot,
1967         //  - should not unmap because:
1968         //    - In Android mode, snapshots are not mapped, but
1969         //      filesystems are mounting off dm-linear targets directly.
1970         //    - In recovery mode, assume nothing is mapped, so it is optional to unmap.
1971         //  - If partition is flashed or unknown, it is okay to delete snapshots.
1972         //    Otherwise (UPDATED flag), only delete snapshots if they are not mapped
1973         //    as dm-snapshot (for example, after merge completes).
1974         bool should_unmap = current_slot != Slot::Target;
1975         bool should_delete = ShouldDeleteSnapshot(flashing_status, current_slot, name);
1976         if (should_unmap && android::base::EndsWith(name, device_->GetSlotSuffix())) {
1977             // Something very unexpected has happened - we want to unmap this
1978             // snapshot, but it's on the wrong slot. We can't unmap an active
1979             // partition. If this is not really a snapshot, skip the unmap
1980             // step.
1981             if (dm_.GetState(name) == DmDeviceState::INVALID || !IsSnapshotDevice(name)) {
1982                 LOG(ERROR) << "Detected snapshot " << name << " on " << current_slot << " slot"
1983                            << " for source partition; removing without unmap.";
1984                 should_unmap = false;
1985             }
1986         }
1987 
1988         bool partition_ok = true;
1989         if (should_unmap && !UnmapPartitionWithSnapshot(lock, name)) {
1990             partition_ok = false;
1991         }
1992         if (partition_ok && should_delete && !DeleteSnapshot(lock, name)) {
1993             partition_ok = false;
1994         }
1995 
1996         if (!partition_ok) {
1997             // Remember whether or not we were able to unmap the cow image.
1998             auto cow_image_device = GetCowImageDeviceName(name);
1999             has_mapped_cow_images |=
2000                     (EnsureImageManager() && images_->IsImageMapped(cow_image_device));
2001 
2002             ok = false;
2003         }
2004     }
2005 
2006     if (ok || !has_mapped_cow_images) {
2007         // Delete any image artifacts as a precaution, in case an update is
2008         // being cancelled due to some corrupted state in an lp_metadata file.
2009         // Note that we do not do this if some cow images are still mapped,
2010         // since we must not remove backing storage if it's in use.
2011         if (!EnsureImageManager() || !images_->RemoveAllImages()) {
2012             LOG(ERROR) << "Could not remove all snapshot artifacts";
2013             return false;
2014         }
2015     }
2016     return ok;
2017 }
2018 
2019 // See comments in RemoveAllSnapshots().
ShouldDeleteSnapshot(const std::map<std::string,bool> & flashing_status,Slot current_slot,const std::string & name)2020 bool SnapshotManager::ShouldDeleteSnapshot(const std::map<std::string, bool>& flashing_status,
2021                                            Slot current_slot, const std::string& name) {
2022     if (current_slot != Slot::Target) {
2023         return true;
2024     }
2025     auto it = flashing_status.find(name);
2026     if (it == flashing_status.end()) {
2027         LOG(WARNING) << "Can't determine flashing status for " << name;
2028         return true;
2029     }
2030     if (it->second) {
2031         // partition flashed, okay to delete obsolete snapshots
2032         return true;
2033     }
2034     return !IsSnapshotDevice(name);
2035 }
2036 
GetUpdateState(double * progress)2037 UpdateState SnapshotManager::GetUpdateState(double* progress) {
2038     // If we've never started an update, the state file won't exist.
2039     auto state_file = GetStateFilePath();
2040     if (access(state_file.c_str(), F_OK) != 0 && errno == ENOENT) {
2041         return UpdateState::None;
2042     }
2043 
2044     auto lock = LockShared();
2045     if (!lock) {
2046         return UpdateState::None;
2047     }
2048 
2049     SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock.get());
2050     auto state = update_status.state();
2051     if (progress == nullptr) {
2052         return state;
2053     }
2054 
2055     if (state == UpdateState::MergeCompleted) {
2056         *progress = 100.0;
2057         return state;
2058     }
2059 
2060     *progress = 0.0;
2061     if (state != UpdateState::Merging) {
2062         return state;
2063     }
2064 
2065     if (!UpdateUsesUserSnapshots(lock.get())) {
2066         // Sum all the snapshot states as if the system consists of a single huge
2067         // snapshots device, then compute the merge completion percentage of that
2068         // device.
2069         std::vector<std::string> snapshots;
2070         if (!ListSnapshots(lock.get(), &snapshots)) {
2071             LOG(ERROR) << "Could not list snapshots";
2072             return state;
2073         }
2074 
2075         DmTargetSnapshot::Status fake_snapshots_status = {};
2076         for (const auto& snapshot : snapshots) {
2077             DmTargetSnapshot::Status current_status;
2078 
2079             if (!IsSnapshotDevice(snapshot)) continue;
2080             if (!QuerySnapshotStatus(snapshot, nullptr, &current_status)) continue;
2081 
2082             fake_snapshots_status.sectors_allocated += current_status.sectors_allocated;
2083             fake_snapshots_status.total_sectors += current_status.total_sectors;
2084             fake_snapshots_status.metadata_sectors += current_status.metadata_sectors;
2085         }
2086 
2087         *progress = DmTargetSnapshot::MergePercent(fake_snapshots_status,
2088                                                    update_status.sectors_allocated());
2089     } else {
2090         if (EnsureSnapuserdConnected()) {
2091             *progress = snapuserd_client_->GetMergePercent();
2092         }
2093     }
2094 
2095     return state;
2096 }
2097 
IsSnapshotWithoutSlotSwitch()2098 bool SnapshotManager::IsSnapshotWithoutSlotSwitch() {
2099     return (access(GetBootSnapshotsWithoutSlotSwitchPath().c_str(), F_OK) == 0);
2100 }
2101 
UpdateUsesCompression()2102 bool SnapshotManager::UpdateUsesCompression() {
2103     auto lock = LockShared();
2104     if (!lock) return false;
2105     return UpdateUsesCompression(lock.get());
2106 }
2107 
UpdateUsesCompression(LockedFile * lock)2108 bool SnapshotManager::UpdateUsesCompression(LockedFile* lock) {
2109     // This returns true even if compression is "none", since update_engine is
2110     // really just trying to see if snapuserd is in use.
2111     SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
2112     return update_status.using_snapuserd();
2113 }
2114 
UpdateUsesIouring(LockedFile * lock)2115 bool SnapshotManager::UpdateUsesIouring(LockedFile* lock) {
2116     SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
2117     return update_status.io_uring_enabled();
2118 }
2119 
UpdateUsesODirect(LockedFile * lock)2120 bool SnapshotManager::UpdateUsesODirect(LockedFile* lock) {
2121     SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
2122     return update_status.o_direct();
2123 }
2124 
2125 /*
2126  * Please see b/304829384 for more details.
2127  *
2128  * In Android S, we use dm-snapshot for mounting snapshots and snapshot-merge
2129  * process. If the vendor partition continues to be on Android S, then
2130  * "snapuserd" binary in first stage ramdisk will be from vendor partition.
2131  * Thus, we need to maintain backward compatibility.
2132  *
 * We take a two-step approach to maintain backward compatibility:
2134  *
2135  * 1: During OTA installation, we will continue to use "user-space" snapshots
2136  * for OTA installation as both update-engine and snapuserd binary will be from system partition.
2137  * However, during installation, we mark "legacy_snapuserd" in
2138  * SnapshotUpdateStatus file to mark that this is a path to support backward compatibility.
2139  * Thus, this function will return "false" during OTA installation.
2140  *
2141  * 2: Post OTA reboot, there are two key steps:
2142  *    a: During first stage init, "init" and "snapuserd" could be from vendor
2143  *    partition. This could be from Android S. Thus, the snapshot mount path
2144  *    will be based off dm-snapshot.
2145  *
 *    b: Post selinux transition, "init" and "update-engine" will be from the
 *    "system" partition. Since the snapshots are mounted off dm-snapshot,
 *    update-engine's interaction with "snapuserd" should also be based off
 *    dm-snapshot.
2150  *
2151  *    TL;DR: update-engine will use the "system" snapuserd for installing new
2152  *    updates (this is safe as there is no "vendor" snapuserd running during
2153  *    installation). Post reboot, update-engine will use the legacy path when
2154  *    communicating with "vendor" snapuserd that was started in first-stage
2155  *    init. Hence, this function checks:
2156  *         i: Are we in post OTA reboot
2157  *         ii: Is the Vendor from Android 12
2158  *         iii: If both (i) and (ii) are true, then use the dm-snapshot based
2159  *         approach.
2160  *
2161  */
IsLegacySnapuserdPostReboot()2162 bool SnapshotManager::IsLegacySnapuserdPostReboot() {
2163     if (is_legacy_snapuserd_.has_value() && is_legacy_snapuserd_.value() == true) {
2164         auto slot = GetCurrentSlot();
2165         if (slot == Slot::Target) {
2166             return true;
2167         }
2168     }
2169     return false;
2170 }
2171 
UpdateUsesUserSnapshots()2172 bool SnapshotManager::UpdateUsesUserSnapshots() {
2173     // This and the following function is constantly
2174     // invoked during snapshot merge. We want to avoid
2175     // constantly reading from disk. Hence, store this
2176     // value in memory.
2177     //
2178     // Furthermore, this value in the disk is set
2179     // only when OTA is applied and doesn't change
2180     // during merge phase. Hence, once we know that
2181     // the value is read from disk the very first time,
2182     // it is safe to read successive checks from memory.
2183 
2184     if (is_snapshot_userspace_.has_value()) {
2185         // Check if legacy snapuserd is running post OTA reboot
2186         if (IsLegacySnapuserdPostReboot()) {
2187             return false;
2188         }
2189         return is_snapshot_userspace_.value();
2190     }
2191 
2192     auto lock = LockShared();
2193     if (!lock) return false;
2194 
2195     return UpdateUsesUserSnapshots(lock.get());
2196 }
2197 
UpdateUsesUserSnapshots(LockedFile * lock)2198 bool SnapshotManager::UpdateUsesUserSnapshots(LockedFile* lock) {
2199     if (!is_snapshot_userspace_.has_value()) {
2200         SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
2201         is_snapshot_userspace_ = update_status.userspace_snapshots();
2202         is_legacy_snapuserd_ = update_status.legacy_snapuserd();
2203     }
2204 
2205     if (IsLegacySnapuserdPostReboot()) {
2206         return false;
2207     }
2208 
2209     return is_snapshot_userspace_.value();
2210 }
2211 
ListSnapshots(LockedFile * lock,std::vector<std::string> * snapshots,const std::string & suffix)2212 bool SnapshotManager::ListSnapshots(LockedFile* lock, std::vector<std::string>* snapshots,
2213                                     const std::string& suffix) {
2214     CHECK(lock);
2215 
2216     auto dir_path = metadata_dir_ + "/snapshots"s;
2217     std::unique_ptr<DIR, decltype(&closedir)> dir(opendir(dir_path.c_str()), closedir);
2218     if (!dir) {
2219         PLOG(ERROR) << "opendir failed: " << dir_path;
2220         return false;
2221     }
2222 
2223     struct dirent* dp;
2224     while ((dp = readdir(dir.get())) != nullptr) {
2225         if (dp->d_type != DT_REG) continue;
2226 
2227         std::string name(dp->d_name);
2228         if (!suffix.empty() && !android::base::EndsWith(name, suffix)) {
2229             continue;
2230         }
2231 
2232         // Insert system and product partition at the beginning so that
2233         // during snapshot-merge, these partitions are merged first.
2234         if (name == "system_a" || name == "system_b" || name == "product_a" ||
2235             name == "product_b") {
2236             snapshots->insert(snapshots->begin(), std::move(name));
2237         } else {
2238             snapshots->emplace_back(std::move(name));
2239         }
2240     }
2241 
2242     return true;
2243 }
2244 
IsSnapshotManagerNeeded()2245 bool SnapshotManager::IsSnapshotManagerNeeded() {
2246     return access(kBootIndicatorPath, F_OK) == 0;
2247 }
2248 
GetGlobalRollbackIndicatorPath()2249 std::string SnapshotManager::GetGlobalRollbackIndicatorPath() {
2250     return kRollbackIndicatorPath;
2251 }
2252 
NeedSnapshotsInFirstStageMount()2253 bool SnapshotManager::NeedSnapshotsInFirstStageMount() {
2254     if (IsSnapshotWithoutSlotSwitch()) {
2255         if (GetCurrentSlot() != Slot::Source) {
2256             LOG(ERROR) << "Snapshots marked to boot without slot switch; but slot is wrong";
2257             return false;
2258         }
2259         return true;
2260     }
2261     // If we fail to read, we'll wind up using CreateLogicalPartitions, which
2262     // will create devices that look like the old slot, except with extra
2263     // content at the end of each device. This will confuse dm-verity, and
2264     // ultimately we'll fail to boot. Why not make it a fatal error and have
2265     // the reason be clearer? Because the indicator file still exists, and
2266     // if this was FATAL, reverting to the old slot would be broken.
2267     auto slot = GetCurrentSlot();
2268 
2269     if (slot != Slot::Target) {
2270         if (slot == Slot::Source) {
2271             // Device is rebooting into the original slot, so mark this as a
2272             // rollback.
2273             auto path = GetRollbackIndicatorPath();
2274             if (!android::base::WriteStringToFile("1", path)) {
2275                 PLOG(ERROR) << "Unable to write rollback indicator: " << path;
2276             } else {
2277                 LOG(INFO) << "Rollback detected, writing rollback indicator to " << path;
2278             }
2279         }
2280         LOG(INFO) << "Not booting from new slot. Will not mount snapshots.";
2281         return false;
2282     }
2283 
2284     // If we can't read the update state, it's unlikely anything else will
2285     // succeed, so this is a fatal error. We'll eventually exhaust boot
2286     // attempts and revert to the old slot.
2287     auto lock = LockShared();
2288     if (!lock) {
2289         LOG(FATAL) << "Could not read update state to determine snapshot status";
2290         return false;
2291     }
2292     switch (ReadUpdateState(lock.get())) {
2293         case UpdateState::Unverified:
2294         case UpdateState::Merging:
2295         case UpdateState::MergeFailed:
2296             return true;
2297         default:
2298             return false;
2299     }
2300 }
2301 
CreateLogicalAndSnapshotPartitions(const std::string & super_device,const std::chrono::milliseconds & timeout_ms)2302 bool SnapshotManager::CreateLogicalAndSnapshotPartitions(
2303         const std::string& super_device, const std::chrono::milliseconds& timeout_ms) {
2304     LOG(INFO) << "Creating logical partitions with snapshots as needed";
2305 
2306     auto lock = LockExclusive();
2307     if (!lock) return false;
2308 
2309     uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
2310     return MapAllPartitions(lock.get(), super_device, slot, timeout_ms);
2311 }
2312 
MapAllPartitions(LockedFile * lock,const std::string & super_device,uint32_t slot,const std::chrono::milliseconds & timeout_ms)2313 bool SnapshotManager::MapAllPartitions(LockedFile* lock, const std::string& super_device,
2314                                        uint32_t slot, const std::chrono::milliseconds& timeout_ms) {
2315     const auto& opener = device_->GetPartitionOpener();
2316     auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot);
2317     if (!metadata) {
2318         LOG(ERROR) << "Could not read dynamic partition metadata for device: " << super_device;
2319         return false;
2320     }
2321 
2322     if (!EnsureImageManager()) {
2323         return false;
2324     }
2325 
2326     for (const auto& partition : metadata->partitions) {
2327         if (GetPartitionGroupName(metadata->groups[partition.group_index]) == kCowGroupName) {
2328             LOG(INFO) << "Skip mapping partition " << GetPartitionName(partition) << " in group "
2329                       << kCowGroupName;
2330             continue;
2331         }
2332 
2333         CreateLogicalPartitionParams params = {
2334                 .block_device = super_device,
2335                 .metadata = metadata.get(),
2336                 .partition = &partition,
2337                 .timeout_ms = timeout_ms,
2338                 .partition_opener = &opener,
2339         };
2340         if (!MapPartitionWithSnapshot(lock, std::move(params), SnapshotContext::Mount, nullptr)) {
2341             return false;
2342         }
2343     }
2344 
2345     LOG(INFO) << "Created logical partitions with snapshot.";
2346     return true;
2347 }
2348 
GetRemainingTime(const std::chrono::milliseconds & timeout,const std::chrono::time_point<std::chrono::steady_clock> & begin)2349 static std::chrono::milliseconds GetRemainingTime(
2350         const std::chrono::milliseconds& timeout,
2351         const std::chrono::time_point<std::chrono::steady_clock>& begin) {
2352     // If no timeout is specified, execute all commands without specifying any timeout.
2353     if (timeout.count() == 0) return std::chrono::milliseconds(0);
2354     auto passed_time = std::chrono::steady_clock::now() - begin;
2355     auto remaining_time = timeout - duration_cast<std::chrono::milliseconds>(passed_time);
2356     if (remaining_time.count() <= 0) {
2357         LOG(ERROR) << "MapPartitionWithSnapshot has reached timeout " << timeout.count() << "ms ("
2358                    << remaining_time.count() << "ms remaining)";
2359         // Return min() instead of remaining_time here because 0 is treated as a special value for
2360         // no timeout, where the rest of the commands will still be executed.
2361         return std::chrono::milliseconds::min();
2362     }
2363     return remaining_time;
2364 }
2365 
MapPartitionWithSnapshot(LockedFile * lock,CreateLogicalPartitionParams params,SnapshotContext context,SnapshotPaths * paths)2366 bool SnapshotManager::MapPartitionWithSnapshot(LockedFile* lock,
2367                                                CreateLogicalPartitionParams params,
2368                                                SnapshotContext context, SnapshotPaths* paths) {
2369     auto begin = std::chrono::steady_clock::now();
2370 
2371     CHECK(lock);
2372 
2373     if (params.GetPartitionName() != params.GetDeviceName()) {
2374         LOG(ERROR) << "Mapping snapshot with a different name is unsupported: partition_name = "
2375                    << params.GetPartitionName() << ", device_name = " << params.GetDeviceName();
2376         return false;
2377     }
2378 
2379     // Fill out fields in CreateLogicalPartitionParams so that we have more information (e.g. by
2380     // reading super partition metadata).
2381     CreateLogicalPartitionParams::OwnedData params_owned_data;
2382     if (!params.InitDefaults(&params_owned_data)) {
2383         return false;
2384     }
2385 
2386     if (!params.partition->num_extents) {
2387         LOG(INFO) << "Skipping zero-length logical partition: " << params.GetPartitionName();
2388         return true;  // leave path empty to indicate that nothing is mapped.
2389     }
2390 
2391     // Determine if there is a live snapshot for the SnapshotStatus of the partition; i.e. if the
2392     // partition still has a snapshot that needs to be mapped.  If no live snapshot or merge
2393     // completed, live_snapshot_status is set to nullopt.
2394     std::optional<SnapshotStatus> live_snapshot_status;
2395     do {
2396         if (!IsSnapshotWithoutSlotSwitch() &&
2397             !(params.partition->attributes & LP_PARTITION_ATTR_UPDATED)) {
2398             LOG(INFO) << "Detected re-flashing of partition, will skip snapshot: "
2399                       << params.GetPartitionName();
2400             break;
2401         }
2402         auto file_path = GetSnapshotStatusFilePath(params.GetPartitionName());
2403         if (access(file_path.c_str(), F_OK) != 0) {
2404             if (errno != ENOENT) {
2405                 PLOG(INFO) << "Can't map snapshot for " << params.GetPartitionName()
2406                            << ": Can't access " << file_path;
2407                 return false;
2408             }
2409             break;
2410         }
2411         live_snapshot_status = std::make_optional<SnapshotStatus>();
2412         if (!ReadSnapshotStatus(lock, params.GetPartitionName(), &*live_snapshot_status)) {
2413             return false;
2414         }
2415         // No live snapshot if merge is completed.
2416         if (live_snapshot_status->state() == SnapshotState::MERGE_COMPLETED) {
2417             live_snapshot_status.reset();
2418         }
2419 
2420         if (live_snapshot_status->state() == SnapshotState::NONE ||
2421             live_snapshot_status->cow_partition_size() + live_snapshot_status->cow_file_size() ==
2422                     0) {
2423             LOG(WARNING) << "Snapshot status for " << params.GetPartitionName()
2424                          << " is invalid, ignoring: state = "
2425                          << SnapshotState_Name(live_snapshot_status->state())
2426                          << ", cow_partition_size = " << live_snapshot_status->cow_partition_size()
2427                          << ", cow_file_size = " << live_snapshot_status->cow_file_size();
2428             live_snapshot_status.reset();
2429         }
2430     } while (0);
2431 
2432     if (live_snapshot_status.has_value()) {
2433         // dm-snapshot requires the base device to be writable.
2434         params.force_writable = true;
2435         // Map the base device with a different name to avoid collision.
2436         params.device_name = GetBaseDeviceName(params.GetPartitionName());
2437     }
2438 
2439     AutoDeviceList created_devices;
2440 
2441     // Create the base device for the snapshot, or if there is no snapshot, the
2442     // device itself. This device consists of the real blocks in the super
2443     // partition that this logical partition occupies.
2444     std::string base_path;
2445     if (!CreateLogicalPartition(params, &base_path)) {
2446         LOG(ERROR) << "Could not create logical partition " << params.GetPartitionName()
2447                    << " as device " << params.GetDeviceName();
2448         return false;
2449     }
2450     created_devices.EmplaceBack<AutoUnmapDevice>(&dm_, params.GetDeviceName());
2451 
2452     if (paths) {
2453         paths->target_device = base_path;
2454     }
2455 
2456     auto remaining_time = GetRemainingTime(params.timeout_ms, begin);
2457     if (remaining_time.count() < 0) {
2458         return false;
2459     }
2460 
2461     // Wait for the base device to appear
2462     if (!WaitForDevice(base_path, remaining_time)) {
2463         return false;
2464     }
2465 
2466     if (!live_snapshot_status.has_value()) {
2467         created_devices.Release();
2468         return true;
2469     }
2470 
2471     // We don't have ueventd in first-stage init, so use device major:minor
2472     // strings instead.
2473     std::string base_device;
2474     if (!dm_.GetDeviceString(params.GetDeviceName(), &base_device)) {
2475         LOG(ERROR) << "Could not determine major/minor for: " << params.GetDeviceName();
2476         return false;
2477     }
2478 
2479     remaining_time = GetRemainingTime(params.timeout_ms, begin);
2480     if (remaining_time.count() < 0) return false;
2481 
2482     std::string cow_name;
2483     CreateLogicalPartitionParams cow_params = params;
2484     cow_params.timeout_ms = remaining_time;
2485     if (!MapCowDevices(lock, cow_params, *live_snapshot_status, &created_devices, &cow_name)) {
2486         return false;
2487     }
2488     std::string cow_device;
2489     if (!GetMappedImageDeviceStringOrPath(cow_name, &cow_device)) {
2490         LOG(ERROR) << "Could not determine major/minor for: " << cow_name;
2491         return false;
2492     }
2493     if (paths) {
2494         paths->cow_device_name = cow_name;
2495     }
2496 
2497     remaining_time = GetRemainingTime(params.timeout_ms, begin);
2498     if (remaining_time.count() < 0) return false;
2499 
2500     if (context == SnapshotContext::Update && live_snapshot_status->using_snapuserd()) {
2501         // Stop here, we can't run dm-user yet, the COW isn't built.
2502         created_devices.Release();
2503         return true;
2504     }
2505 
2506     if (live_snapshot_status->using_snapuserd()) {
2507         // Get the source device (eg the view of the partition from before it was resized).
2508         std::string source_device_path;
2509         if (live_snapshot_status->old_partition_size() > 0) {
2510             if (!MapSourceDevice(lock, params.GetPartitionName(), remaining_time,
2511                                  &source_device_path)) {
2512                 LOG(ERROR) << "Could not map source device for: " << cow_name;
2513                 return false;
2514             }
2515 
2516             auto source_device = GetSourceDeviceName(params.GetPartitionName());
2517             created_devices.EmplaceBack<AutoUnmapDevice>(&dm_, source_device);
2518         } else {
2519             source_device_path = base_path;
2520         }
2521 
2522         if (!WaitForDevice(source_device_path, remaining_time)) {
2523             return false;
2524         }
2525 
2526         std::string cow_path;
2527         if (!GetMappedImageDevicePath(cow_name, &cow_path)) {
2528             LOG(ERROR) << "Could not determine path for: " << cow_name;
2529             return false;
2530         }
2531         if (!WaitForDevice(cow_path, remaining_time)) {
2532             return false;
2533         }
2534 
2535         auto name = GetDmUserCowName(params.GetPartitionName(), GetSnapshotDriver(lock));
2536 
2537         std::string new_cow_device;
2538         if (!MapDmUserCow(lock, name, cow_path, source_device_path, base_path, remaining_time,
2539                           &new_cow_device)) {
2540             LOG(ERROR) << "Could not map dm-user device for partition "
2541                        << params.GetPartitionName();
2542             return false;
2543         }
2544         created_devices.EmplaceBack<AutoUnmapDevice>(&dm_, name);
2545 
2546         cow_device = new_cow_device;
2547     }
2548 
2549     // For userspace snapshots, dm-user block device itself will act as a
2550     // snapshot device. There is one subtle difference - MapSnapshot will create
2551     // either snapshot target or snapshot-merge target based on the underlying
2552     // state of the snapshot device. If snapshot-merge target is created, merge
2553     // will immediately start in the kernel.
2554     //
2555     // This is no longer true with respect to userspace snapshots. When dm-user
2556     // block device is created, we just have the snapshots ready but daemon in
2557     // the user-space will not start the merge. We have to explicitly inform the
2558     // daemon to resume the merge. Check ProcessUpdateState() call stack.
2559     if (!UpdateUsesUserSnapshots(lock)) {
2560         remaining_time = GetRemainingTime(params.timeout_ms, begin);
2561         if (remaining_time.count() < 0) return false;
2562 
2563         std::string path;
2564         if (!MapSnapshot(lock, params.GetPartitionName(), base_device, cow_device, remaining_time,
2565                          &path)) {
2566             LOG(ERROR) << "Could not map snapshot for partition: " << params.GetPartitionName();
2567             return false;
2568         }
2569         // No need to add params.GetPartitionName() to created_devices since it is immediately
2570         // released.
2571 
2572         if (paths) {
2573             paths->snapshot_device = path;
2574         }
2575         LOG(INFO) << "Mapped " << params.GetPartitionName() << " as snapshot device at " << path;
2576     } else {
2577         LOG(INFO) << "Mapped " << params.GetPartitionName() << " as snapshot device at "
2578                   << cow_device;
2579     }
2580 
2581     created_devices.Release();
2582 
2583     return true;
2584 }
2585 
UnmapPartitionWithSnapshot(LockedFile * lock,const std::string & target_partition_name)2586 bool SnapshotManager::UnmapPartitionWithSnapshot(LockedFile* lock,
2587                                                  const std::string& target_partition_name) {
2588     CHECK(lock);
2589 
2590     if (!UnmapSnapshot(lock, target_partition_name)) {
2591         return false;
2592     }
2593 
2594     if (!UnmapCowDevices(lock, target_partition_name)) {
2595         return false;
2596     }
2597 
2598     auto base_name = GetBaseDeviceName(target_partition_name);
2599     if (!DeleteDeviceIfExists(base_name)) {
2600         LOG(ERROR) << "Cannot delete base device: " << base_name;
2601         return false;
2602     }
2603 
2604     auto source_name = GetSourceDeviceName(target_partition_name);
2605     if (!DeleteDeviceIfExists(source_name)) {
2606         LOG(ERROR) << "Cannot delete source device: " << source_name;
2607         return false;
2608     }
2609 
2610     LOG(INFO) << "Successfully unmapped snapshot " << target_partition_name;
2611 
2612     return true;
2613 }
2614 
// Maps the COW device(s) backing the snapshot of params.GetPartitionName().
//
// If |snapshot_status| reports a non-zero cow_file_size, the COW image is
// mapped first via MapCowImage(). When there is no COW partition
// (cow_partition_size is zero), that image alone is the COW device and
// |*cow_name| is set to the image device name. Otherwise a device-mapper
// device named GetCowName(partition) is created from the COW partition's
// table, with the COW image (if any) appended as the last linear extent.
//
// Every device mapped here is appended to |created_devices|. |lock| must be
// non-null and at least one of the two COW sizes must be non-zero (both are
// enforced with CHECK). Returns false on any failure, including exhaustion of
// params.timeout_ms.
bool SnapshotManager::MapCowDevices(LockedFile* lock, const CreateLogicalPartitionParams& params,
                                    const SnapshotStatus& snapshot_status,
                                    AutoDeviceList* created_devices, std::string* cow_name) {
    CHECK(lock);
    CHECK(snapshot_status.cow_partition_size() + snapshot_status.cow_file_size() > 0);
    // Timeout accounting for this function starts here, against params.timeout_ms.
    auto begin = std::chrono::steady_clock::now();

    std::string partition_name = params.GetPartitionName();
    std::string cow_image_name = GetCowImageDeviceName(partition_name);
    // Default result; overwritten below if only the COW image exists.
    *cow_name = GetCowName(partition_name);

    // Map COW image if necessary.
    if (snapshot_status.cow_file_size() > 0) {
        if (!EnsureImageManager()) return false;
        auto remaining_time = GetRemainingTime(params.timeout_ms, begin);
        if (remaining_time.count() < 0) return false;

        if (!MapCowImage(partition_name, remaining_time).has_value()) {
            LOG(ERROR) << "Could not map cow image for partition: " << partition_name;
            return false;
        }
        created_devices->EmplaceBack<AutoUnmapImage>(images_.get(), cow_image_name);

        // If no COW partition exists, just return the image alone.
        if (snapshot_status.cow_partition_size() == 0) {
            *cow_name = std::move(cow_image_name);
            LOG(INFO) << "Mapped COW image for " << partition_name << " at " << *cow_name;
            return true;
        }
    }

    auto remaining_time = GetRemainingTime(params.timeout_ms, begin);
    if (remaining_time.count() < 0) return false;

    // Reaching this point means a COW partition must exist: the image-only
    // case returned above, and the entry CHECK rules out both sizes being zero.
    CHECK(snapshot_status.cow_partition_size() > 0);

    // Create the DmTable for the COW device. It is the DmTable of the COW partition plus
    // COW image device as the last extent.
    CreateLogicalPartitionParams cow_partition_params = params;
    // Clear the inherited partition pointer and device name and look the COW
    // partition up by name instead.
    cow_partition_params.partition = nullptr;
    cow_partition_params.partition_name = *cow_name;
    cow_partition_params.device_name.clear();
    DmTable table;
    if (!CreateDmTable(cow_partition_params, &table)) {
        return false;
    }
    // If the COW image exists, append it as the last extent.
    if (snapshot_status.cow_file_size() > 0) {
        std::string cow_image_device;
        if (!GetMappedImageDeviceStringOrPath(cow_image_name, &cow_image_device)) {
            LOG(ERROR) << "Cannot determine major/minor for: " << cow_image_name;
            return false;
        }
        // Sizes are stored in bytes; convert to sectors for the dm table.
        auto cow_partition_sectors = snapshot_status.cow_partition_size() / kSectorSize;
        auto cow_image_sectors = snapshot_status.cow_file_size() / kSectorSize;
        // NOTE(review): arguments are presumably (start sector, length, backing
        // device, offset in backing device), placing the image right after the
        // COW partition's sectors - confirm against DmTargetLinear.
        table.Emplace<DmTargetLinear>(cow_partition_sectors, cow_image_sectors, cow_image_device,
                                      0);
    }

    // We have created the DmTable now. Map it.
    std::string cow_path;
    if (!dm_.CreateDevice(*cow_name, table, &cow_path, remaining_time)) {
        LOG(ERROR) << "Could not create COW device: " << *cow_name;
        return false;
    }
    created_devices->EmplaceBack<AutoUnmapDevice>(&dm_, *cow_name);
    LOG(INFO) << "Mapped COW device for " << params.GetPartitionName() << " at " << cow_path;
    return true;
}
2684 
UnmapCowDevices(LockedFile * lock,const std::string & name)2685 bool SnapshotManager::UnmapCowDevices(LockedFile* lock, const std::string& name) {
2686     CHECK(lock);
2687     if (!EnsureImageManager()) return false;
2688 
2689     if (UpdateUsesCompression(lock) && !UpdateUsesUserSnapshots(lock)) {
2690         auto dm_user_name = GetDmUserCowName(name, GetSnapshotDriver(lock));
2691         if (!UnmapDmUserDevice(dm_user_name)) {
2692             return false;
2693         }
2694     }
2695 
2696     if (!DeleteDeviceIfExists(GetCowName(name), 4000ms)) {
2697         LOG(ERROR) << "Cannot unmap: " << GetCowName(name);
2698         return false;
2699     }
2700 
2701     std::string cow_image_name = GetCowImageDeviceName(name);
2702     if (!images_->UnmapImageIfExists(cow_image_name)) {
2703         LOG(ERROR) << "Cannot unmap image " << cow_image_name;
2704         return false;
2705     }
2706     return true;
2707 }
2708 
UnmapDmUserDevice(const std::string & dm_user_name)2709 bool SnapshotManager::UnmapDmUserDevice(const std::string& dm_user_name) {
2710     if (dm_.GetState(dm_user_name) == DmDeviceState::INVALID) {
2711         return true;
2712     }
2713 
2714     if (!DeleteDeviceIfExists(dm_user_name)) {
2715         LOG(ERROR) << "Cannot unmap " << dm_user_name;
2716         return false;
2717     }
2718 
2719     if (EnsureSnapuserdConnected()) {
2720         if (!snapuserd_client_->WaitForDeviceDelete(dm_user_name)) {
2721             LOG(ERROR) << "Failed to wait for " << dm_user_name << " control device to delete";
2722             return false;
2723         }
2724     }
2725 
2726     // Ensure the control device is gone so we don't run into ABA problems.
2727     auto control_device = "/dev/dm-user/" + dm_user_name;
2728     if (!android::fs_mgr::WaitForFileDeleted(control_device, 10s)) {
2729         LOG(ERROR) << "Timed out waiting for " << control_device << " to unlink";
2730         return false;
2731     }
2732     return true;
2733 }
2734 
UnmapUserspaceSnapshotDevice(LockedFile * lock,const std::string & snapshot_name)2735 bool SnapshotManager::UnmapUserspaceSnapshotDevice(LockedFile* lock,
2736                                                    const std::string& snapshot_name) {
2737     auto dm_user_name = GetDmUserCowName(snapshot_name, GetSnapshotDriver(lock));
2738     if (dm_.GetState(dm_user_name) == DmDeviceState::INVALID) {
2739         return true;
2740     }
2741 
2742     CHECK(lock);
2743 
2744     SnapshotStatus snapshot_status;
2745 
2746     if (!ReadSnapshotStatus(lock, snapshot_name, &snapshot_status)) {
2747         return false;
2748     }
2749     // If the merge is complete, then we switch dm tables which is equivalent
2750     // to unmap; hence, we can't be deleting the device
2751     // as the table would be mounted off partitions and will fail.
2752     if (snapshot_status.state() != SnapshotState::MERGE_COMPLETED) {
2753         if (!DeleteDeviceIfExists(dm_user_name, 4000ms)) {
2754             LOG(ERROR) << "Cannot unmap " << dm_user_name;
2755             return false;
2756         }
2757     }
2758 
2759     if (EnsureSnapuserdConnected()) {
2760         if (!snapuserd_client_->WaitForDeviceDelete(dm_user_name)) {
2761             LOG(ERROR) << "Failed to wait for " << dm_user_name << " control device to delete";
2762             return false;
2763         }
2764     }
2765 
2766     // Ensure the control device is gone so we don't run into ABA problems.
2767     auto control_device = "/dev/dm-user/" + dm_user_name;
2768     if (!android::fs_mgr::WaitForFileDeleted(control_device, 10s)) {
2769         LOG(ERROR) << "Timed out waiting for " << control_device << " to unlink";
2770         return false;
2771     }
2772     return true;
2773 }
2774 
MapAllSnapshots(const std::chrono::milliseconds & timeout_ms)2775 bool SnapshotManager::MapAllSnapshots(const std::chrono::milliseconds& timeout_ms) {
2776     auto lock = LockExclusive();
2777     if (!lock) return false;
2778 
2779     auto state = ReadUpdateState(lock.get());
2780     if (state == UpdateState::Unverified) {
2781         if (GetCurrentSlot() == Slot::Target) {
2782             LOG(ERROR) << "Cannot call MapAllSnapshots when booting from the target slot.";
2783             return false;
2784         }
2785     } else if (state != UpdateState::Initiated) {
2786         LOG(ERROR) << "Cannot call MapAllSnapshots from update state: " << state;
2787         return false;
2788     }
2789 
2790     std::vector<std::string> snapshots;
2791     if (!ListSnapshots(lock.get(), &snapshots)) {
2792         return false;
2793     }
2794 
2795     const auto& opener = device_->GetPartitionOpener();
2796     auto slot_suffix = device_->GetOtherSlotSuffix();
2797     auto slot_number = SlotNumberForSlotSuffix(slot_suffix);
2798     auto super_device = device_->GetSuperDevice(slot_number);
2799     auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot_number);
2800     if (!metadata) {
2801         LOG(ERROR) << "MapAllSnapshots could not read dynamic partition metadata for device: "
2802                    << super_device;
2803         return false;
2804     }
2805 
2806     for (const auto& snapshot : snapshots) {
2807         if (!UnmapPartitionWithSnapshot(lock.get(), snapshot)) {
2808             LOG(ERROR) << "MapAllSnapshots could not unmap snapshot: " << snapshot;
2809             return false;
2810         }
2811 
2812         CreateLogicalPartitionParams params = {
2813                 .block_device = super_device,
2814                 .metadata = metadata.get(),
2815                 .partition_name = snapshot,
2816                 .timeout_ms = timeout_ms,
2817                 .partition_opener = &opener,
2818         };
2819         if (!MapPartitionWithSnapshot(lock.get(), std::move(params), SnapshotContext::Mount,
2820                                       nullptr)) {
2821             LOG(ERROR) << "MapAllSnapshots failed to map: " << snapshot;
2822             return false;
2823         }
2824     }
2825 
2826     LOG(INFO) << "MapAllSnapshots succeeded.";
2827     return true;
2828 }
2829 
UnmapAllSnapshots()2830 bool SnapshotManager::UnmapAllSnapshots() {
2831     auto lock = LockExclusive();
2832     if (!lock) return false;
2833 
2834     return UnmapAllSnapshots(lock.get());
2835 }
2836 
UnmapAllSnapshots(LockedFile * lock)2837 bool SnapshotManager::UnmapAllSnapshots(LockedFile* lock) {
2838     std::vector<std::string> snapshots;
2839     if (!ListSnapshots(lock, &snapshots)) {
2840         return false;
2841     }
2842 
2843     for (const auto& snapshot : snapshots) {
2844         if (!UnmapPartitionWithSnapshot(lock, snapshot)) {
2845             LOG(ERROR) << "Failed to unmap snapshot: " << snapshot;
2846             return false;
2847         }
2848     }
2849 
2850     // Terminate the daemon and release the snapuserd_client_ object.
2851     // If we need to re-connect with the daemon, EnsureSnapuserdConnected()
2852     // will re-create the object and establish the socket connection.
2853     if (snapuserd_client_) {
2854         LOG(INFO) << "Shutdown snapuserd daemon";
2855         snapuserd_client_->DetachSnapuserd();
2856         snapuserd_client_ = nullptr;
2857     }
2858 
2859     return true;
2860 }
2861 
OpenFile(const std::string & file,int lock_flags)2862 auto SnapshotManager::OpenFile(const std::string& file,
2863                                int lock_flags) -> std::unique_ptr<LockedFile> {
2864     unique_fd fd(open(file.c_str(), O_RDONLY | O_CLOEXEC | O_NOFOLLOW));
2865     if (fd < 0) {
2866         PLOG(ERROR) << "Open failed: " << file;
2867         return nullptr;
2868     }
2869     if (lock_flags != 0 && TEMP_FAILURE_RETRY(flock(fd, lock_flags)) < 0) {
2870         PLOG(ERROR) << "Acquire flock failed: " << file;
2871         return nullptr;
2872     }
2873     // For simplicity, we want to CHECK that lock_mode == LOCK_EX, in some
2874     // calls, so strip extra flags.
2875     int lock_mode = lock_flags & (LOCK_EX | LOCK_SH);
2876     return std::make_unique<LockedFile>(file, std::move(fd), lock_mode);
2877 }
2878 
~LockedFile()2879 SnapshotManager::LockedFile::~LockedFile() {
2880     if (TEMP_FAILURE_RETRY(flock(fd_, LOCK_UN)) < 0) {
2881         PLOG(ERROR) << "Failed to unlock file: " << path_;
2882     }
2883 }
2884 
GetStateFilePath() const2885 std::string SnapshotManager::GetStateFilePath() const {
2886     return metadata_dir_ + "/state"s;
2887 }
2888 
GetMergeStateFilePath() const2889 std::string SnapshotManager::GetMergeStateFilePath() const {
2890     return metadata_dir_ + "/merge_state"s;
2891 }
2892 
GetLockPath() const2893 std::string SnapshotManager::GetLockPath() const {
2894     return metadata_dir_;
2895 }
2896 
OpenLock(int lock_flags)2897 std::unique_ptr<SnapshotManager::LockedFile> SnapshotManager::OpenLock(int lock_flags) {
2898     auto lock_file = GetLockPath();
2899     return OpenFile(lock_file, lock_flags);
2900 }
2901 
LockShared()2902 std::unique_ptr<SnapshotManager::LockedFile> SnapshotManager::LockShared() {
2903     return OpenLock(LOCK_SH);
2904 }
2905 
LockExclusive()2906 std::unique_ptr<SnapshotManager::LockedFile> SnapshotManager::LockExclusive() {
2907     return OpenLock(LOCK_EX);
2908 }
2909 
UpdateStateFromString(const std::string & contents)2910 static UpdateState UpdateStateFromString(const std::string& contents) {
2911     if (contents.empty() || contents == "none") {
2912         return UpdateState::None;
2913     } else if (contents == "initiated") {
2914         return UpdateState::Initiated;
2915     } else if (contents == "unverified") {
2916         return UpdateState::Unverified;
2917     } else if (contents == "merging") {
2918         return UpdateState::Merging;
2919     } else if (contents == "merge-completed") {
2920         return UpdateState::MergeCompleted;
2921     } else if (contents == "merge-needs-reboot") {
2922         return UpdateState::MergeNeedsReboot;
2923     } else if (contents == "merge-failed") {
2924         return UpdateState::MergeFailed;
2925     } else if (contents == "cancelled") {
2926         return UpdateState::Cancelled;
2927     } else {
2928         LOG(ERROR) << "Unknown merge state in update state file: \"" << contents << "\"";
2929         return UpdateState::None;
2930     }
2931 }
2932 
operator <<(std::ostream & os,UpdateState state)2933 std::ostream& operator<<(std::ostream& os, UpdateState state) {
2934     switch (state) {
2935         case UpdateState::None:
2936             return os << "none";
2937         case UpdateState::Initiated:
2938             return os << "initiated";
2939         case UpdateState::Unverified:
2940             return os << "unverified";
2941         case UpdateState::Merging:
2942             return os << "merging";
2943         case UpdateState::MergeCompleted:
2944             return os << "merge-completed";
2945         case UpdateState::MergeNeedsReboot:
2946             return os << "merge-needs-reboot";
2947         case UpdateState::MergeFailed:
2948             return os << "merge-failed";
2949         case UpdateState::Cancelled:
2950             return os << "cancelled";
2951         default:
2952             LOG(ERROR) << "Unknown update state: " << static_cast<uint32_t>(state);
2953             return os;
2954     }
2955 }
2956 
operator <<(std::ostream & os,MergePhase phase)2957 std::ostream& operator<<(std::ostream& os, MergePhase phase) {
2958     switch (phase) {
2959         case MergePhase::NO_MERGE:
2960             return os << "none";
2961         case MergePhase::FIRST_PHASE:
2962             return os << "first";
2963         case MergePhase::SECOND_PHASE:
2964             return os << "second";
2965         default:
2966             LOG(ERROR) << "Unknown merge phase: " << static_cast<uint32_t>(phase);
2967             return os << "unknown(" << static_cast<uint32_t>(phase) << ")";
2968     }
2969 }
2970 
ReadUpdateState(LockedFile * lock)2971 UpdateState SnapshotManager::ReadUpdateState(LockedFile* lock) {
2972     SnapshotUpdateStatus status = ReadSnapshotUpdateStatus(lock);
2973     return status.state();
2974 }
2975 
ReadSnapshotUpdateStatus(LockedFile * lock)2976 SnapshotUpdateStatus SnapshotManager::ReadSnapshotUpdateStatus(LockedFile* lock) {
2977     CHECK(lock);
2978 
2979     SnapshotUpdateStatus status = {};
2980     std::string contents;
2981     if (!android::base::ReadFileToString(GetStateFilePath(), &contents)) {
2982         PLOG(ERROR) << "Read state file failed";
2983         status.set_state(UpdateState::None);
2984         return status;
2985     }
2986 
2987     if (!status.ParseFromString(contents)) {
2988         LOG(WARNING) << "Unable to parse state file as SnapshotUpdateStatus, using the old format";
2989 
2990         // Try to rollback to legacy file to support devices that are
2991         // currently using the old file format.
2992         // TODO(b/147409432)
2993         status.set_state(UpdateStateFromString(contents));
2994     }
2995 
2996     return status;
2997 }
2998 
WriteUpdateState(LockedFile * lock,UpdateState state,MergeFailureCode failure_code)2999 bool SnapshotManager::WriteUpdateState(LockedFile* lock, UpdateState state,
3000                                        MergeFailureCode failure_code) {
3001     SnapshotUpdateStatus status;
3002     status.set_state(state);
3003 
3004     switch (state) {
3005         case UpdateState::MergeFailed:
3006             status.set_merge_failure_code(failure_code);
3007             break;
3008         case UpdateState::Initiated:
3009             status.set_source_build_fingerprint(
3010                     android::base::GetProperty("ro.build.fingerprint", ""));
3011             break;
3012         default:
3013             break;
3014     }
3015 
3016     // If we're transitioning between two valid states (eg, we're not beginning
3017     // or ending an OTA), then make sure to propagate the compression bit and
3018     // build fingerprint.
3019     if (!(state == UpdateState::Initiated || state == UpdateState::None)) {
3020         SnapshotUpdateStatus old_status = ReadSnapshotUpdateStatus(lock);
3021         status.set_using_snapuserd(old_status.using_snapuserd());
3022         status.set_source_build_fingerprint(old_status.source_build_fingerprint());
3023         status.set_merge_phase(old_status.merge_phase());
3024         status.set_userspace_snapshots(old_status.userspace_snapshots());
3025         status.set_io_uring_enabled(old_status.io_uring_enabled());
3026         status.set_legacy_snapuserd(old_status.legacy_snapuserd());
3027         status.set_o_direct(old_status.o_direct());
3028     }
3029     return WriteSnapshotUpdateStatus(lock, status);
3030 }
3031 
WriteSnapshotUpdateStatus(LockedFile * lock,const SnapshotUpdateStatus & status)3032 bool SnapshotManager::WriteSnapshotUpdateStatus(LockedFile* lock,
3033                                                 const SnapshotUpdateStatus& status) {
3034     CHECK(lock);
3035     CHECK(lock->lock_mode() == LOCK_EX);
3036 
3037     std::string contents;
3038     if (!status.SerializeToString(&contents)) {
3039         LOG(ERROR) << "Unable to serialize SnapshotUpdateStatus.";
3040         return false;
3041     }
3042 
3043 #ifdef LIBSNAPSHOT_USE_HAL
3044     auto merge_status = MergeStatus::UNKNOWN;
3045     switch (status.state()) {
3046         // The needs-reboot and completed cases imply that /data and /metadata
3047         // can be safely wiped, so we don't report a merge status.
3048         case UpdateState::None:
3049         case UpdateState::MergeNeedsReboot:
3050         case UpdateState::MergeCompleted:
3051         case UpdateState::Initiated:
3052             merge_status = MergeStatus::NONE;
3053             break;
3054         case UpdateState::Unverified:
3055             merge_status = MergeStatus::SNAPSHOTTED;
3056             break;
3057         case UpdateState::Merging:
3058         case UpdateState::MergeFailed:
3059             merge_status = MergeStatus::MERGING;
3060             break;
3061         default:
3062             // Note that Cancelled flows to here - it is never written, since
3063             // it only communicates a transient state to the caller.
3064             LOG(ERROR) << "Unexpected update status: " << status.state();
3065             break;
3066     }
3067 
3068     bool set_before_write =
3069             merge_status == MergeStatus::SNAPSHOTTED || merge_status == MergeStatus::MERGING;
3070     if (set_before_write && !device_->SetBootControlMergeStatus(merge_status)) {
3071         return false;
3072     }
3073 #endif
3074 
3075     if (!WriteStringToFileAtomic(contents, GetStateFilePath())) {
3076         PLOG(ERROR) << "Could not write to state file";
3077         return false;
3078     }
3079 
3080 #ifdef LIBSNAPSHOT_USE_HAL
3081     if (!set_before_write && !device_->SetBootControlMergeStatus(merge_status)) {
3082         return false;
3083     }
3084 #endif
3085     return true;
3086 }
3087 
GetSnapshotStatusFilePath(const std::string & name)3088 std::string SnapshotManager::GetSnapshotStatusFilePath(const std::string& name) {
3089     auto file = metadata_dir_ + "/snapshots/"s + name;
3090     return file;
3091 }
3092 
ReadSnapshotStatus(LockedFile * lock,const std::string & name,SnapshotStatus * status)3093 bool SnapshotManager::ReadSnapshotStatus(LockedFile* lock, const std::string& name,
3094                                          SnapshotStatus* status) {
3095     CHECK(lock);
3096     auto path = GetSnapshotStatusFilePath(name);
3097 
3098     unique_fd fd(open(path.c_str(), O_RDONLY | O_CLOEXEC | O_NOFOLLOW));
3099     if (fd < 0) {
3100         PLOG(ERROR) << "Open failed: " << path;
3101         return false;
3102     }
3103 
3104     if (!status->ParseFromFileDescriptor(fd.get())) {
3105         PLOG(ERROR) << "Unable to parse " << path << " as SnapshotStatus";
3106         return false;
3107     }
3108 
3109     if (status->name() != name) {
3110         LOG(WARNING) << "Found snapshot status named " << status->name() << " in " << path;
3111         status->set_name(name);
3112     }
3113 
3114     return true;
3115 }
3116 
WriteSnapshotStatus(LockedFile * lock,const SnapshotStatus & status)3117 bool SnapshotManager::WriteSnapshotStatus(LockedFile* lock, const SnapshotStatus& status) {
3118     // The caller must take an exclusive lock to modify snapshots.
3119     CHECK(lock);
3120     CHECK(lock->lock_mode() == LOCK_EX);
3121     CHECK(!status.name().empty());
3122 
3123     auto path = GetSnapshotStatusFilePath(status.name());
3124 
3125     std::string content;
3126     if (!status.SerializeToString(&content)) {
3127         LOG(ERROR) << "Unable to serialize SnapshotStatus for " << status.name();
3128         return false;
3129     }
3130 
3131     if (!WriteStringToFileAtomic(content, path)) {
3132         PLOG(ERROR) << "Unable to write SnapshotStatus to " << path;
3133         return false;
3134     }
3135 
3136     return true;
3137 }
3138 
EnsureImageManager()3139 bool SnapshotManager::EnsureImageManager() {
3140     if (images_) return true;
3141 
3142     images_ = device_->OpenImageManager();
3143     if (!images_) {
3144         LOG(ERROR) << "Could not open ImageManager";
3145         return false;
3146     }
3147     return true;
3148 }
3149 
EnsureSnapuserdConnected(std::chrono::milliseconds timeout_ms)3150 bool SnapshotManager::EnsureSnapuserdConnected(std::chrono::milliseconds timeout_ms) {
3151     if (snapuserd_client_) {
3152         return true;
3153     }
3154 
3155     if (!use_first_stage_snapuserd_ && !EnsureSnapuserdStarted()) {
3156         return false;
3157     }
3158 
3159     snapuserd_client_ = SnapuserdClient::Connect(kSnapuserdSocket, timeout_ms);
3160     if (!snapuserd_client_) {
3161         LOG(ERROR) << "Unable to connect to snapuserd";
3162         return false;
3163     }
3164     return true;
3165 }
3166 
UnmapAndDeleteCowPartition(MetadataBuilder * current_metadata)3167 void SnapshotManager::UnmapAndDeleteCowPartition(MetadataBuilder* current_metadata) {
3168     std::vector<std::string> to_delete;
3169     for (auto* existing_cow_partition : current_metadata->ListPartitionsInGroup(kCowGroupName)) {
3170         if (!DeleteDeviceIfExists(existing_cow_partition->name())) {
3171             LOG(WARNING) << existing_cow_partition->name()
3172                          << " cannot be unmapped and its space cannot be reclaimed";
3173             continue;
3174         }
3175         to_delete.push_back(existing_cow_partition->name());
3176     }
3177     for (const auto& name : to_delete) {
3178         current_metadata->RemovePartition(name);
3179     }
3180 }
3181 
AddRequiredSpace(Return orig,const std::map<std::string,SnapshotStatus> & all_snapshot_status)3182 static Return AddRequiredSpace(Return orig,
3183                                const std::map<std::string, SnapshotStatus>& all_snapshot_status) {
3184     if (orig.error_code() != Return::ErrorCode::NO_SPACE) {
3185         return orig;
3186     }
3187     uint64_t sum = 0;
3188     for (auto&& [name, status] : all_snapshot_status) {
3189         sum += status.cow_file_size();
3190     }
3191     LOG(INFO) << "Calculated needed COW space: " << sum << " bytes";
3192     return Return::NoSpace(sum);
3193 }
3194 
// Creates the snapshots needed to apply the update described by |manifest|.
//
// Requires the update state to be Initiated and refuses to run while overlayfs
// is set up. The steps, in order:
//   1. Build metadata builders for the current slot and for the target slot,
//      and apply the manifest to the target metadata.
//   2. Decide which snapshot mode to use (userspace snapshots, legacy
//      snapuserd compression, or dm-snapshot) from the manifest, the device
//      and its properties.
//   3. Create and initialize the snapshots through
//      CreateUpdateSnapshotsInternal() and InitializeUpdateSnapshots().
//   4. Write the target partition table; when snapuserd is used, also save a
//      copy of the current metadata to GetOldPartitionMetadataPath().
//   5. Persist the chosen mode in the update status.
//
// Returns Return::Ok() on success. On failure returns Return::Error(), or the
// error produced by CreateUpdateSnapshotsInternal() / InitializeUpdateSnapshots().
Return SnapshotManager::CreateUpdateSnapshots(const DeltaArchiveManifest& manifest) {
    auto lock = LockExclusive();
    if (!lock) return Return::Error();

    auto update_state = ReadUpdateState(lock.get());
    if (update_state != UpdateState::Initiated) {
        LOG(ERROR) << "Cannot create update snapshots in state " << update_state;
        return Return::Error();
    }

    // TODO(b/134949511): remove this check. Right now, with overlayfs mounted, the scratch
    // partition takes up a big chunk of space in super, causing COW images to be created on
    // retrofit Virtual A/B devices.
    if (device_->IsOverlayfsSetup()) {
        LOG(ERROR) << "Cannot create update snapshots with overlayfs setup. Run `adb enable-verity`"
                   << ", reboot, then try again.";
        return Return::Error();
    }

    // "current" is the slot the device reports as its own; "target" is the
    // other slot. Both builders below are created from the current slot's super.
    const auto& opener = device_->GetPartitionOpener();
    auto current_suffix = device_->GetSlotSuffix();
    uint32_t current_slot = SlotNumberForSlotSuffix(current_suffix);
    auto target_suffix = device_->GetOtherSlotSuffix();
    uint32_t target_slot = SlotNumberForSlotSuffix(target_suffix);
    auto current_super = device_->GetSuperDevice(current_slot);

    auto current_metadata = MetadataBuilder::New(opener, current_super, current_slot);
    if (current_metadata == nullptr) {
        LOG(ERROR) << "Cannot create metadata builder.";
        return Return::Error();
    }

    auto target_metadata =
            MetadataBuilder::NewForUpdate(opener, current_super, current_slot, target_slot);
    if (target_metadata == nullptr) {
        LOG(ERROR) << "Cannot create target metadata builder.";
        return Return::Error();
    }

    // Delete partitions with target suffix in |current_metadata|. Otherwise,
    // partition_cow_creator recognizes these left-over partitions as used space.
    for (const auto& group_name : current_metadata->ListGroups()) {
        if (android::base::EndsWith(group_name, target_suffix)) {
            current_metadata->RemoveGroupAndPartitions(group_name);
        }
    }

    // Apply the partition layout described by the manifest to the target metadata.
    SnapshotMetadataUpdater metadata_updater(target_metadata.get(), target_slot, manifest);
    if (!metadata_updater.Update()) {
        LOG(ERROR) << "Cannot calculate new metadata.";
        return Return::Error();
    }

    // Delete previous COW partitions in current_metadata so that PartitionCowCreator marks those as
    // free regions.
    UnmapAndDeleteCowPartition(current_metadata.get());

    // Check that all these metadata is not retrofit dynamic partitions. Snapshots on
    // devices with retrofit dynamic partitions does not make sense.
    // This ensures that current_metadata->GetFreeRegions() uses the same device
    // indices as target_metadata (i.e. 0 -> "super").
    // This is also assumed in MapCowDevices() call below.
    CHECK(current_metadata->GetBlockDevicePartitionName(0) == LP_METADATA_DEFAULT_PARTITION_NAME &&
          target_metadata->GetBlockDevicePartitionName(0) == LP_METADATA_DEFAULT_PARTITION_NAME);

    const auto& dap_metadata = manifest.dynamic_partition_metadata();

    // A non-empty reason turns off both userspace snapshots and legacy
    // compression below; the first matching condition wins.
    std::string vabc_disable_reason;
    if (!dap_metadata.vabc_enabled()) {
        vabc_disable_reason = "not enabled metadata";
    } else if (device_->IsRecovery()) {
        vabc_disable_reason = "recovery";
    } else if (!KernelSupportsCompressedSnapshots()) {
        vabc_disable_reason = "kernel missing userspace block device support";
    }

    // Deduce supported features.
    bool userspace_snapshots = CanUseUserspaceSnapshots();
    bool legacy_compression = GetLegacyCompressionEnabledProperty();
    bool is_legacy_snapuserd = IsVendorFromAndroid12();

    if (!vabc_disable_reason.empty()) {
        if (userspace_snapshots) {
            LOG(INFO) << "Userspace snapshots disabled: " << vabc_disable_reason;
        }
        if (legacy_compression) {
            LOG(INFO) << "Compression disabled: " << vabc_disable_reason;
        }
        userspace_snapshots = false;
        legacy_compression = false;
        is_legacy_snapuserd = false;
    }

    // The manifest's COW version is only validated when one of the
    // snapuserd-based modes is still enabled.
    if (legacy_compression || userspace_snapshots) {
        if (dap_metadata.cow_version() < kMinCowVersion ||
            dap_metadata.cow_version() > kMaxCowVersion) {
            LOG(ERROR) << "Manifest cow version is out of bounds (got: "
                       << dap_metadata.cow_version() << ", min: " << kMinCowVersion
                       << ", max: " << kMaxCowVersion << ")";
            return Return::Error();
        }
    }

    // With an Android 12 vendor and legacy compression enabled, the OTA is
    // still installed through userspace snapshots.
    if (!userspace_snapshots && is_legacy_snapuserd && legacy_compression) {
        userspace_snapshots = true;
        LOG(INFO) << "Vendor from Android 12. Enabling userspace snapshot for OTA install";
    }

    const bool using_snapuserd = userspace_snapshots || legacy_compression;
    if (!using_snapuserd) {
        LOG(INFO) << "Using legacy Virtual A/B (dm-snapshot)";
    }

    // Compression settings are taken from the manifest only when snapuserd is
    // used; otherwise they stay empty / zero.
    std::string compression_algorithm;
    uint64_t compression_factor{};
    if (using_snapuserd) {
        compression_algorithm = dap_metadata.vabc_compression_param();
        compression_factor = dap_metadata.compression_factor();
        if (compression_algorithm.empty()) {
            // Older OTAs don't set an explicit compression type, so default to gz.
            compression_algorithm = "gz";
        }
        LOG(INFO) << "using compression algorithm: " << compression_algorithm
                  << ", max compressible block size: " << compression_factor;
    }
    // Falls back to kReadAheadSizeKb when the property is not set.
    auto read_ahead_size =
            android::base::GetUintProperty<uint>("ro.virtual_ab.read_ahead_size", kReadAheadSizeKb);
    PartitionCowCreator cow_creator{
            .target_metadata = target_metadata.get(),
            .target_suffix = target_suffix,
            .target_partition = nullptr,
            .current_metadata = current_metadata.get(),
            .current_suffix = current_suffix,
            .update = nullptr,
            .extra_extents = {},
            .using_snapuserd = using_snapuserd,
            .compression_algorithm = compression_algorithm,
            .compression_factor = compression_factor,
            .read_ahead_size = read_ahead_size,
    };

    // Optional manifest features override the creator's defaults only when
    // they are explicitly present.
    if (dap_metadata.vabc_feature_set().has_threaded()) {
        cow_creator.enable_threading = dap_metadata.vabc_feature_set().threaded();
    }
    if (dap_metadata.vabc_feature_set().has_batch_writes()) {
        cow_creator.batched_writes = dap_metadata.vabc_feature_set().batch_writes();
    }

    // In case of error, automatically delete devices that are created along the way.
    // Note that "lock" is destroyed after "created_devices", so it is safe to use |lock| for
    // these devices.
    AutoDeviceList created_devices;
    std::map<std::string, SnapshotStatus> all_snapshot_status;
    auto ret = CreateUpdateSnapshotsInternal(lock.get(), manifest, &cow_creator, &created_devices,
                                             &all_snapshot_status);
    if (!ret.is_ok()) {
        LOG(ERROR) << "CreateUpdateSnapshotsInternal failed: " << ret.string();
        return ret;
    }

    auto exported_target_metadata = target_metadata->Export();
    if (exported_target_metadata == nullptr) {
        LOG(ERROR) << "Cannot export target metadata";
        return Return::Error();
    }

    ret = InitializeUpdateSnapshots(lock.get(), dap_metadata.cow_version(), target_metadata.get(),
                                    exported_target_metadata.get(), target_suffix,
                                    all_snapshot_status);
    if (!ret.is_ok()) return ret;

    // Write the new layout into the target slot's metadata on super.
    if (!UpdatePartitionTable(opener, device_->GetSuperDevice(target_slot),
                              *exported_target_metadata, target_slot)) {
        LOG(ERROR) << "Cannot write target metadata";
        return Return::Error();
    }

    // If snapuserd is enabled, we need to retain a copy of the old metadata
    // so we can access original blocks in case they are moved around. We do
    // not want to rely on the old super metadata slot because we don't
    // guarantee its validity after the slot switch is successful.
    if (using_snapuserd) {
        auto metadata = current_metadata->Export();
        if (!metadata) {
            LOG(ERROR) << "Could not export current metadata";
            return Return::Error();
        }

        auto path = GetOldPartitionMetadataPath();
        if (!android::fs_mgr::WriteToImageFile(path, *metadata.get())) {
            LOG(ERROR) << "Cannot write old metadata to " << path;
            return Return::Error();
        }
    }

    // Start from the status currently on disk and overlay the decisions made
    // above; the state itself stays at the value read at the top (Initiated).
    SnapshotUpdateStatus status = ReadSnapshotUpdateStatus(lock.get());
    status.set_state(update_state);
    status.set_using_snapuserd(using_snapuserd);

    if (userspace_snapshots) {
        status.set_userspace_snapshots(true);
        LOG(INFO) << "Virtual A/B using userspace snapshots";

        if (GetIouringEnabledProperty()) {
            status.set_io_uring_enabled(true);
            LOG(INFO) << "io_uring for snapshots enabled";
        }
        if (GetODirectEnabledProperty()) {
            status.set_o_direct(true);
            LOG(INFO) << "o_direct for source image enabled";
        }
        if (is_legacy_snapuserd) {
            status.set_legacy_snapuserd(true);
            LOG(INFO) << "Setting legacy_snapuserd to true";
        }
    } else if (legacy_compression) {
        LOG(INFO) << "Virtual A/B using legacy snapuserd";
    } else {
        LOG(INFO) << "Virtual A/B using dm-snapshot";
    }

    // Record the decisions in the in-memory members as well.
    is_snapshot_userspace_.emplace(userspace_snapshots);
    is_legacy_snapuserd_.emplace(is_legacy_snapuserd);

    if (!device()->IsTestDevice() && using_snapuserd) {
        // Terminate stale daemon if any
        // Ownership of any existing client moves into the local, so
        // snapuserd_client_ is left empty after this block either way.
        std::unique_ptr<SnapuserdClient> snapuserd_client = std::move(snapuserd_client_);
        if (!snapuserd_client) {
            snapuserd_client = SnapuserdClient::TryConnect(kSnapuserdSocket, 5s);
        }
        if (snapuserd_client) {
            snapuserd_client->DetachSnapuserd();
            snapuserd_client = nullptr;
        }
    }

    if (!WriteSnapshotUpdateStatus(lock.get(), status)) {
        LOG(ERROR) << "Unable to write new update state";
        return Return::Error();
    }

    // Success: presumably Release() stops |created_devices| from deleting the
    // devices when it goes out of scope (see the note where it is declared).
    created_devices.Release();
    LOG(INFO) << "Successfully created all snapshots for target slot " << target_suffix;

    return Return::Ok();
}
3441 
CreateUpdateSnapshotsInternal(LockedFile * lock,const DeltaArchiveManifest & manifest,PartitionCowCreator * cow_creator,AutoDeviceList * created_devices,std::map<std::string,SnapshotStatus> * all_snapshot_status)3442 Return SnapshotManager::CreateUpdateSnapshotsInternal(
3443         LockedFile* lock, const DeltaArchiveManifest& manifest, PartitionCowCreator* cow_creator,
3444         AutoDeviceList* created_devices,
3445         std::map<std::string, SnapshotStatus>* all_snapshot_status) {
3446     CHECK(lock);
3447 
3448     auto* target_metadata = cow_creator->target_metadata;
3449     const auto& target_suffix = cow_creator->target_suffix;
3450 
3451     if (!target_metadata->AddGroup(kCowGroupName, 0)) {
3452         LOG(ERROR) << "Cannot add group " << kCowGroupName;
3453         return Return::Error();
3454     }
3455 
3456     std::map<std::string, const PartitionUpdate*> partition_map;
3457     std::map<std::string, std::vector<Extent>> extra_extents_map;
3458     for (const auto& partition_update : manifest.partitions()) {
3459         auto suffixed_name = partition_update.partition_name() + target_suffix;
3460         auto&& [it, inserted] = partition_map.emplace(suffixed_name, &partition_update);
3461         if (!inserted) {
3462             LOG(ERROR) << "Duplicated partition " << partition_update.partition_name()
3463                        << " in update manifest.";
3464             return Return::Error();
3465         }
3466 
3467         auto& extra_extents = extra_extents_map[suffixed_name];
3468         if (partition_update.has_hash_tree_extent()) {
3469             extra_extents.push_back(partition_update.hash_tree_extent());
3470         }
3471         if (partition_update.has_fec_extent()) {
3472             extra_extents.push_back(partition_update.fec_extent());
3473         }
3474     }
3475 
3476     for (auto* target_partition : ListPartitionsWithSuffix(target_metadata, target_suffix)) {
3477         cow_creator->target_partition = target_partition;
3478         cow_creator->update = nullptr;
3479         auto iter = partition_map.find(target_partition->name());
3480         if (iter != partition_map.end()) {
3481             cow_creator->update = iter->second;
3482         } else {
3483             LOG(INFO) << target_partition->name()
3484                       << " isn't included in the payload, skipping the cow creation.";
3485             continue;
3486         }
3487 
3488         cow_creator->extra_extents.clear();
3489         auto extra_extents_it = extra_extents_map.find(target_partition->name());
3490         if (extra_extents_it != extra_extents_map.end()) {
3491             cow_creator->extra_extents = std::move(extra_extents_it->second);
3492         }
3493 
3494         // Compute the device sizes for the partition.
3495         auto cow_creator_ret = cow_creator->Run();
3496         if (!cow_creator_ret.has_value()) {
3497             LOG(ERROR) << "PartitionCowCreator returned no value for " << target_partition->name();
3498             return Return::Error();
3499         }
3500 
3501         LOG(INFO) << "For partition " << target_partition->name()
3502                   << ", device size = " << cow_creator_ret->snapshot_status.device_size()
3503                   << ", snapshot size = " << cow_creator_ret->snapshot_status.snapshot_size()
3504                   << ", cow partition size = "
3505                   << cow_creator_ret->snapshot_status.cow_partition_size()
3506                   << ", cow file size = " << cow_creator_ret->snapshot_status.cow_file_size();
3507 
3508         // Delete any existing snapshot before re-creating one.
3509         if (!DeleteSnapshot(lock, target_partition->name())) {
3510             LOG(ERROR) << "Cannot delete existing snapshot before creating a new one for partition "
3511                        << target_partition->name();
3512             return Return::Error();
3513         }
3514 
3515         // It is possible that the whole partition uses free space in super, and snapshot / COW
3516         // would not be needed. In this case, skip the partition.
3517         bool needs_snapshot = cow_creator_ret->snapshot_status.snapshot_size() > 0;
3518         bool needs_cow = (cow_creator_ret->snapshot_status.cow_partition_size() +
3519                           cow_creator_ret->snapshot_status.cow_file_size()) > 0;
3520         CHECK(needs_snapshot == needs_cow);
3521 
3522         if (!needs_snapshot) {
3523             LOG(INFO) << "Skip creating snapshot for partition " << target_partition->name()
3524                       << "because nothing needs to be snapshotted.";
3525             continue;
3526         }
3527 
3528         // Find the original partition size.
3529         auto name = target_partition->name();
3530         auto old_partition_name =
3531                 name.substr(0, name.size() - target_suffix.size()) + cow_creator->current_suffix;
3532         auto old_partition = cow_creator->current_metadata->FindPartition(old_partition_name);
3533         if (old_partition) {
3534             cow_creator_ret->snapshot_status.set_old_partition_size(old_partition->size());
3535         }
3536 
3537         // Store these device sizes to snapshot status file.
3538         if (!CreateSnapshot(lock, cow_creator, &cow_creator_ret->snapshot_status)) {
3539             return Return::Error();
3540         }
3541         created_devices->EmplaceBack<AutoDeleteSnapshot>(this, lock, target_partition->name());
3542 
3543         // Create the COW partition. That is, use any remaining free space in super partition before
3544         // creating the COW images.
3545         if (cow_creator_ret->snapshot_status.cow_partition_size() > 0) {
3546             CHECK(cow_creator_ret->snapshot_status.cow_partition_size() % kSectorSize == 0)
3547                     << "cow_partition_size == "
3548                     << cow_creator_ret->snapshot_status.cow_partition_size()
3549                     << " is not a multiple of sector size " << kSectorSize;
3550             auto cow_partition = target_metadata->AddPartition(GetCowName(target_partition->name()),
3551                                                                kCowGroupName, 0 /* flags */);
3552             if (cow_partition == nullptr) {
3553                 return Return::Error();
3554             }
3555 
3556             if (!target_metadata->ResizePartition(
3557                         cow_partition, cow_creator_ret->snapshot_status.cow_partition_size(),
3558                         cow_creator_ret->cow_partition_usable_regions)) {
3559                 LOG(ERROR) << "Cannot create COW partition on metadata with size "
3560                            << cow_creator_ret->snapshot_status.cow_partition_size();
3561                 return Return::Error();
3562             }
3563             // Only the in-memory target_metadata is modified; nothing to clean up if there is an
3564             // error in the future.
3565         }
3566 
3567         all_snapshot_status->emplace(target_partition->name(),
3568                                      std::move(cow_creator_ret->snapshot_status));
3569 
3570         LOG(INFO) << "Successfully created snapshot partition for " << target_partition->name();
3571     }
3572 
3573     LOG(INFO) << "Allocating CoW images.";
3574 
3575     for (auto&& [name, snapshot_status] : *all_snapshot_status) {
3576         // Create the backing COW image if necessary.
3577         if (snapshot_status.cow_file_size() > 0) {
3578             auto ret = CreateCowImage(lock, name);
3579             if (!ret.is_ok()) {
3580                 LOG(ERROR) << "CreateCowImage failed: " << ret.string();
3581                 return AddRequiredSpace(ret, *all_snapshot_status);
3582             }
3583         }
3584 
3585         LOG(INFO) << "Successfully created snapshot for " << name;
3586     }
3587 
3588     return Return::Ok();
3589 }
3590 
InitializeUpdateSnapshots(LockedFile * lock,uint32_t cow_version,MetadataBuilder * target_metadata,const LpMetadata * exported_target_metadata,const std::string & target_suffix,const std::map<std::string,SnapshotStatus> & all_snapshot_status)3591 Return SnapshotManager::InitializeUpdateSnapshots(
3592         LockedFile* lock, uint32_t cow_version, MetadataBuilder* target_metadata,
3593         const LpMetadata* exported_target_metadata, const std::string& target_suffix,
3594         const std::map<std::string, SnapshotStatus>& all_snapshot_status) {
3595     CHECK(lock);
3596 
3597     CreateLogicalPartitionParams cow_params{
3598             .block_device = LP_METADATA_DEFAULT_PARTITION_NAME,
3599             .metadata = exported_target_metadata,
3600             .timeout_ms = std::chrono::milliseconds::max(),
3601             .partition_opener = &device_->GetPartitionOpener(),
3602     };
3603     for (auto* target_partition : ListPartitionsWithSuffix(target_metadata, target_suffix)) {
3604         AutoDeviceList created_devices_for_cow;
3605 
3606         if (!UnmapPartitionWithSnapshot(lock, target_partition->name())) {
3607             LOG(ERROR) << "Cannot unmap existing COW devices before re-mapping them for zero-fill: "
3608                        << target_partition->name();
3609             return Return::Error();
3610         }
3611 
3612         auto it = all_snapshot_status.find(target_partition->name());
3613         if (it == all_snapshot_status.end()) continue;
3614         cow_params.partition_name = target_partition->name();
3615         std::string cow_name;
3616         if (!MapCowDevices(lock, cow_params, it->second, &created_devices_for_cow, &cow_name)) {
3617             return Return::Error();
3618         }
3619 
3620         std::string cow_path;
3621         if (!images_->GetMappedImageDevice(cow_name, &cow_path)) {
3622             LOG(ERROR) << "Cannot determine path for " << cow_name;
3623             return Return::Error();
3624         }
3625 
3626         if (!android::fs_mgr::WaitForFile(cow_path, 6s)) {
3627             LOG(ERROR) << "Timed out waiting for device to appear: " << cow_path;
3628             return Return::Error();
3629         }
3630 
3631         if (it->second.using_snapuserd()) {
3632             unique_fd fd(open(cow_path.c_str(), O_RDWR | O_CLOEXEC));
3633             if (fd < 0) {
3634                 PLOG(ERROR) << "open " << cow_path << " failed for snapshot "
3635                             << cow_params.partition_name;
3636                 return Return::Error();
3637             }
3638 
3639             CowOptions options;
3640             if (device()->IsTestDevice()) {
3641                 options.scratch_space = false;
3642             }
3643             options.compression = it->second.compression_algorithm();
3644             if (cow_version >= 3) {
3645                 options.op_count_max = it->second.estimated_ops_buffer_size();
3646                 options.max_blocks = {it->second.device_size() / options.block_size};
3647             }
3648 
3649             auto writer = CreateCowWriter(cow_version, options, std::move(fd));
3650             if (!writer->Finalize()) {
3651                 LOG(ERROR) << "Could not initialize COW device for " << target_partition->name();
3652                 return Return::Error();
3653             }
3654         } else {
3655             auto ret = InitializeKernelCow(cow_path);
3656             if (!ret.is_ok()) {
3657                 LOG(ERROR) << "Can't zero-fill COW device for " << target_partition->name() << ": "
3658                            << cow_path;
3659                 return AddRequiredSpace(ret, all_snapshot_status);
3660             }
3661         }
3662         // Let destructor of created_devices_for_cow to unmap the COW devices.
3663     };
3664     return Return::Ok();
3665 }
3666 
MapUpdateSnapshot(const CreateLogicalPartitionParams & params,std::string * snapshot_path)3667 bool SnapshotManager::MapUpdateSnapshot(const CreateLogicalPartitionParams& params,
3668                                         std::string* snapshot_path) {
3669     auto lock = LockShared();
3670     if (!lock) return false;
3671     if (!UnmapPartitionWithSnapshot(lock.get(), params.GetPartitionName())) {
3672         LOG(ERROR) << "Cannot unmap existing snapshot before re-mapping it: "
3673                    << params.GetPartitionName();
3674         return false;
3675     }
3676 
3677     SnapshotStatus status;
3678     if (!ReadSnapshotStatus(lock.get(), params.GetPartitionName(), &status)) {
3679         return false;
3680     }
3681     if (status.using_snapuserd()) {
3682         LOG(ERROR) << "Cannot use MapUpdateSnapshot with snapuserd";
3683         return false;
3684     }
3685 
3686     SnapshotPaths paths;
3687     if (!MapPartitionWithSnapshot(lock.get(), params, SnapshotContext::Update, &paths)) {
3688         return false;
3689     }
3690 
3691     if (!paths.snapshot_device.empty()) {
3692         *snapshot_path = paths.snapshot_device;
3693     } else {
3694         *snapshot_path = paths.target_device;
3695     }
3696     DCHECK(!snapshot_path->empty());
3697     return true;
3698 }
3699 
OpenSnapshotWriter(const android::fs_mgr::CreateLogicalPartitionParams & params,std::optional<uint64_t> label)3700 std::unique_ptr<ICowWriter> SnapshotManager::OpenSnapshotWriter(
3701         const android::fs_mgr::CreateLogicalPartitionParams& params,
3702         std::optional<uint64_t> label) {
3703 #if defined(LIBSNAPSHOT_NO_COW_WRITE)
3704     (void)params;
3705     (void)label;
3706 
3707     LOG(ERROR) << "Snapshots cannot be written in first-stage init or recovery";
3708     return nullptr;
3709 #else
3710     // First unmap any existing mapping.
3711     auto lock = LockShared();
3712     if (!lock) return nullptr;
3713     if (!UnmapPartitionWithSnapshot(lock.get(), params.GetPartitionName())) {
3714         LOG(ERROR) << "Cannot unmap existing snapshot before re-mapping it: "
3715                    << params.GetPartitionName();
3716         return nullptr;
3717     }
3718 
3719     SnapshotPaths paths;
3720     if (!MapPartitionWithSnapshot(lock.get(), params, SnapshotContext::Update, &paths)) {
3721         return nullptr;
3722     }
3723 
3724     SnapshotStatus status;
3725     if (!paths.cow_device_name.empty()) {
3726         if (!ReadSnapshotStatus(lock.get(), params.GetPartitionName(), &status)) {
3727             return nullptr;
3728         }
3729     } else {
3730         // Currently, partition_cow_creator always creates snapshots. The
3731         // reason is that if partition X shrinks while partition Y grows, we
3732         // cannot bindly write to the newly freed extents in X. This would
3733         // make the old slot unusable. So, the entire size of the target
3734         // partition is currently considered snapshottable.
3735         LOG(ERROR) << "No snapshot available for partition " << params.GetPartitionName();
3736         return nullptr;
3737     }
3738 
3739     if (!status.using_snapuserd()) {
3740         LOG(ERROR) << "Can only create snapshot writers with userspace or compressed snapshots";
3741         return nullptr;
3742     }
3743 
3744     return OpenCompressedSnapshotWriter(lock.get(), status, paths, label);
3745 #endif
3746 }
3747 
3748 #if !defined(LIBSNAPSHOT_NO_COW_WRITE)
OpenCompressedSnapshotWriter(LockedFile * lock,const SnapshotStatus & status,const SnapshotPaths & paths,std::optional<uint64_t> label)3749 std::unique_ptr<ICowWriter> SnapshotManager::OpenCompressedSnapshotWriter(
3750         LockedFile* lock, const SnapshotStatus& status, const SnapshotPaths& paths,
3751         std::optional<uint64_t> label) {
3752     CHECK(lock);
3753 
3754     CowOptions cow_options;
3755     cow_options.compression = status.compression_algorithm();
3756     cow_options.max_blocks = {status.device_size() / cow_options.block_size};
3757     cow_options.batch_write = status.batched_writes();
3758     cow_options.num_compress_threads = status.enable_threading() ? 2 : 1;
3759     cow_options.op_count_max = status.estimated_ops_buffer_size();
3760     cow_options.compression_factor = status.compression_factor();
3761     // Disable scratch space for vts tests
3762     if (device()->IsTestDevice()) {
3763         cow_options.scratch_space = false;
3764     }
3765 
3766     // Currently we don't support partial snapshots, since partition_cow_creator
3767     // never creates this scenario.
3768     CHECK(status.snapshot_size() == status.device_size());
3769 
3770     std::string cow_path;
3771     if (!GetMappedImageDevicePath(paths.cow_device_name, &cow_path)) {
3772         LOG(ERROR) << "Could not determine path for " << paths.cow_device_name;
3773         return nullptr;
3774     }
3775 
3776     unique_fd cow_fd(open(cow_path.c_str(), O_RDWR | O_CLOEXEC));
3777     if (cow_fd < 0) {
3778         PLOG(ERROR) << "OpenCompressedSnapshotWriter: open " << cow_path;
3779         return nullptr;
3780     }
3781 
3782     CowHeaderV3 header;
3783     if (!ReadCowHeader(cow_fd, &header)) {
3784         LOG(ERROR) << "OpenCompressedSnapshotWriter: read header failed";
3785         return nullptr;
3786     }
3787 
3788     return CreateCowWriter(header.prefix.major_version, cow_options, std::move(cow_fd), label);
3789 }
3790 #endif  // !defined(LIBSNAPSHOT_NO_COW_WRITE)
3791 
UnmapUpdateSnapshot(const std::string & target_partition_name)3792 bool SnapshotManager::UnmapUpdateSnapshot(const std::string& target_partition_name) {
3793     auto lock = LockShared();
3794     if (!lock) return false;
3795     return UnmapPartitionWithSnapshot(lock.get(), target_partition_name);
3796 }
3797 
UnmapAllPartitionsInRecovery()3798 bool SnapshotManager::UnmapAllPartitionsInRecovery() {
3799     auto lock = LockExclusive();
3800     if (!lock) return false;
3801 
3802     const auto& opener = device_->GetPartitionOpener();
3803     uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
3804     auto super_device = device_->GetSuperDevice(slot);
3805     auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot);
3806     if (!metadata) {
3807         LOG(ERROR) << "Could not read dynamic partition metadata for device: " << super_device;
3808         return false;
3809     }
3810 
3811     bool ok = true;
3812     for (const auto& partition : metadata->partitions) {
3813         auto partition_name = GetPartitionName(partition);
3814         ok &= UnmapPartitionWithSnapshot(lock.get(), partition_name);
3815     }
3816     return ok;
3817 }
3818 
operator <<(std::ostream & os,SnapshotManager::Slot slot)3819 std::ostream& operator<<(std::ostream& os, SnapshotManager::Slot slot) {
3820     switch (slot) {
3821         case SnapshotManager::Slot::Unknown:
3822             return os << "unknown";
3823         case SnapshotManager::Slot::Source:
3824             return os << "source";
3825         case SnapshotManager::Slot::Target:
3826             return os << "target";
3827     }
3828 }
3829 
Dump(std::ostream & os)3830 bool SnapshotManager::Dump(std::ostream& os) {
3831     // Don't actually lock. Dump() is for debugging purposes only, so it is okay
3832     // if it is racy.
3833     auto file = OpenLock(0 /* lock flag */);
3834     if (!file) return false;
3835 
3836     std::stringstream ss;
3837 
3838     auto update_status = ReadSnapshotUpdateStatus(file.get());
3839 
3840     ss << "Update state: " << update_status.state() << std::endl;
3841     ss << "Using snapuserd: " << update_status.using_snapuserd() << std::endl;
3842     ss << "Using userspace snapshots: " << update_status.userspace_snapshots() << std::endl;
3843     ss << "Using io_uring: " << update_status.io_uring_enabled() << std::endl;
3844     ss << "Using o_direct: " << update_status.o_direct() << std::endl;
3845     ss << "Using XOR compression: " << GetXorCompressionEnabledProperty() << std::endl;
3846     ss << "Current slot: " << device_->GetSlotSuffix() << std::endl;
3847     ss << "Boot indicator: booting from " << GetCurrentSlot() << " slot" << std::endl;
3848     ss << "Rollback indicator: "
3849        << (access(GetRollbackIndicatorPath().c_str(), F_OK) == 0 ? "exists" : strerror(errno))
3850        << std::endl;
3851     ss << "Forward merge indicator: "
3852        << (access(GetForwardMergeIndicatorPath().c_str(), F_OK) == 0 ? "exists" : strerror(errno))
3853        << std::endl;
3854     ss << "Source build fingerprint: " << update_status.source_build_fingerprint() << std::endl;
3855 
3856     if (update_status.state() == UpdateState::Merging) {
3857         ss << "Merge completion: ";
3858         if (!EnsureSnapuserdConnected()) {
3859             ss << "N/A";
3860         } else {
3861             ss << snapuserd_client_->GetMergePercent() << "%";
3862         }
3863         ss << std::endl;
3864         ss << "Merge phase: " << update_status.merge_phase() << std::endl;
3865     }
3866 
3867     bool ok = true;
3868     std::vector<std::string> snapshots;
3869     if (!ListSnapshots(file.get(), &snapshots)) {
3870         LOG(ERROR) << "Could not list snapshots";
3871         snapshots.clear();
3872         ok = false;
3873     }
3874     for (const auto& name : snapshots) {
3875         ss << "Snapshot: " << name << std::endl;
3876         SnapshotStatus status;
3877         if (!ReadSnapshotStatus(file.get(), name, &status)) {
3878             ok = false;
3879             continue;
3880         }
3881         ss << "    state: " << SnapshotState_Name(status.state()) << std::endl;
3882         ss << "    device size (bytes): " << status.device_size() << std::endl;
3883         ss << "    snapshot size (bytes): " << status.snapshot_size() << std::endl;
3884         ss << "    cow partition size (bytes): " << status.cow_partition_size() << std::endl;
3885         ss << "    cow file size (bytes): " << status.cow_file_size() << std::endl;
3886         ss << "    allocated sectors: " << status.sectors_allocated() << std::endl;
3887         ss << "    metadata sectors: " << status.metadata_sectors() << std::endl;
3888         ss << "    compression: " << status.compression_algorithm() << std::endl;
3889         ss << "    compression factor: " << status.compression_factor() << std::endl;
3890         ss << "    merge phase: " << DecideMergePhase(status) << std::endl;
3891     }
3892     os << ss.rdbuf();
3893     return ok;
3894 }
3895 
EnsureMetadataMounted()3896 std::unique_ptr<AutoDevice> SnapshotManager::EnsureMetadataMounted() {
3897     if (!device_->IsRecovery()) {
3898         // No need to mount anything in recovery.
3899         LOG(INFO) << "EnsureMetadataMounted does nothing in Android mode.";
3900         return std::unique_ptr<AutoUnmountDevice>(new AutoUnmountDevice());
3901     }
3902     auto ret = AutoUnmountDevice::New(device_->GetMetadataDir());
3903     if (ret == nullptr) return nullptr;
3904 
3905     // In rescue mode, it is possible to erase and format metadata, but /metadata/ota is not
3906     // created to execute snapshot updates. Hence, subsequent calls is likely to fail because
3907     // Lock*() fails. By failing early and returning nullptr here, update_engine_sideload can
3908     // treat this case as if /metadata is not mounted.
3909     if (!LockShared()) {
3910         LOG(WARNING) << "/metadata is mounted, but errors occur when acquiring a shared lock. "
3911                         "Subsequent calls to SnapshotManager will fail. Unmounting /metadata now.";
3912         return nullptr;
3913     }
3914     return ret;
3915 }
3916 
HandleImminentDataWipe(const std::function<void ()> & callback)3917 bool SnapshotManager::HandleImminentDataWipe(const std::function<void()>& callback) {
3918     if (!device_->IsRecovery()) {
3919         LOG(ERROR) << "Data wipes are only allowed in recovery.";
3920         return false;
3921     }
3922 
3923     auto mount = EnsureMetadataMounted();
3924     if (!mount || !mount->HasDevice()) {
3925         // We allow the wipe to continue, because if we can't mount /metadata,
3926         // it is unlikely the device would have booted anyway. If there is no
3927         // metadata partition, then the device predates Virtual A/B.
3928         return true;
3929     }
3930 
3931     // Check this early, so we don't accidentally start trying to populate
3932     // the state file in recovery. Note we don't call GetUpdateState since
3933     // we want errors in acquiring the lock to be propagated, instead of
3934     // returning UpdateState::None.
3935     auto state_file = GetStateFilePath();
3936     if (access(state_file.c_str(), F_OK) != 0 && errno == ENOENT) {
3937         return true;
3938     }
3939 
3940     auto slot_number = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
3941     auto super_path = device_->GetSuperDevice(slot_number);
3942     if (!CreateLogicalAndSnapshotPartitions(super_path, 20s)) {
3943         LOG(ERROR) << "Unable to map partitions to complete merge.";
3944         return false;
3945     }
3946 
3947     auto process_callback = [&]() -> bool {
3948         if (callback) {
3949             callback();
3950         }
3951         return true;
3952     };
3953 
3954     in_factory_data_reset_ = true;
3955     UpdateState state =
3956             ProcessUpdateStateOnDataWipe(true /* allow_forward_merge */, process_callback);
3957     in_factory_data_reset_ = false;
3958 
3959     if (state == UpdateState::MergeFailed) {
3960         return false;
3961     }
3962 
3963     // Nothing should be depending on partitions now, so unmap them all.
3964     if (!UnmapAllPartitionsInRecovery()) {
3965         LOG(ERROR) << "Unable to unmap all partitions; fastboot may fail to flash.";
3966     }
3967 
3968     if (state != UpdateState::None) {
3969         auto lock = LockExclusive();
3970         if (!lock) return false;
3971 
3972         // Zap the update state so the bootloader doesn't think we're still
3973         // merging. It's okay if this fails, it's informative only at this
3974         // point.
3975         WriteUpdateState(lock.get(), UpdateState::None);
3976     }
3977     return true;
3978 }
3979 
FinishMergeInRecovery()3980 bool SnapshotManager::FinishMergeInRecovery() {
3981     if (!device_->IsRecovery()) {
3982         LOG(ERROR) << "Data wipes are only allowed in recovery.";
3983         return false;
3984     }
3985 
3986     auto mount = EnsureMetadataMounted();
3987     if (!mount || !mount->HasDevice()) {
3988         return false;
3989     }
3990 
3991     auto slot_number = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
3992     auto super_path = device_->GetSuperDevice(slot_number);
3993     if (!CreateLogicalAndSnapshotPartitions(super_path, 20s)) {
3994         LOG(ERROR) << "Unable to map partitions to complete merge.";
3995         return false;
3996     }
3997 
3998     UpdateState state = ProcessUpdateState();
3999     if (state != UpdateState::MergeCompleted) {
4000         LOG(ERROR) << "Merge returned unexpected status: " << state;
4001         return false;
4002     }
4003 
4004     // Nothing should be depending on partitions now, so unmap them all.
4005     if (!UnmapAllPartitionsInRecovery()) {
4006         LOG(ERROR) << "Unable to unmap all partitions; fastboot may fail to flash.";
4007     }
4008     return true;
4009 }
4010 
ProcessUpdateStateOnDataWipe(bool allow_forward_merge,const std::function<bool ()> & callback)4011 UpdateState SnapshotManager::ProcessUpdateStateOnDataWipe(bool allow_forward_merge,
4012                                                           const std::function<bool()>& callback) {
4013     auto slot_number = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
4014     UpdateState state = ProcessUpdateState(callback);
4015     LOG(INFO) << "Update state in recovery: " << state;
4016     switch (state) {
4017         case UpdateState::MergeFailed:
4018             LOG(ERROR) << "Unrecoverable merge failure detected.";
4019             return state;
4020         case UpdateState::Unverified: {
4021             // If an OTA was just applied but has not yet started merging:
4022             //
4023             // - if forward merge is allowed, initiate merge and call
4024             // ProcessUpdateState again.
4025             //
4026             // - if forward merge is not allowed, we
4027             // have no choice but to revert slots, because the current slot will
4028             // immediately become unbootable. Rather than wait for the device
4029             // to reboot N times until a rollback, we proactively disable the
4030             // new slot instead.
4031             //
4032             // Since the rollback is inevitable, we don't treat a HAL failure
4033             // as an error here.
4034             auto slot = GetCurrentSlot();
4035             if (slot == Slot::Target) {
4036                 if (allow_forward_merge &&
4037                     access(GetForwardMergeIndicatorPath().c_str(), F_OK) == 0) {
4038                     LOG(INFO) << "Forward merge allowed, initiating merge now.";
4039 
4040                     if (!InitiateMerge()) {
4041                         LOG(ERROR) << "Failed to initiate merge on data wipe.";
4042                         return UpdateState::MergeFailed;
4043                     }
4044                     return ProcessUpdateStateOnDataWipe(false /* allow_forward_merge */, callback);
4045                 }
4046 
4047                 LOG(ERROR) << "Reverting to old slot since update will be deleted.";
4048                 device_->SetSlotAsUnbootable(slot_number);
4049             } else {
4050                 LOG(INFO) << "Booting from " << slot << " slot, no action is taken.";
4051             }
4052             break;
4053         }
4054         case UpdateState::MergeNeedsReboot:
4055             // We shouldn't get here, because nothing is depending on
4056             // logical partitions.
4057             LOG(ERROR) << "Unexpected merge-needs-reboot state in recovery.";
4058             break;
4059         default:
4060             break;
4061     }
4062     return state;
4063 }
4064 
EnsureNoOverflowSnapshot(LockedFile * lock)4065 bool SnapshotManager::EnsureNoOverflowSnapshot(LockedFile* lock) {
4066     CHECK(lock);
4067 
4068     std::vector<std::string> snapshots;
4069     if (!ListSnapshots(lock, &snapshots)) {
4070         LOG(ERROR) << "Could not list snapshots.";
4071         return false;
4072     }
4073 
4074     for (const auto& snapshot : snapshots) {
4075         SnapshotStatus status;
4076         if (!ReadSnapshotStatus(lock, snapshot, &status)) {
4077             return false;
4078         }
4079         if (status.using_snapuserd()) {
4080             continue;
4081         }
4082 
4083         std::vector<DeviceMapper::TargetInfo> targets;
4084         if (!dm_.GetTableStatus(snapshot, &targets)) {
4085             LOG(ERROR) << "Could not read snapshot device table: " << snapshot;
4086             return false;
4087         }
4088         if (targets.size() != 1) {
4089             LOG(ERROR) << "Unexpected device-mapper table for snapshot: " << snapshot
4090                        << ", size = " << targets.size();
4091             return false;
4092         }
4093         if (targets[0].IsOverflowSnapshot()) {
4094             LOG(ERROR) << "Detected overflow in snapshot " << snapshot
4095                        << ", CoW device size computation is wrong!";
4096             return false;
4097         }
4098     }
4099 
4100     return true;
4101 }
4102 
RecoveryCreateSnapshotDevices()4103 CreateResult SnapshotManager::RecoveryCreateSnapshotDevices() {
4104     if (!device_->IsRecovery()) {
4105         LOG(ERROR) << __func__ << " is only allowed in recovery.";
4106         return CreateResult::NOT_CREATED;
4107     }
4108 
4109     auto mount = EnsureMetadataMounted();
4110     if (!mount || !mount->HasDevice()) {
4111         LOG(ERROR) << "Couldn't mount Metadata.";
4112         return CreateResult::NOT_CREATED;
4113     }
4114     return RecoveryCreateSnapshotDevices(mount);
4115 }
4116 
RecoveryCreateSnapshotDevices(const std::unique_ptr<AutoDevice> & metadata_device)4117 CreateResult SnapshotManager::RecoveryCreateSnapshotDevices(
4118         const std::unique_ptr<AutoDevice>& metadata_device) {
4119     if (!device_->IsRecovery()) {
4120         LOG(ERROR) << __func__ << " is only allowed in recovery.";
4121         return CreateResult::NOT_CREATED;
4122     }
4123 
4124     if (metadata_device == nullptr || !metadata_device->HasDevice()) {
4125         LOG(ERROR) << "Metadata not mounted.";
4126         return CreateResult::NOT_CREATED;
4127     }
4128 
4129     auto state_file = GetStateFilePath();
4130     if (access(state_file.c_str(), F_OK) != 0 && errno == ENOENT) {
4131         LOG(ERROR) << "Couldn't access state file.";
4132         return CreateResult::NOT_CREATED;
4133     }
4134 
4135     if (!NeedSnapshotsInFirstStageMount()) {
4136         return CreateResult::NOT_CREATED;
4137     }
4138 
4139     auto slot_suffix = device_->GetOtherSlotSuffix();
4140     auto slot_number = SlotNumberForSlotSuffix(slot_suffix);
4141     auto super_path = device_->GetSuperDevice(slot_number);
4142     if (!CreateLogicalAndSnapshotPartitions(super_path, 20s)) {
4143         LOG(ERROR) << "Unable to map partitions.";
4144         return CreateResult::ERROR;
4145     }
4146     return CreateResult::CREATED;
4147 }
4148 
UpdateForwardMergeIndicator(bool wipe)4149 bool SnapshotManager::UpdateForwardMergeIndicator(bool wipe) {
4150     auto path = GetForwardMergeIndicatorPath();
4151 
4152     if (!wipe) {
4153         LOG(INFO) << "Wipe is not scheduled. Deleting forward merge indicator.";
4154         return RemoveFileIfExists(path);
4155     }
4156 
4157     // TODO(b/152094219): Don't forward merge if no CoW file is allocated.
4158 
4159     LOG(INFO) << "Wipe will be scheduled. Allowing forward merge of snapshots.";
4160     if (!android::base::WriteStringToFile("1", path)) {
4161         PLOG(ERROR) << "Unable to write forward merge indicator: " << path;
4162         return false;
4163     }
4164 
4165     return true;
4166 }
4167 
GetSnapshotMergeStatsInstance()4168 ISnapshotMergeStats* SnapshotManager::GetSnapshotMergeStatsInstance() {
4169     return SnapshotMergeStats::GetInstance(*this);
4170 }
4171 
4172 // This is only to be used in recovery or normal Android (not first-stage init).
4173 // We don't guarantee dm paths are available in first-stage init, because ueventd
4174 // isn't running yet.
GetMappedImageDevicePath(const std::string & device_name,std::string * device_path)4175 bool SnapshotManager::GetMappedImageDevicePath(const std::string& device_name,
4176                                                std::string* device_path) {
4177     // Try getting the device string if it is a device mapper device.
4178     if (dm_.GetState(device_name) != DmDeviceState::INVALID) {
4179         return dm_.GetDmDevicePathByName(device_name, device_path);
4180     }
4181 
4182     // Otherwise, get path from IImageManager.
4183     return images_->GetMappedImageDevice(device_name, device_path);
4184 }
4185 
GetMappedImageDeviceStringOrPath(const std::string & device_name,std::string * device_string_or_mapped_path)4186 bool SnapshotManager::GetMappedImageDeviceStringOrPath(const std::string& device_name,
4187                                                        std::string* device_string_or_mapped_path) {
4188     // Try getting the device string if it is a device mapper device.
4189     if (dm_.GetState(device_name) != DmDeviceState::INVALID) {
4190         return dm_.GetDeviceString(device_name, device_string_or_mapped_path);
4191     }
4192 
4193     // Otherwise, get path from IImageManager.
4194     if (!images_->GetMappedImageDevice(device_name, device_string_or_mapped_path)) {
4195         return false;
4196     }
4197 
4198     LOG(WARNING) << "Calling GetMappedImageDevice with local image manager; device "
4199                  << (device_string_or_mapped_path ? *device_string_or_mapped_path : "(nullptr)")
4200                  << "may not be available in first stage init! ";
4201     return true;
4202 }
4203 
WaitForDevice(const std::string & device,std::chrono::milliseconds timeout_ms)4204 bool SnapshotManager::WaitForDevice(const std::string& device,
4205                                     std::chrono::milliseconds timeout_ms) {
4206     if (!android::base::StartsWith(device, "/")) {
4207         return true;
4208     }
4209 
4210     // In first-stage init, we rely on init setting a callback which can
4211     // regenerate uevents and populate /dev for us.
4212     if (uevent_regen_callback_) {
4213         if (!uevent_regen_callback_(device)) {
4214             LOG(ERROR) << "Failed to find device after regenerating uevents: " << device;
4215             return false;
4216         }
4217         return true;
4218     }
4219 
4220     // Otherwise, the only kind of device we need to wait for is a dm-user
4221     // misc device. Normal calls to DeviceMapper::CreateDevice() guarantee
4222     // the path has been created.
4223     if (!android::base::StartsWith(device, "/dev/dm-user/")) {
4224         return true;
4225     }
4226 
4227     if (timeout_ms.count() == 0) {
4228         LOG(ERROR) << "No timeout was specified to wait for device: " << device;
4229         return false;
4230     }
4231     if (!android::fs_mgr::WaitForFile(device, timeout_ms)) {
4232         LOG(ERROR) << "Timed out waiting for device to appear: " << device;
4233         return false;
4234     }
4235     return true;
4236 }
4237 
IsSnapuserdRequired()4238 bool SnapshotManager::IsSnapuserdRequired() {
4239     auto lock = LockExclusive();
4240     if (!lock) return false;
4241 
4242     auto status = ReadSnapshotUpdateStatus(lock.get());
4243     return status.state() != UpdateState::None && status.using_snapuserd();
4244 }
4245 
PrepareSnapuserdArgsForSelinux(std::vector<std::string> * snapuserd_argv)4246 bool SnapshotManager::PrepareSnapuserdArgsForSelinux(std::vector<std::string>* snapuserd_argv) {
4247     return PerformInitTransition(InitTransition::SELINUX_DETACH, snapuserd_argv);
4248 }
4249 
DetachFirstStageSnapuserdForSelinux()4250 bool SnapshotManager::DetachFirstStageSnapuserdForSelinux() {
4251     LOG(INFO) << "Detaching first stage snapuserd";
4252 
4253     auto lock = LockExclusive();
4254     if (!lock) return false;
4255 
4256     std::vector<std::string> snapshots;
4257     if (!ListSnapshots(lock.get(), &snapshots)) {
4258         LOG(ERROR) << "Failed to list snapshots.";
4259         return false;
4260     }
4261 
4262     size_t num_cows = 0;
4263     size_t ok_cows = 0;
4264     for (const auto& snapshot : snapshots) {
4265         std::string user_cow_name = GetDmUserCowName(snapshot, GetSnapshotDriver(lock.get()));
4266 
4267         if (dm_.GetState(user_cow_name) == DmDeviceState::INVALID) {
4268             continue;
4269         }
4270 
4271         DeviceMapper::TargetInfo target;
4272         if (!GetSingleTarget(user_cow_name, TableQuery::Table, &target)) {
4273             continue;
4274         }
4275 
4276         auto target_type = DeviceMapper::GetTargetType(target.spec);
4277         if (target_type != "user") {
4278             LOG(ERROR) << "Unexpected target type for " << user_cow_name << ": " << target_type;
4279             continue;
4280         }
4281 
4282         num_cows++;
4283         auto misc_name = user_cow_name;
4284 
4285         DmTable table;
4286         table.Emplace<DmTargetUser>(0, target.spec.length, misc_name);
4287         if (!dm_.LoadTableAndActivate(user_cow_name, table)) {
4288             LOG(ERROR) << "Unable to swap tables for " << misc_name;
4289             continue;
4290         }
4291 
4292         // Wait for ueventd to acknowledge and create the control device node.
4293         std::string control_device = "/dev/dm-user/" + misc_name;
4294         if (!WaitForDevice(control_device, 10s)) {
4295             LOG(ERROR) << "dm-user control device no found:  " << misc_name;
4296             continue;
4297         }
4298 
4299         ok_cows++;
4300         LOG(INFO) << "control device is ready: " << control_device;
4301     }
4302 
4303     if (ok_cows != num_cows) {
4304         LOG(ERROR) << "Could not transition all snapuserd consumers.";
4305         return false;
4306     }
4307 
4308     return true;
4309 }
4310 
PerformSecondStageInitTransition()4311 bool SnapshotManager::PerformSecondStageInitTransition() {
4312     return PerformInitTransition(InitTransition::SECOND_STAGE);
4313 }
4314 
ReadOldPartitionMetadata(LockedFile * lock)4315 const LpMetadata* SnapshotManager::ReadOldPartitionMetadata(LockedFile* lock) {
4316     CHECK(lock);
4317 
4318     if (!old_partition_metadata_) {
4319         auto path = GetOldPartitionMetadataPath();
4320         old_partition_metadata_ = android::fs_mgr::ReadFromImageFile(path);
4321         if (!old_partition_metadata_) {
4322             LOG(ERROR) << "Could not read old partition metadata from " << path;
4323             return nullptr;
4324         }
4325     }
4326     return old_partition_metadata_.get();
4327 }
4328 
DecideMergePhase(const SnapshotStatus & status)4329 MergePhase SnapshotManager::DecideMergePhase(const SnapshotStatus& status) {
4330     if (status.using_snapuserd() && status.device_size() < status.old_partition_size()) {
4331         return MergePhase::FIRST_PHASE;
4332     }
4333     return MergePhase::SECOND_PHASE;
4334 }
4335 
UpdateCowStats(ISnapshotMergeStats * stats)4336 void SnapshotManager::UpdateCowStats(ISnapshotMergeStats* stats) {
4337     auto lock = LockExclusive();
4338     if (!lock) return;
4339 
4340     std::vector<std::string> snapshots;
4341     if (!ListSnapshots(lock.get(), &snapshots, GetSnapshotSlotSuffix())) {
4342         LOG(ERROR) << "Could not list snapshots";
4343         return;
4344     }
4345 
4346     uint64_t cow_file_size = 0;
4347     uint64_t total_cow_size = 0;
4348     uint64_t estimated_cow_size = 0;
4349     for (const auto& snapshot : snapshots) {
4350         SnapshotStatus status;
4351         if (!ReadSnapshotStatus(lock.get(), snapshot, &status)) {
4352             return;
4353         }
4354 
4355         cow_file_size += status.cow_file_size();
4356         total_cow_size += status.cow_file_size() + status.cow_partition_size();
4357         estimated_cow_size += status.estimated_cow_size();
4358     }
4359 
4360     stats->report()->set_cow_file_size(cow_file_size);
4361     stats->report()->set_total_cow_size_bytes(total_cow_size);
4362     stats->report()->set_estimated_cow_size_bytes(estimated_cow_size);
4363 }
4364 
SetMergeStatsFeatures(ISnapshotMergeStats * stats)4365 void SnapshotManager::SetMergeStatsFeatures(ISnapshotMergeStats* stats) {
4366     auto lock = LockExclusive();
4367     if (!lock) return;
4368 
4369     SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock.get());
4370     stats->report()->set_iouring_used(update_status.io_uring_enabled());
4371     stats->report()->set_userspace_snapshots_used(update_status.userspace_snapshots());
4372     stats->report()->set_xor_compression_used(GetXorCompressionEnabledProperty());
4373 }
4374 
DeleteDeviceIfExists(const std::string & name,const std::chrono::milliseconds & timeout_ms)4375 bool SnapshotManager::DeleteDeviceIfExists(const std::string& name,
4376                                            const std::chrono::milliseconds& timeout_ms) {
4377     auto start = std::chrono::steady_clock::now();
4378     while (true) {
4379         if (dm_.DeleteDeviceIfExists(name)) {
4380             return true;
4381         }
4382         auto now = std::chrono::steady_clock::now();
4383         auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(now - start);
4384         if (elapsed >= timeout_ms) {
4385             break;
4386         }
4387         std::this_thread::sleep_for(400ms);
4388     }
4389 
4390     // Try to diagnose why this failed. First get the actual device path.
4391     std::string full_path;
4392     if (!dm_.GetDmDevicePathByName(name, &full_path)) {
4393         LOG(ERROR) << "Unable to diagnose DM_DEV_REMOVE failure.";
4394         return false;
4395     }
4396 
4397     // Check for child dm-devices.
4398     std::string block_name = android::base::Basename(full_path);
4399     std::string sysfs_holders = "/sys/class/block/" + block_name + "/holders";
4400 
4401     std::error_code ec;
4402     std::filesystem::directory_iterator dir_iter(sysfs_holders, ec);
4403     if (auto begin = std::filesystem::begin(dir_iter); begin != std::filesystem::end(dir_iter)) {
4404         LOG(ERROR) << "Child device-mapper device still mapped: " << begin->path();
4405         return false;
4406     }
4407 
4408     // Check for mounted partitions.
4409     android::fs_mgr::Fstab fstab;
4410     android::fs_mgr::ReadFstabFromFile("/proc/mounts", &fstab);
4411     for (const auto& entry : fstab) {
4412         if (android::base::Basename(entry.blk_device) == block_name) {
4413             LOG(ERROR) << "Partition still mounted: " << entry.mount_point;
4414             return false;
4415         }
4416     }
4417 
4418     // Check for detached mounted partitions.
4419     for (const auto& fs : std::filesystem::directory_iterator("/sys/fs", ec)) {
4420         std::string fs_type = android::base::Basename(fs.path().c_str());
4421         if (!(fs_type == "ext4" || fs_type == "f2fs")) {
4422             continue;
4423         }
4424 
4425         std::string path = fs.path().c_str() + "/"s + block_name;
4426         if (access(path.c_str(), F_OK) == 0) {
4427             LOG(ERROR) << "Block device was lazily unmounted and is still in-use: " << full_path
4428                        << "; possibly open file descriptor or attached loop device.";
4429             return false;
4430         }
4431     }
4432 
4433     LOG(ERROR) << "Device-mapper device " << name << "(" << full_path << ")"
4434                << " still in use."
4435                << "  Probably a file descriptor was leaked or held open, or a loop device is"
4436                << " attached.";
4437     return false;
4438 }
4439 
ReadMergeFailureCode()4440 MergeFailureCode SnapshotManager::ReadMergeFailureCode() {
4441     auto lock = LockExclusive();
4442     if (!lock) return MergeFailureCode::AcquireLock;
4443 
4444     SnapshotUpdateStatus status = ReadSnapshotUpdateStatus(lock.get());
4445     if (status.state() != UpdateState::MergeFailed) {
4446         return MergeFailureCode::Ok;
4447     }
4448     return status.merge_failure_code();
4449 }
4450 
ReadSourceBuildFingerprint()4451 std::string SnapshotManager::ReadSourceBuildFingerprint() {
4452     auto lock = LockExclusive();
4453     if (!lock) return {};
4454 
4455     SnapshotUpdateStatus status = ReadSnapshotUpdateStatus(lock.get());
4456     return status.source_build_fingerprint();
4457 }
4458 
IsUserspaceSnapshotUpdateInProgress()4459 bool SnapshotManager::IsUserspaceSnapshotUpdateInProgress() {
4460     // We cannot grab /metadata/ota lock here as this
4461     // is in reboot path. See b/308900853
4462     //
4463     // Check if any of the partitions are mounted
4464     // off dm-user block device. If so, then we are certain
4465     // that OTA update in progress.
4466     auto current_suffix = device_->GetSlotSuffix();
4467     auto& dm = DeviceMapper::Instance();
4468     auto dm_block_devices = dm.FindDmPartitions();
4469     if (dm_block_devices.empty()) {
4470         LOG(ERROR) << "No dm-enabled block device is found.";
4471         return false;
4472     }
4473     for (auto& partition : dm_block_devices) {
4474         std::string partition_name = partition.first + current_suffix;
4475         DeviceMapper::TargetInfo snap_target;
4476         if (!GetSingleTarget(partition_name, TableQuery::Status, &snap_target)) {
4477             return false;
4478         }
4479         auto type = DeviceMapper::GetTargetType(snap_target.spec);
4480         if (type == "user") {
4481             return true;
4482         }
4483     }
4484     return false;
4485 }
4486 
BootFromSnapshotsWithoutSlotSwitch()4487 bool SnapshotManager::BootFromSnapshotsWithoutSlotSwitch() {
4488     auto lock = LockExclusive();
4489     if (!lock) return false;
4490 
4491     auto contents = device_->GetSlotSuffix();
4492     // This is the indicator which tells first-stage init
4493     // to boot from snapshots even though there was no slot-switch
4494     auto boot_file = GetBootSnapshotsWithoutSlotSwitchPath();
4495     if (!WriteStringToFileAtomic(contents, boot_file)) {
4496         PLOG(ERROR) << "write failed: " << boot_file;
4497         return false;
4498     }
4499 
4500     SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock.get());
4501     update_status.set_state(UpdateState::Initiated);
4502     update_status.set_userspace_snapshots(true);
4503     update_status.set_using_snapuserd(true);
4504     if (!WriteSnapshotUpdateStatus(lock.get(), update_status)) {
4505         return false;
4506     }
4507     return true;
4508 }
4509 
PrepareDeviceToBootWithoutSnapshot()4510 bool SnapshotManager::PrepareDeviceToBootWithoutSnapshot() {
4511     auto lock = LockExclusive();
4512     if (!lock) return false;
4513 
4514     android::base::RemoveFileIfExists(GetSnapshotBootIndicatorPath());
4515     android::base::RemoveFileIfExists(GetBootSnapshotsWithoutSlotSwitchPath());
4516 
4517     SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock.get());
4518     update_status.set_state(UpdateState::Cancelled);
4519     if (!WriteSnapshotUpdateStatus(lock.get(), update_status)) {
4520         return false;
4521     }
4522     return true;
4523 }
4524 
SetReadAheadSize(const std::string & entry_block_device,off64_t size_kb)4525 void SnapshotManager::SetReadAheadSize(const std::string& entry_block_device, off64_t size_kb) {
4526     std::string block_device;
4527     if (!Realpath(entry_block_device, &block_device)) {
4528         PLOG(ERROR) << "Failed to realpath " << entry_block_device;
4529         return;
4530     }
4531 
4532     static constexpr std::string_view kDevBlockPrefix("/dev/block/");
4533     if (!android::base::StartsWith(block_device, kDevBlockPrefix)) {
4534         LOG(ERROR) << block_device << " is not a block device";
4535         return;
4536     }
4537 
4538     std::string block_name = block_device.substr(kDevBlockPrefix.length());
4539     std::string sys_partition =
4540             android::base::StringPrintf("/sys/class/block/%s/partition", block_name.c_str());
4541     struct stat info;
4542     if (lstat(sys_partition.c_str(), &info) == 0) {
4543         block_name += "/..";
4544     }
4545     std::string sys_ra = android::base::StringPrintf("/sys/class/block/%s/queue/read_ahead_kb",
4546                                                      block_name.c_str());
4547     std::string size = std::to_string(size_kb);
4548     android::base::WriteStringToFile(size, sys_ra.c_str());
4549 }
4550 
4551 }  // namespace snapshot
4552 }  // namespace android
4553