1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "Checkpoint"
18 #include "Checkpoint.h"
19 #include "FsCrypt.h"
20 #include "KeyStorage.h"
21 #include "VoldUtil.h"
22 #include "VolumeManager.h"
23 
24 #include <fstream>
25 #include <list>
26 #include <memory>
27 #include <string>
28 #include <thread>
29 #include <vector>
30 
31 #include <BootControlClient.h>
32 #include <android-base/file.h>
33 #include <android-base/logging.h>
34 #include <android-base/parseint.h>
35 #include <android-base/properties.h>
36 #include <android-base/unique_fd.h>
37 #include <cutils/android_reboot.h>
38 #include <fcntl.h>
39 #include <fs_mgr.h>
40 #include <linux/fs.h>
41 #include <mntent.h>
42 #include <sys/mount.h>
43 #include <sys/stat.h>
44 #include <sys/statvfs.h>
45 #include <unistd.h>
46 
47 using android::base::GetBoolProperty;
48 using android::base::GetUintProperty;
49 using android::base::SetProperty;
50 using android::binder::Status;
51 using android::fs_mgr::Fstab;
52 using android::fs_mgr::ReadFstabFromFile;
53 using android::hal::BootControlClient;
54 
55 namespace android {
56 namespace vold {
57 
58 namespace {
// File holding the checkpoint state: cp_startCheckpoint() writes the retry counter
// here, optionally followed by a space and a slot suffix.
const std::string kMetadataCPFile = "/metadata/vold/checkpoint";
60 
error(const std::string & msg)61 binder::Status error(const std::string& msg) {
62     PLOG(ERROR) << msg;
63     return binder::Status::fromServiceSpecificError(errno, String8(msg.c_str()));
64 }
65 
error(int error,const std::string & msg)66 binder::Status error(int error, const std::string& msg) {
67     LOG(ERROR) << msg;
68     return binder::Status::fromServiceSpecificError(error, String8(msg.c_str()));
69 }
70 
setBowState(std::string const & block_device,std::string const & state)71 bool setBowState(std::string const& block_device, std::string const& state) {
72     std::string bow_device = fs_mgr_find_bow_device(block_device);
73     if (bow_device.empty()) return false;
74 
75     if (!android::base::WriteStringToFile(state, bow_device + "/bow/state")) {
76         PLOG(ERROR) << "Failed to write to file " << bow_device + "/bow/state";
77         return false;
78     }
79 
80     return true;
81 }
82 
83 // Do any work that was deferred until the userdata filesystem checkpoint was
84 // committed.  This work involves the deletion of resources that aren't covered
85 // by the userdata filesystem checkpoint, e.g. Keystore keys.
DoCheckpointCommittedWork()86 void DoCheckpointCommittedWork() {
87     // Take the crypt lock to provide synchronization with the Binder calls that
88     // operate on key directories.
89     std::lock_guard<std::mutex> lock(VolumeManager::Instance()->getCryptLock());
90 
91     DeferredCommitKeystoreKeys();
92     fscrypt_deferred_fixate_ce_keys();
93 }
94 
95 }  // namespace
96 
cp_supportsCheckpoint(bool & result)97 Status cp_supportsCheckpoint(bool& result) {
98     result = false;
99 
100     for (const auto& entry : fstab_default) {
101         if (entry.fs_mgr_flags.checkpoint_blk || entry.fs_mgr_flags.checkpoint_fs) {
102             result = true;
103             return Status::ok();
104         }
105     }
106     return Status::ok();
107 }
108 
cp_supportsBlockCheckpoint(bool & result)109 Status cp_supportsBlockCheckpoint(bool& result) {
110     result = false;
111 
112     for (const auto& entry : fstab_default) {
113         if (entry.fs_mgr_flags.checkpoint_blk) {
114             result = true;
115             return Status::ok();
116         }
117     }
118     return Status::ok();
119 }
120 
cp_supportsFileCheckpoint(bool & result)121 Status cp_supportsFileCheckpoint(bool& result) {
122     result = false;
123 
124     for (const auto& entry : fstab_default) {
125         if (entry.fs_mgr_flags.checkpoint_fs) {
126             result = true;
127             return Status::ok();
128         }
129     }
130     return Status::ok();
131 }
132 
cp_startCheckpoint(int retry)133 Status cp_startCheckpoint(int retry) {
134     bool result;
135     if (!cp_supportsCheckpoint(result).isOk() || !result)
136         return error(ENOTSUP, "Checkpoints not supported");
137 
138     if (retry < -1) return error(EINVAL, "Retry count must be more than -1");
139     std::string content = std::to_string(retry + 1);
140     if (retry == -1) {
141         auto module = BootControlClient::WaitForService();
142         if (module) {
143             std::string suffix = module->GetSuffix(module->GetCurrentSlot());
144             if (!suffix.empty()) content += " " + suffix;
145         }
146     }
147     if (!android::base::WriteStringToFile(content, kMetadataCPFile))
148         return error("Failed to write checkpoint file");
149     return Status::ok();
150 }
151 
namespace {

// True while a checkpoint is in progress: set by cp_needsCheckpoint() and cleared by
// cp_commitChanges().
// NOTE(review): volatile gives neither atomicity nor inter-thread ordering;
// cp_isCheckpointing() and cp_healthDaemon() read this flag without holding
// isCheckpointingLock - confirm that is acceptable.
volatile bool isCheckpointing = false;
// Starts out true and becomes false once cp_prepareCheckpoint() fails to set the bow
// state of a checkpoint_blk mount; cp_commitChanges() only writes the bow state while
// it is still true.
volatile bool isBow = true;

// Set by the first cp_needsCheckpoint() call and cleared by cp_resetCheckpoint(); while
// it is set, cp_needsCheckpoint() just reports isCheckpointing.
volatile bool needsCheckpointWasCalled = false;

// Protects isCheckpointing, needsCheckpointWasCalled and code that makes decisions based on status
// of isCheckpointing
std::mutex isCheckpointingLock;
}
163 
cp_commitChanges()164 Status cp_commitChanges() {
165     std::lock_guard<std::mutex> lock(isCheckpointingLock);
166 
167     if (!isCheckpointing) {
168         return Status::ok();
169     }
170     if (android::base::GetProperty("persist.vold.dont_commit_checkpoint", "0") == "1") {
171         LOG(WARNING)
172             << "NOT COMMITTING CHECKPOINT BECAUSE persist.vold.dont_commit_checkpoint IS 1";
173         return Status::ok();
174     }
175     auto module = BootControlClient::WaitForService();
176     if (module) {
177         auto cr = module->MarkBootSuccessful();
178         if (!cr.success)
179             return error(EINVAL, "Error marking booted successfully: " + std::string(cr.errMsg));
180         LOG(INFO) << "Marked slot as booted successfully.";
181         // Clears the warm reset flag for next reboot.
182         if (!SetProperty("ota.warm_reset", "0")) {
183             LOG(WARNING) << "Failed to reset the warm reset flag";
184         }
185     } else {
186         LOG(ERROR) << "Failed to get BootControl HAL, not marking slot as successful.";
187     }
188     // Must take action for list of mounted checkpointed things here
189     // To do this, we walk the list of mounted file systems.
190     // But we also need to get the matching fstab entries to see
191     // the original flags
192     std::string err_str;
193 
194     Fstab mounts;
195     if (!ReadFstabFromFile("/proc/mounts", &mounts)) {
196         return error(EINVAL, "Failed to get /proc/mounts");
197     }
198 
199     // Walk mounted file systems
200     for (const auto& mount_rec : mounts) {
201         const auto fstab_rec =
202                 GetEntryForMountPoint(&fstab_default, mount_rec.mount_point, mount_rec.fs_type);
203         if (!fstab_rec) continue;
204 
205         if (fstab_rec->fs_mgr_flags.checkpoint_fs) {
206             if (fstab_rec->fs_type == "f2fs") {
207                 std::string options = mount_rec.fs_options + ",checkpoint=enable";
208                 if (mount(mount_rec.blk_device.c_str(), mount_rec.mount_point.c_str(), "none",
209                           MS_REMOUNT | fstab_rec->flags, options.c_str())) {
210                     return error(EINVAL, "Failed to remount");
211                 }
212             }
213         } else if (fstab_rec->fs_mgr_flags.checkpoint_blk && isBow) {
214             if (!setBowState(mount_rec.blk_device, "2"))
215                 return error(EINVAL, "Failed to set bow state");
216         }
217     }
218     SetProperty("vold.checkpoint_committed", "1");
219     LOG(INFO) << "Checkpoint has been committed.";
220     isCheckpointing = false;
221     if (!android::base::RemoveFileIfExists(kMetadataCPFile, &err_str))
222         return error(err_str.c_str());
223 
224     std::thread(DoCheckpointCommittedWork).detach();
225     return Status::ok();
226 }
227 
228 namespace {
abort_metadata_file()229 void abort_metadata_file() {
230     std::string oldContent, newContent;
231     int retry = 0;
232     struct stat st;
233     int result = stat(kMetadataCPFile.c_str(), &st);
234 
235     // If the file doesn't exist, we aren't managing a checkpoint retry counter
236     if (result != 0) return;
237     if (!android::base::ReadFileToString(kMetadataCPFile, &oldContent)) {
238         PLOG(ERROR) << "Failed to read checkpoint file";
239         return;
240     }
241     std::string retryContent = oldContent.substr(0, oldContent.find_first_of(" "));
242 
243     if (!android::base::ParseInt(retryContent, &retry)) {
244         PLOG(ERROR) << "Could not parse retry count";
245         return;
246     }
247     if (retry > 0) {
248         newContent = "0";
249         if (!android::base::WriteStringToFile(newContent, kMetadataCPFile))
250             PLOG(ERROR) << "Could not write checkpoint file";
251     }
252 }
253 }  // namespace
254 
cp_abortChanges(const std::string & message,bool retry)255 void cp_abortChanges(const std::string& message, bool retry) {
256     if (!cp_needsCheckpoint()) return;
257     if (!retry) abort_metadata_file();
258     android_reboot(ANDROID_RB_RESTART2, 0, message.c_str());
259 }
260 
cp_needsRollback()261 bool cp_needsRollback() {
262     std::string content;
263     bool ret;
264 
265     ret = android::base::ReadFileToString(kMetadataCPFile, &content);
266     if (ret) {
267         if (content == "0") return true;
268         if (content.substr(0, 3) == "-1 ") {
269             std::string oldSuffix = content.substr(3);
270             auto module = BootControlClient::WaitForService();
271             std::string newSuffix;
272 
273             if (module) {
274                 newSuffix = module->GetSuffix(module->GetCurrentSlot());
275                 if (oldSuffix == newSuffix) return true;
276             }
277         }
278     }
279     return false;
280 }
281 
cp_needsCheckpoint()282 bool cp_needsCheckpoint() {
283     std::lock_guard<std::mutex> lock(isCheckpointingLock);
284 
285     // Make sure we only return true during boot. See b/138952436 for discussion
286     if (needsCheckpointWasCalled) return isCheckpointing;
287     needsCheckpointWasCalled = true;
288 
289     bool ret;
290     std::string content;
291     auto module = BootControlClient::WaitForService();
292 
293     if (isCheckpointing) return isCheckpointing;
294     // In case of INVALID slot or other failures, we do not perform checkpoint.
295     if (module && !module->IsSlotMarkedSuccessful(module->GetCurrentSlot()).value_or(true)) {
296         isCheckpointing = true;
297         return true;
298     }
299     ret = android::base::ReadFileToString(kMetadataCPFile, &content);
300     if (ret) {
301         ret = content != "0";
302         isCheckpointing = ret;
303         return ret;
304     }
305     return false;
306 }
307 
cp_isCheckpointing()308 bool cp_isCheckpointing() {
309     return isCheckpointing;
310 }
311 
312 namespace {
// Property read by cp_healthDaemon for the pause between two free-space checks, in
// milliseconds; the default and the upper bound passed to GetUintProperty follow.
const std::string kSleepTimeProp = "ro.sys.cp_msleeptime";
const uint32_t msleeptime_default = 1000;  // 1 s
const uint32_t max_msleeptime = 3600000;   // 1 h

// Property read by cp_healthDaemon for the free-space threshold, in bytes, below which
// it commits or aborts the checkpoint.
const std::string kMinFreeBytesProp = "ro.sys.cp_min_free_bytes";
const uint64_t min_free_bytes_default = 100 * (1 << 20);  // 100 MiB

// Property selecting what cp_healthDaemon does on low space: commit the checkpoint
// (true) or abort it and reboot (false).
const std::string kCommitOnFullProp = "ro.sys.cp_commit_on_full";
const bool commit_on_full_default = true;
322 
cp_healthDaemon(std::string mnt_pnt,std::string blk_device,bool is_fs_cp)323 static void cp_healthDaemon(std::string mnt_pnt, std::string blk_device, bool is_fs_cp) {
324     struct statvfs data;
325     uint32_t msleeptime = GetUintProperty(kSleepTimeProp, msleeptime_default, max_msleeptime);
326     uint64_t min_free_bytes =
327         GetUintProperty(kMinFreeBytesProp, min_free_bytes_default, (uint64_t)-1);
328     bool commit_on_full = GetBoolProperty(kCommitOnFullProp, commit_on_full_default);
329 
330     struct timespec req;
331     req.tv_sec = msleeptime / 1000;
332     msleeptime %= 1000;
333     req.tv_nsec = msleeptime * 1000000;
334     while (isCheckpointing) {
335         uint64_t free_bytes = 0;
336         if (is_fs_cp) {
337             statvfs(mnt_pnt.c_str(), &data);
338             free_bytes = ((uint64_t) data.f_bavail) * data.f_frsize;
339         } else {
340             std::string bow_device = fs_mgr_find_bow_device(blk_device);
341             if (!bow_device.empty()) {
342                 std::string content;
343                 if (android::base::ReadFileToString(bow_device + "/bow/free", &content)) {
344                     free_bytes = std::strtoull(content.c_str(), NULL, 10);
345                 }
346             }
347         }
348         if (free_bytes < min_free_bytes) {
349             if (commit_on_full) {
350                 LOG(INFO) << "Low space for checkpointing. Commiting changes";
351                 cp_commitChanges();
352                 break;
353             } else {
354                 LOG(INFO) << "Low space for checkpointing. Rebooting";
355                 cp_abortChanges("checkpoint,low_space", false);
356                 break;
357             }
358         }
359         nanosleep(&req, NULL);
360     }
361 }
362 
363 }  // namespace
364 
cp_prepareCheckpoint()365 Status cp_prepareCheckpoint() {
366     // Log to notify CTS - see b/137924328 for context
367     LOG(INFO) << "cp_prepareCheckpoint called";
368     std::lock_guard<std::mutex> lock(isCheckpointingLock);
369     if (!isCheckpointing) {
370         return Status::ok();
371     }
372 
373     Fstab mounts;
374     if (!ReadFstabFromFile("/proc/mounts", &mounts)) {
375         return error(EINVAL, "Failed to get /proc/mounts");
376     }
377 
378     for (const auto& mount_rec : mounts) {
379         const auto fstab_rec = GetEntryForMountPoint(&fstab_default, mount_rec.mount_point);
380         if (!fstab_rec) continue;
381 
382         if (fstab_rec->fs_mgr_flags.checkpoint_blk) {
383             android::base::unique_fd fd(
384                 TEMP_FAILURE_RETRY(open(mount_rec.mount_point.c_str(), O_RDONLY | O_CLOEXEC)));
385             if (fd == -1) {
386                 PLOG(ERROR) << "Failed to open mount point" << mount_rec.mount_point;
387                 continue;
388             }
389 
390             struct fstrim_range range = {};
391             range.len = ULLONG_MAX;
392             nsecs_t start = systemTime(SYSTEM_TIME_BOOTTIME);
393             if (ioctl(fd, FITRIM, &range)) {
394                 PLOG(ERROR) << "Failed to trim " << mount_rec.mount_point;
395                 continue;
396             }
397             nsecs_t time = systemTime(SYSTEM_TIME_BOOTTIME) - start;
398             LOG(INFO) << "Trimmed " << range.len << " bytes on " << mount_rec.mount_point << " in "
399                       << nanoseconds_to_milliseconds(time) << "ms for checkpoint";
400 
401             isBow &= setBowState(mount_rec.blk_device, "1");
402         }
403         if (fstab_rec->fs_mgr_flags.checkpoint_blk || fstab_rec->fs_mgr_flags.checkpoint_fs) {
404             std::thread(cp_healthDaemon, std::string(mount_rec.mount_point),
405                         std::string(mount_rec.blk_device),
406                         fstab_rec->fs_mgr_flags.checkpoint_fs == 1)
407                 .detach();
408         }
409     }
410     return Status::ok();
411 }
412 
413 namespace {
// Size in bytes of the sectors that log_entry::source and log_entry::dest count in.
const int kSectorSize = 512;

typedef uint64_t sector_t;

// One record of a log sector; restoring copies |size| bytes from |dest| back to
// |source|.
struct log_entry {
    sector_t source;  // in sectors of size kSectorSize
    sector_t dest;    // in sectors of size kSectorSize
    uint32_t size;    // in bytes
    uint32_t checksum;
} __attribute__((packed));

// Header at the start of a log sector. The log_entry array begins header_size bytes
// into the sector.
struct log_sector_v1_0 {
    uint32_t magic;  // kMagic, or kPartialRestoreMagic once a restore has started
    uint16_t header_version;
    uint16_t header_size;  // byte offset of the first log_entry
    uint32_t block_size;   // number of bytes read and written as one log sector
    uint32_t count;        // number of log_entry records in this log sector
    uint32_t sequence;     // index of this log sector; restore walks from it down to 0
    uint64_t sector0;      // sector read to put block 0 back when validation fails
} __attribute__((packed));

// MAGIC is BOW in ascii
const int kMagic = 0x00574f42;
// Partially restored MAGIC is WOB in ascii
const int kPartialRestoreMagic = 0x00424f57;
439 
// Folds |n_bytes| bytes starting at |data| into the running CRC stored in |*crc|,
// using the table-driven byte-at-a-time update. No initial or final inversion is
// applied here; the caller chooses the seed and any post-processing.
void crc32(const void* data, size_t n_bytes, uint32_t* crc) {
    static const uint32_t kCrcTable[0x100] = {
        0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, 0xE963A535,
        0x9E6495A3, 0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, 0x09B64C2B, 0x7EB17CBD,
        0xE7B82D07, 0x90BF1D91, 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, 0x1ADAD47D,
        0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC,
        0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, 0x3B6E20C8, 0x4C69105E, 0xD56041E4,
        0xA2677172, 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, 0x35B5A8FA, 0x42B2986C,
        0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, 0x26D930AC,
        0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F,
        0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, 0x2F6F7C87, 0x58684C11, 0xC1611DAB,
        0xB6662D3D,

        0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5,
        0xE8B8D433, 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D,
        0x91646C97, 0xE6635C01, 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, 0x6C0695ED,
        0x1B01A57B, 0x8208F4C1, 0xF50FC457, 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C,
        0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, 0x4DB26158, 0x3AB551CE, 0xA3BC0074,
        0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, 0x4369E96A, 0x346ED9FC,
        0xAD678846, 0xDA60B8D0, 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, 0x5005713C,
        0x270241AA, 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F,
        0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B,
        0xC0BA6CAD,

        0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, 0xEAD54739, 0x9DD277AF, 0x04DB2615,
        0x73DC1683, 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 0x9309FF9D,
        0x0A00AE27, 0x7D079EB1, 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D,
        0x806567CB, 0x196C3671, 0x6E6B06E7, 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC,
        0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4,
        0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, 0xD80D2BDA, 0xAF0A1B4C,
        0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79, 0xCB61B38C,
        0xBC66831A, 0x256FD2A0, 0x5268E236, 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F,
        0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B,
        0x5BDEAE1D,

        0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F, 0x72076785,
        0x05005713, 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, 0x92D28E9B, 0xE5D5BE0D,
        0x7CDCEFB7, 0x0BDBDF21, 0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD,
        0xF6B9265B, 0x6FB077E1, 0x18B74777, 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C,
        0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, 0xA00AE278, 0xD70DD2EE, 0x4E048354,
        0x3903B3C2, 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, 0xAED16A4A, 0xD9D65ADC,
        0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, 0xBDBDF21C,
        0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF,
        0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B,
        0x2D02EF8D};

    const uint8_t* bytes = static_cast<const uint8_t*>(data);
    for (size_t pos = 0; pos != n_bytes; ++pos) {
        // Mix the next byte into the low byte of the CRC, then replace that low byte
        // by its table entry combined with the remaining upper 24 bits.
        *crc ^= bytes[pos];
        const uint8_t low_byte = static_cast<uint8_t>(*crc);
        const uint32_t upper_bits = *crc >> 8;
        *crc = kCrcTable[low_byte] ^ upper_bits;
    }
}
491 
// A map of relocations.
// The map must be initialized so that relocations[0] = 0
// During restore, we replay the log records in reverse, copying from dest to
// source
// To validate, we must be able to read the 'dest' sectors as though they had
// been copied but without actually copying. This map represents how the sectors
// would have been moved. To read a sector s, find the index <= s and read
// relocations[index] + s - index
typedef std::map<sector_t, sector_t> Relocations;

// Records in |relocations| that the |count| sectors starting at |dest| now read as
// whatever the |count| sectors starting at |source| read as before the call.
// Relies on relocations containing key 0, so that the "--upper_bound()" lookups below
// always have an element to step back to.
void relocate(Relocations& relocations, sector_t dest, sector_t source, int count) {
    // Find first one we're equal to or greater than
    auto s = --relocations.upper_bound(source);

    // Take slice
    // The slice holds the new mapping for [dest, dest + count); its first element
    // translates |source| through the range that currently contains it.
    Relocations slice;
    slice[dest] = source - s->first + s->second;
    ++s;

    // Add rest of elements
    // Range starts that fall inside [source, source + count) are carried over, shifted
    // by (dest - source).
    for (; s != relocations.end() && s->first < source + count; ++s)
        slice[dest - source + s->first] = s->second;

    // Split range at end of dest
    // Gives dest + count an entry of its own so that the sectors after the overwritten
    // span keep their current mapping once the entries inside the span are erased.
    auto dest_end = --relocations.upper_bound(dest + count);
    relocations[dest + count] = dest + count - dest_end->first + dest_end->second;

    // Remove all elements in [dest, dest + count)
    relocations.erase(relocations.lower_bound(dest), relocations.lower_bound(dest + count));

    // Add new elements
    relocations.insert(slice.begin(), slice.end());
}
525 
526 // A map of sectors that have been written to.
527 // The final entry must always be False.
528 // When we restart the restore after an interruption, we must take care that
529 // when we copy from dest to source, that the block we copy to was not
530 // previously copied from.
531 // i e. A->B C->A; If we replay this sequence, we end up copying C->B
532 // We must save our partial result whenever we finish a page, or when we copy
533 // to a location that was copied from earlier (our source is an earlier dest)
534 typedef std::map<sector_t, bool> Used_Sectors;
535 
checkCollision(Used_Sectors & used_sectors,sector_t start,sector_t end)536 bool checkCollision(Used_Sectors& used_sectors, sector_t start, sector_t end) {
537     auto second_overlap = used_sectors.upper_bound(start);
538     auto first_overlap = --second_overlap;
539 
540     if (first_overlap->second) {
541         return true;
542     } else if (second_overlap != used_sectors.end() && second_overlap->first < end) {
543         return true;
544     }
545     return false;
546 }
547 
// Marks [start, end) as used in |used_sectors| and drops the boundaries that become
// redundant as a result.
void markUsed(Used_Sectors& used_sectors, sector_t start, sector_t end) {
    // Put a "used" boundary at |start| and an "unused" boundary at |end|, overwriting
    // whatever was stored under those keys.
    auto start_pos = used_sectors.insert_or_assign(start, true).first;
    auto end_pos = used_sectors.insert_or_assign(end, false).first;

    // Keep the boundary at |start| when it really opens a used range (it is the first
    // entry, or the entry before it is unused) by moving the erase start past it.
    if (start_pos == used_sectors.begin() || !std::prev(start_pos)->second) {
        start_pos++;
    }
    // When the entry after |end| is unused as well, the boundary at |end| is included
    // in the erase range by moving the erase end one entry further.
    if (std::next(end_pos) != used_sectors.end() && !std::next(end_pos)->second) {
        end_pos++;
    }
    // Erase the entries in [start_pos, end_pos).
    if (start_pos->first < end_pos->first) {
        used_sectors.erase(start_pos, end_pos);
    }
}
562 
// Restores the given log_entry's data from dest -> source
// If that entry is a log sector, set the magic to kPartialRestoreMagic and flush.
//
// |device_fd|    descriptor of the block device being restored.
// |used_sectors| dest ranges already copied from since the last saved progress.
// |ls_buffer|    the current log sector; |le| must point into its entry array.
// |le|           the entry to restore.
// |buffer|       the le->size bytes previously read from le->dest.
//
// NOTE(review): the results of lseek64() and write() are not checked here, so a failed
// write goes unnoticed by the caller.
void restoreSector(int device_fd, Used_Sectors& used_sectors, std::vector<char>& ls_buffer,
                   log_entry* le, std::vector<char>& buffer) {
    log_sector_v1_0& ls = *reinterpret_cast<log_sector_v1_0*>(&ls_buffer[0]);
    // Position of |le| within the entry array, which starts header_size bytes in.
    uint32_t index = le - ((log_entry*)&ls_buffer[ls.header_size]);
    // Number of kSectorSize sectors covered by le->size, rounded up.
    int count = (le->size - 1) / kSectorSize + 1;

    // Before writing over sectors that were the dest of an earlier copy, save the
    // progress: rewrite the log sector at offset 0 with its count cut down to the
    // entries not yet restored and the partial-restore magic, then start tracking
    // afresh.
    if (checkCollision(used_sectors, le->source, le->source + count)) {
        fsync(device_fd);
        lseek64(device_fd, 0, SEEK_SET);
        ls.count = index + 1;
        ls.magic = kPartialRestoreMagic;
        write(device_fd, &ls_buffer[0], ls.block_size);
        fsync(device_fd);
        used_sectors.clear();
        used_sectors[0] = false;
    }

    markUsed(used_sectors, le->dest, le->dest + count);

    // For entry 0 of any log sector other than sequence 0, |buffer| is treated as a
    // log sector header and its magic is switched to the partial-restore magic before
    // it is written back.
    if (index == 0 && ls.sequence != 0) {
        log_sector_v1_0* next = reinterpret_cast<log_sector_v1_0*>(&buffer[0]);
        if (next->magic == kMagic) {
            next->magic = kPartialRestoreMagic;
        }
    }

    // Copy the saved data back to where it came from.
    lseek64(device_fd, le->source * kSectorSize, SEEK_SET);
    write(device_fd, &buffer[0], le->size);

    // Flush after the first entry of the log sector, which is the last one processed
    // because entries are replayed in reverse.
    if (index == 0) {
        fsync(device_fd);
    }
}
598 
599 // Read from the device
600 // If we are validating, the read occurs as though the relocations had happened
601 // returns the amount asked for or an empty buffer on error. Partial reads are considered a failure
relocatedRead(int device_fd,Relocations const & relocations,bool validating,sector_t sector,uint32_t size,uint32_t block_size)602 std::vector<char> relocatedRead(int device_fd, Relocations const& relocations, bool validating,
603                                 sector_t sector, uint32_t size, uint32_t block_size) {
604     if (!validating) {
605         std::vector<char> buffer(size);
606         off64_t offset = sector * kSectorSize;
607         if (lseek64(device_fd, offset, SEEK_SET) != offset) {
608             return std::vector<char>();
609         }
610         if (read(device_fd, &buffer[0], size) != static_cast<ssize_t>(size)) {
611             return std::vector<char>();
612         }
613         return buffer;
614     }
615 
616     std::vector<char> buffer(size);
617     for (uint32_t i = 0; i < size; i += block_size, sector += block_size / kSectorSize) {
618         auto relocation = --relocations.upper_bound(sector);
619         off64_t offset = (sector + relocation->second - relocation->first) * kSectorSize;
620         if (lseek64(device_fd, offset, SEEK_SET) != offset) {
621             return std::vector<char>();
622         }
623         if (read(device_fd, &buffer[i], block_size) != static_cast<ssize_t>(block_size)) {
624             return std::vector<char>();
625         }
626     }
627 
628     return buffer;
629 }
630 
631 }  // namespace
632 
cp_restoreCheckpoint(const std::string & blockDevice,int restore_limit)633 Status cp_restoreCheckpoint(const std::string& blockDevice, int restore_limit) {
634     bool validating = true;
635     std::string action = "Validating";
636     int restore_count = 0;
637 
638     for (;;) {
639         Relocations relocations;
640         relocations[0] = 0;
641         Status status = Status::ok();
642 
643         LOG(INFO) << action << " checkpoint on " << blockDevice;
644         base::unique_fd device_fd(open(blockDevice.c_str(), O_RDWR | O_CLOEXEC));
645         if (device_fd < 0) return error("Cannot open " + blockDevice);
646 
647         log_sector_v1_0 original_ls;
648         if (read(device_fd, reinterpret_cast<char*>(&original_ls), sizeof(original_ls)) !=
649             sizeof(original_ls)) {
650             return error(EINVAL, "Cannot read sector");
651         }
652         if (original_ls.magic == kPartialRestoreMagic) {
653             validating = false;
654             action = "Restoring";
655         } else if (original_ls.magic != kMagic) {
656             return error(EINVAL, "No magic");
657         }
658 
659         if (original_ls.block_size < sizeof(log_sector_v1_0)) {
660             return error(EINVAL, "Block size is invalid");
661         }
662 
663         LOG(INFO) << action << " " << original_ls.sequence << " log sectors";
664 
665         for (int sequence = original_ls.sequence; sequence >= 0 && status.isOk(); sequence--) {
666             auto ls_buffer = relocatedRead(device_fd, relocations, validating, 0,
667                                            original_ls.block_size, original_ls.block_size);
668             if (ls_buffer.size() != original_ls.block_size) {
669                 status = error(EINVAL, "Failed to read log sector");
670                 break;
671             }
672             log_sector_v1_0& ls = *reinterpret_cast<log_sector_v1_0*>(&ls_buffer[0]);
673 
674             Used_Sectors used_sectors;
675             used_sectors[0] = false;
676 
677             if (ls.magic != kMagic && (ls.magic != kPartialRestoreMagic || validating)) {
678                 status = error(EINVAL, "No magic");
679                 break;
680             }
681 
682             if (ls.block_size != original_ls.block_size) {
683                 status = error(EINVAL, "Block size mismatch");
684                 break;
685             }
686 
687             if ((int)ls.sequence != sequence) {
688                 status = error(EINVAL, "Expecting log sector " + std::to_string(sequence) +
689                                            " but got " + std::to_string(ls.sequence));
690                 break;
691             }
692 
693             if (ls.header_size < sizeof(log_sector_v1_0) || ls.header_size > ls.block_size) {
694                 status = error(EINVAL, "Log sector header size is invalid");
695                 break;
696             }
697             if (ls.count < 1 || ls.count > (ls.block_size - ls.header_size) / sizeof(log_entry)) {
698                 status = error(EINVAL, "Log sector count is invalid");
699                 break;
700             }
701             LOG(INFO) << action << " from log sector " << ls.sequence;
702             for (log_entry* le =
703                      reinterpret_cast<log_entry*>(&ls_buffer[ls.header_size]) + ls.count - 1;
704                  le >= reinterpret_cast<log_entry*>(&ls_buffer[ls.header_size]); --le) {
705                 // This is very noisy - limit to DEBUG only
706                 LOG(VERBOSE) << action << " " << le->size << " bytes from sector " << le->dest
707                              << " to " << le->source << " with checksum " << std::hex
708                              << le->checksum;
709 
710                 if (ls.block_size > UINT_MAX - le->size || le->size < ls.block_size) {
711                     status = error(EINVAL, "log entry is invalid");
712                     break;
713                 }
714                 auto buffer = relocatedRead(device_fd, relocations, validating, le->dest, le->size,
715                                             ls.block_size);
716                 if (buffer.size() != le->size) {
717                     status = error(EINVAL, "Failed to read sector");
718                     break;
719                 }
720                 uint32_t checksum = le->source / (ls.block_size / kSectorSize);
721                 for (size_t i = 0; i < le->size; i += ls.block_size) {
722                     crc32(&buffer[i], ls.block_size, &checksum);
723                 }
724 
725                 if (le->checksum && checksum != le->checksum) {
726                     status = error(EINVAL, "Checksums don't match");
727                     break;
728                 }
729 
730                 if (validating) {
731                     relocate(relocations, le->source, le->dest, (le->size - 1) / kSectorSize + 1);
732                 } else {
733                     restoreSector(device_fd, used_sectors, ls_buffer, le, buffer);
734                     restore_count++;
735                     if (restore_limit && restore_count >= restore_limit) {
736                         status = error(EAGAIN, "Hit the test limit");
737                         break;
738                     }
739                 }
740             }
741         }
742 
743         if (!status.isOk()) {
744             if (!validating) {
745                 LOG(ERROR) << "Checkpoint restore failed even though checkpoint validation passed";
746                 return status;
747             }
748 
749             LOG(WARNING) << "Checkpoint validation failed - attempting to roll forward";
750             auto buffer = relocatedRead(device_fd, relocations, false, original_ls.sector0,
751                                         original_ls.block_size, original_ls.block_size);
752             if (buffer.size() != original_ls.block_size) {
753                 return error(EINVAL, "Failed to read original sector");
754             }
755 
756             if (lseek64(device_fd, 0, SEEK_SET) != 0) {
757                 return error(EINVAL, "Failed to seek to sector 0");
758             }
759             if (write(device_fd, &buffer[0], original_ls.block_size) !=
760                 static_cast<ssize_t>(original_ls.block_size)) {
761                 return error(EINVAL, "Failed to write original sector");
762             }
763             return Status::ok();
764         }
765 
766         if (!validating) break;
767 
768         validating = false;
769         action = "Restoring";
770     }
771 
772     return Status::ok();
773 }
774 
cp_markBootAttempt()775 Status cp_markBootAttempt() {
776     std::string oldContent, newContent;
777     int retry = 0;
778     struct stat st;
779     int result = stat(kMetadataCPFile.c_str(), &st);
780 
781     // If the file doesn't exist, we aren't managing a checkpoint retry counter
782     if (result != 0) return Status::ok();
783     if (!android::base::ReadFileToString(kMetadataCPFile, &oldContent))
784         return error("Failed to read checkpoint file");
785     std::string retryContent = oldContent.substr(0, oldContent.find_first_of(" "));
786 
787     if (!android::base::ParseInt(retryContent, &retry))
788         return error(EINVAL, "Could not parse retry count");
789     if (retry > 0) {
790         retry--;
791 
792         newContent = std::to_string(retry);
793         if (!android::base::WriteStringToFile(newContent, kMetadataCPFile))
794             return error("Could not write checkpoint file");
795     }
796     return Status::ok();
797 }
798 
cp_resetCheckpoint()799 void cp_resetCheckpoint() {
800     std::lock_guard<std::mutex> lock(isCheckpointingLock);
801     needsCheckpointWasCalled = false;
802 }
803 
804 }  // namespace vold
805 }  // namespace android
806