1 /*
2  * Copyright (C) 2012 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "fs_mgr.h"
18 
19 #include <dirent.h>
20 #include <errno.h>
21 #include <fcntl.h>
22 #include <inttypes.h>
23 #include <libgen.h>
24 #include <selinux/selinux.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <sys/ioctl.h>
29 #include <sys/mount.h>
30 #include <sys/stat.h>
31 #include <sys/swap.h>
32 #include <sys/types.h>
33 #include <sys/utsname.h>
34 #include <sys/wait.h>
35 #include <time.h>
36 #include <unistd.h>
37 
38 #include <array>
39 #include <chrono>
40 #include <functional>
41 #include <map>
42 #include <memory>
43 #include <numeric>
44 #include <string>
45 #include <string_view>
46 #include <thread>
47 #include <utility>
48 #include <vector>
49 
50 #include <android-base/chrono_utils.h>
51 #include <android-base/file.h>
52 #include <android-base/properties.h>
53 #include <android-base/stringprintf.h>
54 #include <android-base/strings.h>
55 #include <android-base/unique_fd.h>
56 #include <cutils/android_filesystem_config.h>
57 #include <cutils/android_reboot.h>
58 #include <cutils/partition_utils.h>
59 #include <cutils/properties.h>
60 #include <ext4_utils/ext4.h>
61 #include <ext4_utils/ext4_sb.h>
62 #include <ext4_utils/ext4_utils.h>
63 #include <ext4_utils/wipe.h>
64 #include <fs_avb/fs_avb.h>
65 #include <fs_mgr/file_wait.h>
66 #include <fs_mgr_overlayfs.h>
67 #include <fscrypt/fscrypt.h>
68 #include <libdm/dm.h>
69 #include <libdm/loop_control.h>
70 #include <liblp/metadata_format.h>
71 #include <linux/fs.h>
72 #include <linux/loop.h>
73 #include <linux/magic.h>
74 #include <log/log_properties.h>
75 #include <logwrap/logwrap.h>
76 
77 #include "blockdev.h"
78 #include "fs_mgr_priv.h"
79 
// Paths of external filesystem tool binaries.
#define E2FSCK_BIN      "/system/bin/e2fsck"
#define F2FS_FSCK_BIN   "/system/bin/fsck.f2fs"
#define MKSWAP_BIN      "/system/bin/mkswap"
#define TUNE2FS_BIN     "/system/bin/tune2fs"
#define RESIZE2FS_BIN "/system/bin/resize2fs"

// Log file that receives fsck output and the fs_stat records.
#define FSCK_LOG_FILE   "/dev/fscklogs/log"

// sysfs attributes of the zram0 block device.
#define ZRAM_CONF_DEV   "/sys/block/zram0/disksize"
#define ZRAM_CONF_MCS   "/sys/block/zram0/max_comp_streams"
#define ZRAM_BACK_DEV   "/sys/block/zram0/backing_dev"

// sysfs files that are read and compared against "supported" before the
// corresponding ext4 feature is enabled with tune2fs.
#define SYSFS_EXT4_VERITY "/sys/fs/ext4/features/verity"
#define SYSFS_EXT4_CASEFOLD "/sys/fs/ext4/features/casefold"

// Number of elements in an array (not valid for pointers).
#define ARRAY_SIZE(a) (sizeof(a) / sizeof(*(a)))
96 
97 using android::base::Basename;
98 using android::base::GetBoolProperty;
99 using android::base::GetUintProperty;
100 using android::base::Realpath;
101 using android::base::SetProperty;
102 using android::base::StartsWith;
103 using android::base::StringPrintf;
104 using android::base::Timer;
105 using android::base::unique_fd;
106 using android::dm::DeviceMapper;
107 using android::dm::DmDeviceState;
108 using android::dm::DmTargetLinear;
109 using android::dm::LoopControl;
110 
111 // Realistically, this file should be part of the android::fs_mgr namespace;
112 using namespace android::fs_mgr;
113 
114 using namespace std::literals;
115 
// Bit flags accumulated into the per-device fs_stat value while a filesystem
// is checked and tuned. The numeric values are written to the fsck log, so
// each flag keeps its fixed bit position (some bit positions are unused).
enum FsStatFlags {
    FS_STAT_IS_EXT4 = 1 << 0,
    FS_STAT_NEW_IMAGE_VERSION = 1 << 1,
    FS_STAT_E2FSCK_F_ALWAYS = 1 << 2,
    FS_STAT_UNCLEAN_SHUTDOWN = 1 << 3,
    FS_STAT_QUOTA_ENABLED = 1 << 4,
    // bit 5 is unused
    FS_STAT_RO_MOUNT_FAILED = 1 << 6,
    FS_STAT_RO_UNMOUNT_FAILED = 1 << 7,
    FS_STAT_FULL_MOUNT_FAILED = 1 << 8,
    FS_STAT_FSCK_FAILED = 1 << 9,
    FS_STAT_FSCK_FS_FIXED = 1 << 10,
    FS_STAT_INVALID_MAGIC = 1 << 11,
    // bits 12-15 are unused
    FS_STAT_TOGGLE_QUOTAS_FAILED = 1 << 16,
    FS_STAT_SET_RESERVED_BLOCKS_FAILED = 1 << 17,
    FS_STAT_ENABLE_ENCRYPTION_FAILED = 1 << 18,
    FS_STAT_ENABLE_VERITY_FAILED = 1 << 19,
    FS_STAT_ENABLE_CASEFOLD_FAILED = 1 << 20,
    FS_STAT_ENABLE_METADATA_CSUM_FAILED = 1 << 21,
};
136 
log_fs_stat(const std::string & blk_device,int fs_stat)137 static void log_fs_stat(const std::string& blk_device, int fs_stat) {
138     std::string msg =
139             android::base::StringPrintf("\nfs_stat,%s,0x%x\n", blk_device.c_str(), fs_stat);
140     android::base::unique_fd fd(TEMP_FAILURE_RETRY(open(FSCK_LOG_FILE, O_WRONLY | O_CLOEXEC |
141                                                         O_APPEND | O_CREAT, 0664)));
142     if (fd == -1 || !android::base::WriteStringToFd(msg, fd)) {
143         LWARNING << __FUNCTION__ << "() cannot log " << msg;
144     }
145 }
146 
// Returns true when |fs_type| names one of the ext filesystems: "ext4",
// "ext3" or "ext2" (exact, case-sensitive match).
static bool is_extfs(const std::string& fs_type) {
    for (const char* ext_name : {"ext4", "ext3", "ext2"}) {
        if (fs_type == ext_name) {
            return true;
        }
    }
    return false;
}
150 
// Returns true when |fs_type| is exactly "f2fs".
static bool is_f2fs(const std::string& fs_type) {
    return fs_type.compare("f2fs") == 0;
}
154 
realpath(const std::string & blk_device)155 static std::string realpath(const std::string& blk_device) {
156     std::string real_path;
157     if (!Realpath(blk_device, &real_path)) {
158         real_path = blk_device;
159     }
160     return real_path;
161 }
162 
should_force_check(int fs_stat)163 static bool should_force_check(int fs_stat) {
164     return fs_stat &
165            (FS_STAT_E2FSCK_F_ALWAYS | FS_STAT_UNCLEAN_SHUTDOWN | FS_STAT_QUOTA_ENABLED |
166             FS_STAT_RO_MOUNT_FAILED | FS_STAT_RO_UNMOUNT_FAILED | FS_STAT_FULL_MOUNT_FAILED |
167             FS_STAT_FSCK_FAILED | FS_STAT_TOGGLE_QUOTAS_FAILED |
168             FS_STAT_SET_RESERVED_BLOCKS_FAILED | FS_STAT_ENABLE_ENCRYPTION_FAILED);
169 }
170 
umount_retry(const std::string & mount_point)171 static bool umount_retry(const std::string& mount_point) {
172     int retry_count = 5;
173     bool umounted = false;
174 
175     while (retry_count-- > 0) {
176         umounted = umount(mount_point.c_str()) == 0;
177         if (umounted) {
178             LINFO << __FUNCTION__ << "(): unmount(" << mount_point << ") succeeded";
179             break;
180         }
181         PERROR << __FUNCTION__ << "(): umount(" << mount_point << ") failed";
182         if (retry_count) sleep(1);
183     }
184     return umounted;
185 }
186 
check_fs(const std::string & blk_device,const std::string & fs_type,const std::string & target,int * fs_stat)187 static void check_fs(const std::string& blk_device, const std::string& fs_type,
188                      const std::string& target, int* fs_stat) {
189     int status;
190     int ret;
191     long tmpmnt_flags = MS_NOATIME | MS_NOEXEC | MS_NOSUID;
192     auto tmpmnt_opts = "errors=remount-ro"s;
193     const char* e2fsck_argv[] = {E2FSCK_BIN, "-y", blk_device.c_str()};
194     const char* e2fsck_forced_argv[] = {E2FSCK_BIN, "-f", "-y", blk_device.c_str()};
195 
196     if (*fs_stat & FS_STAT_INVALID_MAGIC) {  // will fail, so do not try
197         return;
198     }
199 
200     Timer t;
201     /* Check for the types of filesystems we know how to check */
202     if (is_extfs(fs_type)) {
203         /*
204          * First try to mount and unmount the filesystem.  We do this because
205          * the kernel is more efficient than e2fsck in running the journal and
206          * processing orphaned inodes, and on at least one device with a
207          * performance issue in the emmc firmware, it can take e2fsck 2.5 minutes
208          * to do what the kernel does in about a second.
209          *
210          * After mounting and unmounting the filesystem, run e2fsck, and if an
211          * error is recorded in the filesystem superblock, e2fsck will do a full
212          * check.  Otherwise, it does nothing.  If the kernel cannot mount the
213          * filesytsem due to an error, e2fsck is still run to do a full check
214          * fix the filesystem.
215          */
216         if (!(*fs_stat & FS_STAT_FULL_MOUNT_FAILED)) {  // already tried if full mount failed
217             errno = 0;
218             if (fs_type == "ext4") {
219                 // This option is only valid with ext4
220                 tmpmnt_opts += ",nomblk_io_submit";
221             }
222             ret = mount(blk_device.c_str(), target.c_str(), fs_type.c_str(), tmpmnt_flags,
223                         tmpmnt_opts.c_str());
224             PINFO << __FUNCTION__ << "(): mount(" << blk_device << "," << target << "," << fs_type
225                   << ")=" << ret;
226             if (ret) {
227                 *fs_stat |= FS_STAT_RO_MOUNT_FAILED;
228             } else if (!umount_retry(target)) {
229                 // boot may fail but continue and leave it to later stage for now.
230                 PERROR << __FUNCTION__ << "(): umount(" << target << ") timed out";
231                 *fs_stat |= FS_STAT_RO_UNMOUNT_FAILED;
232             }
233         }
234 
235         /*
236          * Some system images do not have e2fsck for licensing reasons
237          * (e.g. recent SDK system images). Detect these and skip the check.
238          */
239         if (access(E2FSCK_BIN, X_OK)) {
240             LINFO << "Not running " << E2FSCK_BIN << " on " << realpath(blk_device)
241                   << " (executable not in system image)";
242         } else {
243             LINFO << "Running " << E2FSCK_BIN << " on " << realpath(blk_device);
244             if (should_force_check(*fs_stat)) {
245                 ret = logwrap_fork_execvp(ARRAY_SIZE(e2fsck_forced_argv), e2fsck_forced_argv,
246                                           &status, false, LOG_KLOG | LOG_FILE, false,
247                                           FSCK_LOG_FILE);
248             } else {
249                 ret = logwrap_fork_execvp(ARRAY_SIZE(e2fsck_argv), e2fsck_argv, &status, false,
250                                           LOG_KLOG | LOG_FILE, false, FSCK_LOG_FILE);
251             }
252 
253             if (ret < 0) {
254                 /* No need to check for error in fork, we can't really handle it now */
255                 LERROR << "Failed trying to run " << E2FSCK_BIN;
256                 *fs_stat |= FS_STAT_FSCK_FAILED;
257             } else if (status != 0) {
258                 LINFO << "e2fsck returned status 0x" << std::hex << status;
259                 *fs_stat |= FS_STAT_FSCK_FS_FIXED;
260             }
261         }
262     } else if (is_f2fs(fs_type)) {
263         const char* f2fs_fsck_argv[] = {F2FS_FSCK_BIN,     "-a", "-c", "10000", "--debug-cache",
264                                         blk_device.c_str()};
265         const char* f2fs_fsck_forced_argv[] = {
266                 F2FS_FSCK_BIN, "-f", "-c", "10000", "--debug-cache", blk_device.c_str()};
267 
268         if (access(F2FS_FSCK_BIN, X_OK)) {
269             LINFO << "Not running " << F2FS_FSCK_BIN << " on " << realpath(blk_device)
270                   << " (executable not in system image)";
271         } else {
272             if (should_force_check(*fs_stat)) {
273                 LINFO << "Running " << F2FS_FSCK_BIN << " -f -c 10000 --debug-cache "
274                       << realpath(blk_device);
275                 ret = logwrap_fork_execvp(ARRAY_SIZE(f2fs_fsck_forced_argv), f2fs_fsck_forced_argv,
276                                           &status, false, LOG_KLOG | LOG_FILE, false,
277                                           FSCK_LOG_FILE);
278             } else {
279                 LINFO << "Running " << F2FS_FSCK_BIN << " -a -c 10000 --debug-cache "
280                       << realpath(blk_device);
281                 ret = logwrap_fork_execvp(ARRAY_SIZE(f2fs_fsck_argv), f2fs_fsck_argv, &status,
282                                           false, LOG_KLOG | LOG_FILE, false, FSCK_LOG_FILE);
283             }
284             if (ret < 0) {
285                 /* No need to check for error in fork, we can't really handle it now */
286                 LERROR << "Failed trying to run " << F2FS_FSCK_BIN;
287                 *fs_stat |= FS_STAT_FSCK_FAILED;
288             } else if (status != 0) {
289                 LINFO << F2FS_FSCK_BIN << " returned status 0x" << std::hex << status;
290                 *fs_stat |= FS_STAT_FSCK_FS_FIXED;
291             }
292         }
293     }
294     android::base::SetProperty("ro.boottime.init.fsck." + Basename(target),
295                                std::to_string(t.duration().count()));
296     return;
297 }
298 
ext4_blocks_count(const struct ext4_super_block * es)299 static ext4_fsblk_t ext4_blocks_count(const struct ext4_super_block* es) {
300     return ((ext4_fsblk_t)le32_to_cpu(es->s_blocks_count_hi) << 32) |
301            le32_to_cpu(es->s_blocks_count_lo);
302 }
303 
ext4_r_blocks_count(const struct ext4_super_block * es)304 static ext4_fsblk_t ext4_r_blocks_count(const struct ext4_super_block* es) {
305     return ((ext4_fsblk_t)le32_to_cpu(es->s_r_blocks_count_hi) << 32) |
306            le32_to_cpu(es->s_r_blocks_count_lo);
307 }
308 
is_ext4_superblock_valid(const struct ext4_super_block * es)309 static bool is_ext4_superblock_valid(const struct ext4_super_block* es) {
310     if (es->s_magic != EXT4_SUPER_MAGIC) return false;
311     if (es->s_rev_level != EXT4_DYNAMIC_REV && es->s_rev_level != EXT4_GOOD_OLD_REV) return false;
312     if (EXT4_INODES_PER_GROUP(es) == 0) return false;
313     return true;
314 }
315 
316 // Read the primary superblock from an ext4 filesystem.  On failure return
317 // false.  If it's not an ext4 filesystem, also set FS_STAT_INVALID_MAGIC.
read_ext4_superblock(const std::string & blk_device,struct ext4_super_block * sb,int * fs_stat)318 static bool read_ext4_superblock(const std::string& blk_device, struct ext4_super_block* sb,
319                                  int* fs_stat) {
320     android::base::unique_fd fd(TEMP_FAILURE_RETRY(open(blk_device.c_str(), O_RDONLY | O_CLOEXEC)));
321 
322     if (fd < 0) {
323         PERROR << "Failed to open '" << blk_device << "'";
324         return false;
325     }
326 
327     if (TEMP_FAILURE_RETRY(pread(fd, sb, sizeof(*sb), 1024)) != sizeof(*sb)) {
328         PERROR << "Can't read '" << blk_device << "' superblock";
329         return false;
330     }
331 
332     if (!is_ext4_superblock_valid(sb)) {
333         LINFO << "Invalid ext4 superblock on '" << blk_device << "'";
334         // not a valid fs, tune2fs, fsck, and mount  will all fail.
335         *fs_stat |= FS_STAT_INVALID_MAGIC;
336         return false;
337     }
338     *fs_stat |= FS_STAT_IS_EXT4;
339     LINFO << "superblock s_max_mnt_count:" << sb->s_max_mnt_count << "," << blk_device;
340     if (sb->s_max_mnt_count == 0xffff) {  // -1 (int16) in ext2, but uint16 in ext4
341         *fs_stat |= FS_STAT_NEW_IMAGE_VERSION;
342     }
343     return true;
344 }
345 
346 // exported silent version of the above that just answer the question is_ext4
fs_mgr_is_ext4(const std::string & blk_device)347 bool fs_mgr_is_ext4(const std::string& blk_device) {
348     android::base::ErrnoRestorer restore;
349     android::base::unique_fd fd(TEMP_FAILURE_RETRY(open(blk_device.c_str(), O_RDONLY | O_CLOEXEC)));
350     if (fd < 0) return false;
351     ext4_super_block sb;
352     if (TEMP_FAILURE_RETRY(pread(fd, &sb, sizeof(sb), 1024)) != sizeof(sb)) return false;
353     if (!is_ext4_superblock_valid(&sb)) return false;
354     return true;
355 }
356 
357 // Some system images do not have tune2fs for licensing reasons.
358 // Detect these and skip running it.
tune2fs_available(void)359 static bool tune2fs_available(void) {
360     return access(TUNE2FS_BIN, X_OK) == 0;
361 }
362 
run_command(const char * argv[],int argc)363 static bool run_command(const char* argv[], int argc) {
364     int ret;
365 
366     ret = logwrap_fork_execvp(argc, argv, nullptr, false, LOG_KLOG, false, nullptr);
367     return ret == 0;
368 }
369 
370 // Enable/disable quota support on the filesystem if needed.
tune_quota(const std::string & blk_device,const FstabEntry & entry,const struct ext4_super_block * sb,int * fs_stat)371 static void tune_quota(const std::string& blk_device, const FstabEntry& entry,
372                        const struct ext4_super_block* sb, int* fs_stat) {
373     bool has_quota = (sb->s_feature_ro_compat & cpu_to_le32(EXT4_FEATURE_RO_COMPAT_QUOTA)) != 0;
374     bool want_quota = entry.fs_mgr_flags.quota;
375     // Enable projid support by default
376     bool want_projid = true;
377     if (has_quota == want_quota) {
378         return;
379     }
380 
381     if (!tune2fs_available()) {
382         LERROR << "Unable to " << (want_quota ? "enable" : "disable") << " quotas on " << blk_device
383                << " because " TUNE2FS_BIN " is missing";
384         return;
385     }
386 
387     const char* argv[] = {TUNE2FS_BIN, nullptr, nullptr, blk_device.c_str()};
388 
389     if (want_quota) {
390         LINFO << "Enabling quotas on " << blk_device;
391         argv[1] = "-Oquota";
392         // Once usr/grp unneeded, make just prjquota to save overhead
393         if (want_projid)
394             argv[2] = "-Qusrquota,grpquota,prjquota";
395         else
396             argv[2] = "-Qusrquota,grpquota";
397         *fs_stat |= FS_STAT_QUOTA_ENABLED;
398     } else {
399         LINFO << "Disabling quotas on " << blk_device;
400         argv[1] = "-O^quota";
401         argv[2] = "-Q^usrquota,^grpquota,^prjquota";
402     }
403 
404     if (!run_command(argv, ARRAY_SIZE(argv))) {
405         LERROR << "Failed to run " TUNE2FS_BIN " to " << (want_quota ? "enable" : "disable")
406                << " quotas on " << blk_device;
407         *fs_stat |= FS_STAT_TOGGLE_QUOTAS_FAILED;
408     }
409 }
410 
411 // Set the number of reserved filesystem blocks if needed.
tune_reserved_size(const std::string & blk_device,const FstabEntry & entry,const struct ext4_super_block * sb,int * fs_stat)412 static void tune_reserved_size(const std::string& blk_device, const FstabEntry& entry,
413                                const struct ext4_super_block* sb, int* fs_stat) {
414     if (entry.reserved_size == 0) {
415         return;
416     }
417 
418     // The size to reserve is given in the fstab, but we won't reserve more
419     // than 2% of the filesystem.
420     const uint64_t max_reserved_blocks = ext4_blocks_count(sb) * 0.02;
421     uint64_t reserved_blocks = entry.reserved_size / EXT4_BLOCK_SIZE(sb);
422 
423     if (reserved_blocks > max_reserved_blocks) {
424         LWARNING << "Reserved blocks " << reserved_blocks << " is too large; "
425                  << "capping to " << max_reserved_blocks;
426         reserved_blocks = max_reserved_blocks;
427     }
428 
429     if ((ext4_r_blocks_count(sb) == reserved_blocks) && (sb->s_def_resgid == AID_RESERVED_DISK)) {
430         return;
431     }
432 
433     if (!tune2fs_available()) {
434         LERROR << "Unable to set the number of reserved blocks on " << blk_device
435                << " because " TUNE2FS_BIN " is missing";
436         return;
437     }
438 
439     LINFO << "Setting reserved block count on " << blk_device << " to " << reserved_blocks;
440 
441     auto reserved_blocks_str = std::to_string(reserved_blocks);
442     auto reserved_gid_str = std::to_string(AID_RESERVED_DISK);
443     const char* argv[] = {
444             TUNE2FS_BIN,       "-r", reserved_blocks_str.c_str(), "-g", reserved_gid_str.c_str(),
445             blk_device.c_str()};
446     if (!run_command(argv, ARRAY_SIZE(argv))) {
447         LERROR << "Failed to run " TUNE2FS_BIN " to set the number of reserved blocks on "
448                << blk_device;
449         *fs_stat |= FS_STAT_SET_RESERVED_BLOCKS_FAILED;
450     }
451 }
452 
453 // Enable file-based encryption if needed.
tune_encrypt(const std::string & blk_device,const FstabEntry & entry,const struct ext4_super_block * sb,int * fs_stat)454 static void tune_encrypt(const std::string& blk_device, const FstabEntry& entry,
455                          const struct ext4_super_block* sb, int* fs_stat) {
456     if (!entry.fs_mgr_flags.file_encryption) {
457         return;  // Nothing needs done.
458     }
459     std::vector<std::string> features_needed;
460     if ((sb->s_feature_incompat & cpu_to_le32(EXT4_FEATURE_INCOMPAT_ENCRYPT)) == 0) {
461         features_needed.emplace_back("encrypt");
462     }
463     android::fscrypt::EncryptionOptions options;
464     if (!android::fscrypt::ParseOptions(entry.encryption_options, &options)) {
465         LERROR << "Unable to parse encryption options on " << blk_device << ": "
466                << entry.encryption_options;
467         return;
468     }
469     if ((options.flags &
470          (FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64 | FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32)) != 0) {
471         // We can only use this policy on ext4 if the "stable_inodes" feature
472         // is set on the filesystem, otherwise shrinking will break encrypted files.
473         if ((sb->s_feature_compat & cpu_to_le32(EXT4_FEATURE_COMPAT_STABLE_INODES)) == 0) {
474             features_needed.emplace_back("stable_inodes");
475         }
476     }
477     if (features_needed.size() == 0) {
478         return;
479     }
480     if (!tune2fs_available()) {
481         LERROR << "Unable to enable ext4 encryption on " << blk_device
482                << " because " TUNE2FS_BIN " is missing";
483         return;
484     }
485 
486     auto flags = android::base::Join(features_needed, ',');
487     auto flag_arg = "-O"s + flags;
488     const char* argv[] = {TUNE2FS_BIN, flag_arg.c_str(), blk_device.c_str()};
489 
490     LINFO << "Enabling ext4 flags " << flags << " on " << blk_device;
491     if (!run_command(argv, ARRAY_SIZE(argv))) {
492         LERROR << "Failed to run " TUNE2FS_BIN " to enable "
493                << "ext4 flags " << flags << " on " << blk_device;
494         *fs_stat |= FS_STAT_ENABLE_ENCRYPTION_FAILED;
495     }
496 }
497 
498 // Enable fs-verity if needed.
tune_verity(const std::string & blk_device,const FstabEntry & entry,const struct ext4_super_block * sb,int * fs_stat)499 static void tune_verity(const std::string& blk_device, const FstabEntry& entry,
500                         const struct ext4_super_block* sb, int* fs_stat) {
501     bool has_verity = (sb->s_feature_ro_compat & cpu_to_le32(EXT4_FEATURE_RO_COMPAT_VERITY)) != 0;
502     bool want_verity = entry.fs_mgr_flags.fs_verity;
503 
504     if (has_verity || !want_verity) {
505         return;
506     }
507 
508     std::string verity_support;
509     if (!android::base::ReadFileToString(SYSFS_EXT4_VERITY, &verity_support)) {
510         LERROR << "Failed to open " << SYSFS_EXT4_VERITY;
511         return;
512     }
513 
514     if (!(android::base::Trim(verity_support) == "supported")) {
515         LERROR << "Current ext4 verity not supported by kernel";
516         return;
517     }
518 
519     if (!tune2fs_available()) {
520         LERROR << "Unable to enable ext4 verity on " << blk_device
521                << " because " TUNE2FS_BIN " is missing";
522         return;
523     }
524 
525     LINFO << "Enabling ext4 verity on " << blk_device;
526 
527     const char* argv[] = {TUNE2FS_BIN, "-O", "verity", blk_device.c_str()};
528     if (!run_command(argv, ARRAY_SIZE(argv))) {
529         LERROR << "Failed to run " TUNE2FS_BIN " to enable "
530                << "ext4 verity on " << blk_device;
531         *fs_stat |= FS_STAT_ENABLE_VERITY_FAILED;
532     }
533 }
534 
535 // Enable casefold if needed.
tune_casefold(const std::string & blk_device,const FstabEntry & entry,const struct ext4_super_block * sb,int * fs_stat)536 static void tune_casefold(const std::string& blk_device, const FstabEntry& entry,
537                           const struct ext4_super_block* sb, int* fs_stat) {
538     bool has_casefold = (sb->s_feature_incompat & cpu_to_le32(EXT4_FEATURE_INCOMPAT_CASEFOLD)) != 0;
539     bool wants_casefold =
540             android::base::GetBoolProperty("external_storage.casefold.enabled", false);
541 
542     if (entry.mount_point != "/data" || !wants_casefold || has_casefold) return;
543 
544     std::string casefold_support;
545     if (!android::base::ReadFileToString(SYSFS_EXT4_CASEFOLD, &casefold_support)) {
546         LERROR << "Failed to open " << SYSFS_EXT4_CASEFOLD;
547         return;
548     }
549 
550     if (!(android::base::Trim(casefold_support) == "supported")) {
551         LERROR << "Current ext4 casefolding not supported by kernel";
552         return;
553     }
554 
555     if (!tune2fs_available()) {
556         LERROR << "Unable to enable ext4 casefold on " << blk_device
557                << " because " TUNE2FS_BIN " is missing";
558         return;
559     }
560 
561     LINFO << "Enabling ext4 casefold on " << blk_device;
562 
563     const char* argv[] = {TUNE2FS_BIN, "-O", "casefold", "-E", "encoding=utf8", blk_device.c_str()};
564     if (!run_command(argv, ARRAY_SIZE(argv))) {
565         LERROR << "Failed to run " TUNE2FS_BIN " to enable "
566                << "ext4 casefold on " << blk_device;
567         *fs_stat |= FS_STAT_ENABLE_CASEFOLD_FAILED;
568     }
569 }
570 
resize2fs_available(void)571 static bool resize2fs_available(void) {
572     return access(RESIZE2FS_BIN, X_OK) == 0;
573 }
574 
575 // Enable metadata_csum
tune_metadata_csum(const std::string & blk_device,const FstabEntry & entry,const struct ext4_super_block * sb,int * fs_stat)576 static void tune_metadata_csum(const std::string& blk_device, const FstabEntry& entry,
577                                const struct ext4_super_block* sb, int* fs_stat) {
578     bool has_meta_csum =
579             (sb->s_feature_ro_compat & cpu_to_le32(EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) != 0;
580     bool want_meta_csum = entry.fs_mgr_flags.ext_meta_csum;
581 
582     if (has_meta_csum || !want_meta_csum) return;
583 
584     if (!tune2fs_available()) {
585         LERROR << "Unable to enable metadata_csum on " << blk_device
586                << " because " TUNE2FS_BIN " is missing";
587         return;
588     }
589     if (!resize2fs_available()) {
590         LERROR << "Unable to enable metadata_csum on " << blk_device
591                << " because " RESIZE2FS_BIN " is missing";
592         return;
593     }
594 
595     LINFO << "Enabling ext4 metadata_csum on " << blk_device;
596 
597     // Must give `-T now` to prevent last_fsck_time from growing too large,
598     // otherwise, tune2fs won't enable metadata_csum.
599     const char* tune2fs_args[] = {TUNE2FS_BIN, "-O",        "metadata_csum,64bit,extent",
600                                   "-T",        "now", blk_device.c_str()};
601     const char* resize2fs_args[] = {RESIZE2FS_BIN, "-b", blk_device.c_str()};
602 
603     if (!run_command(tune2fs_args, ARRAY_SIZE(tune2fs_args))) {
604         LERROR << "Failed to run " TUNE2FS_BIN " to enable "
605                << "ext4 metadata_csum on " << blk_device;
606         *fs_stat |= FS_STAT_ENABLE_METADATA_CSUM_FAILED;
607     } else if (!run_command(resize2fs_args, ARRAY_SIZE(resize2fs_args))) {
608         LERROR << "Failed to run " RESIZE2FS_BIN " to enable "
609                << "ext4 metadata_csum on " << blk_device;
610         *fs_stat |= FS_STAT_ENABLE_METADATA_CSUM_FAILED;
611     }
612 }
613 
614 // Read the primary superblock from an f2fs filesystem.  On failure return
615 // false.  If it's not an f2fs filesystem, also set FS_STAT_INVALID_MAGIC.
616 #define F2FS_SUPER_OFFSET 1024
read_f2fs_superblock(const std::string & blk_device,int * fs_stat)617 static bool read_f2fs_superblock(const std::string& blk_device, int* fs_stat) {
618     android::base::unique_fd fd(TEMP_FAILURE_RETRY(open(blk_device.c_str(), O_RDONLY | O_CLOEXEC)));
619     __le32 sb1, sb2;
620 
621     if (fd < 0) {
622         PERROR << "Failed to open '" << blk_device << "'";
623         return false;
624     }
625 
626     if (TEMP_FAILURE_RETRY(pread(fd, &sb1, sizeof(sb1), F2FS_SUPER_OFFSET)) != sizeof(sb1)) {
627         PERROR << "Can't read '" << blk_device << "' superblock1";
628         return false;
629     }
630     // F2FS only supports block_size=page_size case. So, it is safe to call
631     // `getpagesize()` and use that as size of super block.
632     if (TEMP_FAILURE_RETRY(pread(fd, &sb2, sizeof(sb2), getpagesize() + F2FS_SUPER_OFFSET)) !=
633         sizeof(sb2)) {
634         PERROR << "Can't read '" << blk_device << "' superblock2";
635         return false;
636     }
637 
638     if (sb1 != cpu_to_le32(F2FS_SUPER_MAGIC) && sb2 != cpu_to_le32(F2FS_SUPER_MAGIC)) {
639         LINFO << "Invalid f2fs superblock on '" << blk_device << "'";
640         *fs_stat |= FS_STAT_INVALID_MAGIC;
641         return false;
642     }
643     return true;
644 }
645 
646 // exported silent version of the above that just answer the question is_f2fs
fs_mgr_is_f2fs(const std::string & blk_device)647 bool fs_mgr_is_f2fs(const std::string& blk_device) {
648     android::base::ErrnoRestorer restore;
649     android::base::unique_fd fd(TEMP_FAILURE_RETRY(open(blk_device.c_str(), O_RDONLY | O_CLOEXEC)));
650     if (fd < 0) return false;
651     __le32 sb;
652     if (TEMP_FAILURE_RETRY(pread(fd, &sb, sizeof(sb), F2FS_SUPER_OFFSET)) != sizeof(sb)) {
653         return false;
654     }
655     if (sb == cpu_to_le32(F2FS_SUPER_MAGIC)) return true;
656     if (TEMP_FAILURE_RETRY(pread(fd, &sb, sizeof(sb), getpagesize() + F2FS_SUPER_OFFSET)) !=
657         sizeof(sb)) {
658         return false;
659     }
660     return sb == cpu_to_le32(F2FS_SUPER_MAGIC);
661 }
662 
SetReadAheadSize(const std::string & entry_block_device,off64_t size_kb)663 static void SetReadAheadSize(const std::string& entry_block_device, off64_t size_kb) {
664     std::string block_device;
665     if (!Realpath(entry_block_device, &block_device)) {
666         PERROR << "Failed to realpath " << entry_block_device;
667         return;
668     }
669 
670     static constexpr std::string_view kDevBlockPrefix("/dev/block/");
671     if (!android::base::StartsWith(block_device, kDevBlockPrefix)) {
672         LWARNING << block_device << " is not a block device";
673         return;
674     }
675 
676     DeviceMapper& dm = DeviceMapper::Instance();
677     while (true) {
678         std::string block_name = block_device;
679         if (android::base::StartsWith(block_device, kDevBlockPrefix)) {
680             block_name = block_device.substr(kDevBlockPrefix.length());
681         }
682         std::string sys_partition =
683                 android::base::StringPrintf("/sys/class/block/%s/partition", block_name.c_str());
684         struct stat info;
685         if (lstat(sys_partition.c_str(), &info) == 0) {
686             // it has a partition like "sda12".
687             block_name += "/..";
688         }
689         std::string sys_ra = android::base::StringPrintf("/sys/class/block/%s/queue/read_ahead_kb",
690                                                          block_name.c_str());
691         std::string size = android::base::StringPrintf("%llu", (long long)size_kb);
692         android::base::WriteStringToFile(size, sys_ra.c_str());
693         LINFO << "Set readahead_kb: " << size << " on " << sys_ra;
694 
695         auto parent = dm.GetParentBlockDeviceByPath(block_device);
696         if (!parent) {
697             return;
698         }
699         block_device = *parent;
700     }
701 }
702 
703 //
704 // Mechanism to allow fsck to be triggered by setting ro.preventative_fsck
705 // Introduced to address b/305658663
706 // If the property value is not equal to the flag file contents, trigger
707 // fsck and store the property value in the flag file
708 // If we want to trigger again, simply change the property value
709 //
check_if_preventative_fsck_needed(const FstabEntry & entry)710 static bool check_if_preventative_fsck_needed(const FstabEntry& entry) {
711     const char* flag_file = "/metadata/vold/preventative_fsck";
712     if (entry.mount_point != "/data") return false;
713 
714     // Don't error check - both default to empty string, which is OK
715     std::string prop = android::base::GetProperty("ro.preventative_fsck", "");
716     std::string flag;
717     android::base::ReadFileToString(flag_file, &flag);
718     if (prop == flag) return false;
719     // fsck is run immediately, so assume it runs or there is some deeper problem
720     if (!android::base::WriteStringToFile(prop, flag_file))
721         PERROR << "Failed to write file " << flag_file;
722     LINFO << "Run preventative fsck on /data";
723     return true;
724 }
725 
726 //
727 // Prepare the filesystem on the given block device to be mounted.
728 //
729 // If the "check" option was given in the fstab record, or it seems that the
730 // filesystem was uncleanly shut down, we'll run fsck on the filesystem.
731 //
732 // If needed, we'll also enable (or disable) filesystem features as specified by
733 // the fstab record.
734 //
prepare_fs_for_mount(const std::string & blk_device,const FstabEntry & entry,const std::string & alt_mount_point="")735 static int prepare_fs_for_mount(const std::string& blk_device, const FstabEntry& entry,
736                                 const std::string& alt_mount_point = "") {
737     auto& mount_point = alt_mount_point.empty() ? entry.mount_point : alt_mount_point;
738     // We need this because sometimes we have legacy symlinks that are
739     // lingering around and need cleaning up.
740     struct stat info;
741     if (lstat(mount_point.c_str(), &info) == 0 && (info.st_mode & S_IFMT) == S_IFLNK) {
742         unlink(mount_point.c_str());
743     }
744     mkdir(mount_point.c_str(), 0755);
745 
746     // Don't need to return error, since it's a salt
747     if (entry.readahead_size_kb != -1) {
748         SetReadAheadSize(blk_device, entry.readahead_size_kb);
749     }
750 
751     int fs_stat = 0;
752 
753     if (is_extfs(entry.fs_type)) {
754         struct ext4_super_block sb;
755 
756         if (read_ext4_superblock(blk_device, &sb, &fs_stat)) {
757             if ((sb.s_feature_incompat & EXT4_FEATURE_INCOMPAT_RECOVER) != 0 ||
758                 (sb.s_state & EXT4_VALID_FS) == 0) {
759                 LINFO << "Filesystem on " << blk_device << " was not cleanly shutdown; "
760                       << "state flags: 0x" << std::hex << sb.s_state << ", "
761                       << "incompat feature flags: 0x" << std::hex << sb.s_feature_incompat;
762                 fs_stat |= FS_STAT_UNCLEAN_SHUTDOWN;
763             }
764 
765             // Note: quotas should be enabled before running fsck.
766             tune_quota(blk_device, entry, &sb, &fs_stat);
767         } else {
768             return fs_stat;
769         }
770     } else if (is_f2fs(entry.fs_type)) {
771         if (!read_f2fs_superblock(blk_device, &fs_stat)) {
772             return fs_stat;
773         }
774     }
775 
776     if (check_if_preventative_fsck_needed(entry) || entry.fs_mgr_flags.check ||
777         (fs_stat & (FS_STAT_UNCLEAN_SHUTDOWN | FS_STAT_QUOTA_ENABLED))) {
778         check_fs(blk_device, entry.fs_type, mount_point, &fs_stat);
779     }
780 
781     if (is_extfs(entry.fs_type) &&
782         (entry.reserved_size != 0 || entry.fs_mgr_flags.file_encryption ||
783          entry.fs_mgr_flags.fs_verity || entry.fs_mgr_flags.ext_meta_csum)) {
784         struct ext4_super_block sb;
785 
786         if (read_ext4_superblock(blk_device, &sb, &fs_stat)) {
787             tune_reserved_size(blk_device, entry, &sb, &fs_stat);
788             tune_encrypt(blk_device, entry, &sb, &fs_stat);
789             tune_verity(blk_device, entry, &sb, &fs_stat);
790             tune_casefold(blk_device, entry, &sb, &fs_stat);
791             tune_metadata_csum(blk_device, entry, &sb, &fs_stat);
792         }
793     }
794 
795     return fs_stat;
796 }
797 
798 // Mark the given block device as read-only, using the BLKROSET ioctl.
fs_mgr_set_blk_ro(const std::string & blockdev,bool readonly)799 bool fs_mgr_set_blk_ro(const std::string& blockdev, bool readonly) {
800     unique_fd fd(TEMP_FAILURE_RETRY(open(blockdev.c_str(), O_RDONLY | O_CLOEXEC)));
801     if (fd < 0) {
802         return false;
803     }
804 
805     int ON = readonly;
806     return ioctl(fd, BLKROSET, &ON) == 0;
807 }
808 
809 // Orange state means the device is unlocked, see the following link for details.
810 // https://source.android.com/security/verifiedboot/verified-boot#device_state
fs_mgr_is_device_unlocked()811 bool fs_mgr_is_device_unlocked() {
812     std::string verified_boot_state;
813     if (fs_mgr_get_boot_config("verifiedbootstate", &verified_boot_state)) {
814         return verified_boot_state == "orange";
815     }
816     return false;
817 }
818 
819 // __mount(): wrapper around the mount() system call which also
820 // sets the underlying block device to read-only if the mount is read-only.
821 // See "man 2 mount" for return values.
// |source| is the mount source and |target| the mount point; the filesystem
// type, base mount flags and option strings are taken from |entry|. When
// |read_only| is true, MS_RDONLY is OR'ed into the flags from |entry|.
// On return errno holds the value observed right after the last mount() attempt.
static int __mount(const std::string& source, const std::string& target, const FstabEntry& entry,
                   bool read_only = false) {
    errno = 0;
    unsigned long mountflags = entry.flags;
    if (read_only) {
        mountflags |= MS_RDONLY;
    }
    int ret = 0;
    int save_errno = 0;
    int gc_allowance = 0;
    std::string opts;
    std::string checkpoint_opts;
    // The gc-allowance variant is only attempted for f2fs entries that carry
    // checkpoint options.
    bool try_f2fs_gc_allowance = is_f2fs(entry.fs_type) && entry.fs_checkpoint_opts.length() > 0;
    bool try_f2fs_fallback = false;
    // Started here so the duration published on success covers all attempts.
    Timer t;

    do {
        // EINVAL on a checkpoint variant steps down one level per iteration:
        // checkpoint options with gc allowance -> plain checkpoint options ->
        // no checkpoint options.
        if (save_errno == EINVAL && (try_f2fs_gc_allowance || try_f2fs_fallback)) {
            PINFO << "Kernel does not support " << checkpoint_opts << ", trying without.";
            try_f2fs_gc_allowance = false;
            // Attempt without gc allowance before dropping.
            try_f2fs_fallback = !try_f2fs_fallback;
        }
        if (try_f2fs_gc_allowance) {
            checkpoint_opts = entry.fs_checkpoint_opts + ":" + std::to_string(gc_allowance) + "%";
        } else if (try_f2fs_fallback) {
            checkpoint_opts = entry.fs_checkpoint_opts;
        } else {
            checkpoint_opts = "";
        }
        opts = entry.fs_options + checkpoint_opts;
        if (save_errno == EAGAIN) {
            PINFO << "Retrying mount (source=" << source << ",target=" << target
                  << ",type=" << entry.fs_type << ", gc_allowance=" << gc_allowance << "%)=" << ret
                  << "(" << save_errno << ")";
        }

        // Let's get the raw dm target, if it's a symlink, since some existing applications
        // rely on /proc/mounts to find the userdata's dm target path. Don't break that assumption.
        std::string real_source;
        if (!android::base::Realpath(source, &real_source)) {
            real_source = source;
        }
        ret = mount(real_source.c_str(), target.c_str(), entry.fs_type.c_str(), mountflags,
                    opts.c_str());
        // Capture errno immediately; later calls may overwrite it.
        save_errno = errno;
        // A following EAGAIN retry will ask for a larger gc allowance.
        if (try_f2fs_gc_allowance) gc_allowance += 10;
        // Retry on EAGAIN while the allowance has not exceeded 100, or on EINVAL
        // while a checkpoint variant is still left to drop.
    } while ((ret && save_errno == EAGAIN && gc_allowance <= 100) ||
             (ret && save_errno == EINVAL && (try_f2fs_gc_allowance || try_f2fs_fallback)));
    // For ENOENT, annotate the log line with which path does not exist. The
    // target is checked first; the source is only checked if the target exists.
    const char* target_missing = "";
    const char* source_missing = "";
    if (save_errno == ENOENT) {
        if (access(target.c_str(), F_OK)) {
            target_missing = "(missing)";
        } else if (access(source.c_str(), F_OK)) {
            source_missing = "(missing)";
        }
        // access() may have changed errno; put the mount() value back before logging.
        errno = save_errno;
    }
    PINFO << __FUNCTION__ << "(source=" << source << source_missing << ",target=" << target
          << target_missing << ",type=" << entry.fs_type << ")=" << ret;
    // A successful read-only mount also marks the underlying block device read-only.
    if ((ret == 0) && (mountflags & MS_RDONLY) != 0) {
        fs_mgr_set_blk_ro(source);
    }
    // Publish how long the mount took, keyed by the basename of the mount point.
    if (ret == 0) {
        android::base::SetProperty("ro.boottime.init.mount." + Basename(target),
                                   std::to_string(t.duration().count()));
    }
    // The calls above may have touched errno; hand the mount() value to the caller.
    errno = save_errno;
    return ret;
}
893 
// Compares two paths for equality while ignoring trailing '/' characters.
// The first character is never stripped, so "/" and "//" both reduce to "/".
// Returns false if either argument is empty.
static bool fs_match(const std::string& in1, const std::string& in2) {
    if (in1.empty() || in2.empty()) {
        return false;
    }

    // Length of |path| once trailing slashes are dropped; always at least 1.
    const auto trimmed_length = [](const std::string& path) {
        size_t len = path.size();
        while (len > 1 && path[len - 1] == '/') {
            --len;
        }
        return len;
    };

    const size_t len1 = trimmed_length(in1);
    const size_t len2 = trimmed_length(in2);
    return len1 == len2 && in1.compare(0, len1, in2, 0, len2) == 0;
}
921 
should_use_metadata_encryption(const FstabEntry & entry)922 static bool should_use_metadata_encryption(const FstabEntry& entry) {
923     return !entry.metadata_key_dir.empty() && entry.fs_mgr_flags.file_encryption;
924 }
925 
926 // Tries to mount any of the consecutive fstab entries that match
927 // the mountpoint of the one given by fstab[start_idx].
928 //
929 // end_idx: On return, will be the last entry that was looked at.
930 // attempted_idx: On return, will indicate which fstab entry
931 //     succeeded. In case of failure, it will be the start_idx.
932 // Sets errno to match the 1st mount failure on failure.
// Returns true if one of the candidate entries was mounted, false otherwise.
static bool mount_with_alternatives(Fstab& fstab, int start_idx, int* end_idx, int* attempted_idx) {
    unsigned long i;
    int mount_errno = 0;
    bool mounted = false;

    // Hunt down an fstab entry for the same mount point that might succeed.
    for (i = start_idx;
         // We required that fstab entries for the same mountpoint be consecutive.
         i < fstab.size() && fstab[start_idx].mount_point == fstab[i].mount_point; i++) {
        // Don't try to mount/encrypt the same mount point again.
        // Deal with alternate entries for the same point which are required to be all following
        // each other.
        // The loop keeps running after a success so that |i| ends up past the
        // last entry sharing this mount point.
        if (mounted) {
            LINFO << __FUNCTION__ << "(): skipping fstab dup mountpoint=" << fstab[i].mount_point
                  << " rec[" << i << "].fs_type=" << fstab[i].fs_type << " already mounted as "
                  << fstab[*attempted_idx].fs_type;
            continue;
        }

        // fstab[start_idx].blk_device is already updated to /dev/dm-<N> by
        // AVB related functions. Copy it from start_idx to the current index i.
        if ((i != start_idx) && fstab[i].fs_mgr_flags.logical &&
            fstab[start_idx].fs_mgr_flags.logical &&
            (fstab[i].logical_partition_name == fstab[start_idx].logical_partition_name)) {
            fstab[i].blk_device = fstab[start_idx].blk_device;
        }

        int fs_stat = prepare_fs_for_mount(fstab[i].blk_device, fstab[i]);
        if (fs_stat & FS_STAT_INVALID_MAGIC) {
            LERROR << __FUNCTION__
                   << "(): skipping mount due to invalid magic, mountpoint=" << fstab[i].mount_point
                   << " blk_dev=" << realpath(fstab[i].blk_device) << " rec[" << i
                   << "].fs_type=" << fstab[i].fs_type;
            mount_errno = EINVAL;  // continue bootup for metadata encryption
            continue;
        }

        // At most two mount attempts per entry: the initial one, and one more
        // after check_fs() has run.
        int retry_count = 2;
        const auto read_only = should_use_metadata_encryption(fstab[i]);
        if (read_only) {
            LOG(INFO) << "Mount point " << fstab[i].blk_device << " @ " << fstab[i].mount_point
                      << " uses metadata encryption, which means we need to unmount it later and "
                         "call encryptFstab/encrypt_inplace. To avoid file operations before "
                         "encryption, we will mount it as read-only first";
        }
        while (retry_count-- > 0) {
            // __mount() returns 0 on success.
            if (!__mount(fstab[i].blk_device, fstab[i].mount_point, fstab[i], read_only)) {
                *attempted_idx = i;
                mounted = true;
                if (i != start_idx) {
                    LINFO << __FUNCTION__ << "(): Mounted " << fstab[i].blk_device << " on "
                          << fstab[i].mount_point << " with fs_type=" << fstab[i].fs_type
                          << " instead of " << fstab[start_idx].fs_type;
                }
                fs_stat &= ~FS_STAT_FULL_MOUNT_FAILED;
                mount_errno = 0;
                break;
            } else {
                if (retry_count <= 0) break;  // run check_fs only once
                fs_stat |= FS_STAT_FULL_MOUNT_FAILED;
                // back up the first errno for crypto decisions.
                if (mount_errno == 0) {
                    mount_errno = errno;
                }
                // retry after fsck
                check_fs(fstab[i].blk_device, fstab[i].fs_type, fstab[i].mount_point, &fs_stat);
            }
        }
        log_fs_stat(fstab[i].blk_device, fs_stat);
    }

    /* Adjust i for the case where it was still within the fstab entries. */
    // When the loop stopped at an entry with a different mount point, step back
    // so that |*end_idx| names the last entry that was actually looked at.
    if (i < fstab.size()) --i;

    *end_idx = i;
    if (!mounted) {
        *attempted_idx = start_idx;
        errno = mount_errno;
        return false;
    }
    return true;
}
1015 
TranslateExtLabels(FstabEntry * entry)1016 static bool TranslateExtLabels(FstabEntry* entry) {
1017     if (!StartsWith(entry->blk_device, "LABEL=")) {
1018         return true;
1019     }
1020 
1021     std::string label = entry->blk_device.substr(6);
1022     if (label.size() > 16) {
1023         LERROR << "FS label is longer than allowed by filesystem";
1024         return false;
1025     }
1026 
1027     auto blockdir = std::unique_ptr<DIR, decltype(&closedir)>{opendir("/dev/block"), closedir};
1028     if (!blockdir) {
1029         LERROR << "couldn't open /dev/block";
1030         return false;
1031     }
1032 
1033     struct dirent* ent;
1034     while ((ent = readdir(blockdir.get()))) {
1035         if (ent->d_type != DT_BLK)
1036             continue;
1037 
1038         unique_fd fd(TEMP_FAILURE_RETRY(
1039                 openat(dirfd(blockdir.get()), ent->d_name, O_RDONLY | O_CLOEXEC)));
1040         if (fd < 0) {
1041             LERROR << "Cannot open block device /dev/block/" << ent->d_name;
1042             return false;
1043         }
1044 
1045         ext4_super_block super_block;
1046         if (TEMP_FAILURE_RETRY(lseek(fd, 1024, SEEK_SET)) < 0 ||
1047             TEMP_FAILURE_RETRY(read(fd, &super_block, sizeof(super_block))) !=
1048                     sizeof(super_block)) {
1049             // Probably a loopback device or something else without a readable superblock.
1050             continue;
1051         }
1052 
1053         if (super_block.s_magic != EXT4_SUPER_MAGIC) {
1054             LINFO << "/dev/block/" << ent->d_name << " not ext{234}";
1055             continue;
1056         }
1057 
1058         if (label == super_block.s_volume_name) {
1059             std::string new_blk_device = "/dev/block/"s + ent->d_name;
1060 
1061             LINFO << "resolved label " << entry->blk_device << " to " << new_blk_device;
1062 
1063             entry->blk_device = new_blk_device;
1064             return true;
1065         }
1066     }
1067 
1068     return false;
1069 }
1070 
1071 // Check to see if a mountable volume has encryption requirements
handle_encryptable(const FstabEntry & entry)1072 static int handle_encryptable(const FstabEntry& entry) {
1073     if (should_use_metadata_encryption(entry)) {
1074         if (umount_retry(entry.mount_point)) {
1075             return FS_MGR_MNTALL_DEV_NEEDS_METADATA_ENCRYPTION;
1076         }
1077         PERROR << "Could not umount " << entry.mount_point << " - fail since can't encrypt";
1078         return FS_MGR_MNTALL_FAIL;
1079     } else if (entry.fs_mgr_flags.file_encryption) {
1080         LINFO << entry.mount_point << " is file encrypted";
1081         return FS_MGR_MNTALL_DEV_FILE_ENCRYPTED;
1082     } else {
1083         return FS_MGR_MNTALL_DEV_NOT_ENCRYPTABLE;
1084     }
1085 }
1086 
set_type_property(int status)1087 static void set_type_property(int status) {
1088     switch (status) {
1089         case FS_MGR_MNTALL_DEV_FILE_ENCRYPTED:
1090         case FS_MGR_MNTALL_DEV_IS_METADATA_ENCRYPTED:
1091         case FS_MGR_MNTALL_DEV_NEEDS_METADATA_ENCRYPTION:
1092             SetProperty("ro.crypto.type", "file");
1093             break;
1094     }
1095 }
1096 
call_vdc(const std::vector<std::string> & args,int * ret)1097 static bool call_vdc(const std::vector<std::string>& args, int* ret) {
1098     std::vector<char const*> argv;
1099     argv.emplace_back("/system/bin/vdc");
1100     for (auto& arg : args) {
1101         argv.emplace_back(arg.c_str());
1102     }
1103     LOG(INFO) << "Calling: " << android::base::Join(argv, ' ');
1104     int err = logwrap_fork_execvp(argv.size(), argv.data(), ret, false, LOG_ALOG, false, nullptr);
1105     if (err != 0) {
1106         LOG(ERROR) << "vdc call failed with error code: " << err;
1107         return false;
1108     }
1109     LOG(DEBUG) << "vdc finished successfully";
1110     if (ret != nullptr) {
1111         *ret = WEXITSTATUS(*ret);
1112     }
1113     return true;
1114 }
1115 
fs_mgr_update_logical_partition(FstabEntry * entry)1116 bool fs_mgr_update_logical_partition(FstabEntry* entry) {
1117     // Logical partitions are specified with a named partition rather than a
1118     // block device, so if the block device is a path, then it has already
1119     // been updated.
1120     if (entry->blk_device[0] == '/') {
1121         return true;
1122     }
1123 
1124     DeviceMapper& dm = DeviceMapper::Instance();
1125     std::string device_name;
1126     if (!dm.GetDmDevicePathByName(entry->blk_device, &device_name)) {
1127         return false;
1128     }
1129 
1130     entry->blk_device = device_name;
1131     return true;
1132 }
1133 
SupportsCheckpoint(FstabEntry * entry)1134 static bool SupportsCheckpoint(FstabEntry* entry) {
1135     return entry->fs_mgr_flags.checkpoint_blk || entry->fs_mgr_flags.checkpoint_fs;
1136 }
1137 
// Applies checkpoint-related adjustments to fstab entries before they are
// mounted, and undoes them again on request.
//
// Whether checkpointing is needed is either supplied to the constructor or
// queried once through "vdc checkpoint needsCheckpoint" and then cached. The
// class also remembers which original block device each "bow" device it created
// stands in for, so Revert() can restore the entry.
class CheckpointManager {
  public:
    // |needs_checkpoint| takes the values of the private enum below; the default
    // of -1 (UNKNOWN) defers the decision to the first NeedsCheckpoint() call.
    // |metadata_encrypted| suppresses the restoreCheckpoint call in Update();
    // |needs_encrypt| suppresses bow device creation for checkpoint_blk entries.
    CheckpointManager(int needs_checkpoint = -1, bool metadata_encrypted = false,
                      bool needs_encrypt = false)
        : needs_checkpoint_(needs_checkpoint),
          metadata_encrypted_(metadata_encrypted),
          needs_encrypt_(needs_encrypt) {}

    // Returns whether checkpointing is needed, asking vdc the first time if the
    // answer is not known yet. A failed vdc call is treated as "not needed".
    bool NeedsCheckpoint() {
        if (needs_checkpoint_ != UNKNOWN) {
            return needs_checkpoint_ == YES;
        }
        // On success call_vdc() stores vdc's exit status in needs_checkpoint_.
        if (!call_vdc({"checkpoint", "needsCheckpoint"}, &needs_checkpoint_)) {
            LERROR << "Failed to find if checkpointing is needed. Assuming no.";
            needs_checkpoint_ = NO;
        }
        return needs_checkpoint_ == YES;
    }

    // Prepares |entry| for a checkpointed mount. Entries without a checkpoint
    // flag are left alone. |block_device|, when non-empty, is the device that is
    // checked for an existing bow device instead of entry->blk_device.
    // Returns false only if the checkpoint partition could not be set up.
    bool Update(FstabEntry* entry, const std::string& block_device = std::string()) {
        if (!SupportsCheckpoint(entry)) {
            return true;
        }

        // The result of restoreCheckpoint is deliberately not inspected here.
        if (entry->fs_mgr_flags.checkpoint_blk && !metadata_encrypted_) {
            call_vdc({"checkpoint", "restoreCheckpoint", entry->blk_device}, nullptr);
        }

        if (!NeedsCheckpoint()) {
            return true;
        }

        if (!UpdateCheckpointPartition(entry, block_device)) {
            LERROR << "Could not set up checkpoint partition, skipping!";
            return false;
        }

        return true;
    }

    // Undoes the block device substitution made by Update(): if
    // entry->blk_device is a bow device created by this manager, the original
    // device path is restored and the "bow" dm device is deleted.
    // Always returns true; a failed delete is only logged.
    bool Revert(FstabEntry* entry) {
        if (!SupportsCheckpoint(entry)) {
            return true;
        }

        // Nothing to revert if this manager never substituted the device.
        if (device_map_.find(entry->blk_device) == device_map_.end()) {
            return true;
        }

        std::string bow_device = entry->blk_device;
        entry->blk_device = device_map_[bow_device];
        device_map_.erase(bow_device);

        DeviceMapper& dm = DeviceMapper::Instance();
        if (!dm.DeleteDevice("bow")) {
            PERROR << "Failed to remove bow device";
        }

        return true;
    }

  private:
    // Applies the checkpoint mechanism selected by |entry|'s flags.
    //  - checkpoint_fs: f2fs entries get ",checkpoint=disable" as their
    //    checkpoint options; other filesystem types only produce an error log.
    //  - checkpoint_blk (unless needs_encrypt_): if no bow device is found for
    //    the device, a dm device named "bow" is created on top of
    //    entry->blk_device and |entry| is pointed at it.
    // Returns false if creating the bow device fails at any step.
    bool UpdateCheckpointPartition(FstabEntry* entry, const std::string& block_device) {
        if (entry->fs_mgr_flags.checkpoint_fs) {
            if (is_f2fs(entry->fs_type)) {
                entry->fs_checkpoint_opts = ",checkpoint=disable";
            } else {
                LERROR << entry->fs_type << " does not implement checkpoints.";
            }
        } else if (entry->fs_mgr_flags.checkpoint_blk && !needs_encrypt_) {
            auto actual_block_device = block_device.empty() ? entry->blk_device : block_device;
            // Only create a bow device if none is found for the device yet.
            if (fs_mgr_find_bow_device(actual_block_device).empty()) {
                unique_fd fd(
                        TEMP_FAILURE_RETRY(open(entry->blk_device.c_str(), O_RDONLY | O_CLOEXEC)));
                if (fd < 0) {
                    PERROR << "Cannot open device " << entry->blk_device;
                    return false;
                }

                // Device size expressed in 512-byte sectors.
                uint64_t size = get_block_device_size(fd) / 512;
                if (!size) {
                    PERROR << "Cannot get device size";
                    return false;
                }

                // dm-bow will not load if size is not a multiple of 4096
                // rounding down does not hurt, since ext4 will only use full blocks
                // (8 sectors of 512 bytes make up 4096 bytes.)
                size &= ~7;

                android::dm::DmTable table;
                auto bowTarget =
                        std::make_unique<android::dm::DmTargetBow>(0, size, entry->blk_device);

                // dm-bow uses the first block as a log record, and relocates the real first block
                // elsewhere. For metadata encrypted devices, dm-bow sits below dm-default-key, and
                // for post Android Q devices dm-default-key uses a block size of 4096 always.
                // So if dm-bow's block size, which by default is the block size of the underlying
                // hardware, is less than dm-default-key's, blocks will get broken up and I/O will
                // fail as it won't be data_unit_size aligned.
                // However, since it is possible there is an already shipping non
                // metadata-encrypted device with smaller blocks, we must not change this for
                // devices shipped with Q or earlier unless they explicitly selected dm-default-key
                // v2
                unsigned int options_format_version = android::base::GetUintProperty<unsigned int>(
                        "ro.crypto.dm_default_key.options_format.version",
                        (android::fscrypt::GetFirstApiLevel() <= __ANDROID_API_Q__ ? 1 : 2));
                if (options_format_version > 1) {
                    bowTarget->SetBlockSize(4096);
                }

                if (!table.AddTarget(std::move(bowTarget))) {
                    LERROR << "Failed to add bow target";
                    return false;
                }

                DeviceMapper& dm = DeviceMapper::Instance();
                if (!dm.CreateDevice("bow", table)) {
                    PERROR << "Failed to create bow device";
                    return false;
                }

                std::string name;
                if (!dm.GetDmDevicePathByName("bow", &name)) {
                    PERROR << "Failed to get bow device name";
                    return false;
                }

                // Remember the original device so Revert() can restore it.
                device_map_[name] = entry->blk_device;
                entry->blk_device = name;
            }
        }
        return true;
    }

    // Tri-state stored in needs_checkpoint_.
    enum { UNKNOWN = -1, NO = 0, YES = 1 };
    int needs_checkpoint_;
    bool metadata_encrypted_;
    bool needs_encrypt_;
    // Maps the path of a bow device created here to the block device it replaced.
    std::map<std::string, std::string> device_map_;
};
1278 
fs_mgr_find_bow_device(const std::string & block_device)1279 std::string fs_mgr_find_bow_device(const std::string& block_device) {
1280     // handle symlink such as "/dev/block/mapper/userdata"
1281     std::string real_path;
1282     if (!android::base::Realpath(block_device, &real_path)) {
1283         real_path = block_device;
1284     }
1285 
1286     struct stat st;
1287     if (stat(real_path.c_str(), &st) < 0) {
1288         PLOG(ERROR) << "stat failed: " << real_path;
1289         return std::string();
1290     }
1291     if (!S_ISBLK(st.st_mode)) {
1292         PLOG(ERROR) << real_path << " is not block device";
1293         return std::string();
1294     }
1295     std::string sys_dir = android::base::StringPrintf("/sys/dev/block/%u:%u", major(st.st_rdev),
1296                                                       minor(st.st_rdev));
1297     for (;;) {
1298         std::string name;
1299         if (!android::base::ReadFileToString(sys_dir + "/dm/name", &name)) {
1300             PLOG(ERROR) << real_path << " is not dm device";
1301             return std::string();
1302         }
1303 
1304         if (name == "bow\n") return sys_dir;
1305 
1306         std::string slaves = sys_dir + "/slaves";
1307         std::unique_ptr<DIR, decltype(&closedir)> directory(opendir(slaves.c_str()), closedir);
1308         if (!directory) {
1309             PLOG(ERROR) << "Can't open slave directory " << slaves;
1310             return std::string();
1311         }
1312 
1313         int count = 0;
1314         for (dirent* entry = readdir(directory.get()); entry; entry = readdir(directory.get())) {
1315             if (entry->d_type != DT_LNK) continue;
1316 
1317             if (count == 1) {
1318                 LOG(ERROR) << "Too many slaves in " << slaves;
1319                 return std::string();
1320             }
1321 
1322             ++count;
1323             sys_dir = std::string("/sys/block/") + entry->d_name;
1324         }
1325 
1326         if (count != 1) {
1327             LOG(ERROR) << "No slave in " << slaves;
1328             return std::string();
1329         }
1330     }
1331 }
1332 
1333 static constexpr const char* kUserdataWrapperName = "userdata-wrapper";
1334 
WrapUserdata(FstabEntry * entry,dev_t dev,const std::string & block_device)1335 static void WrapUserdata(FstabEntry* entry, dev_t dev, const std::string& block_device) {
1336     DeviceMapper& dm = DeviceMapper::Instance();
1337     if (dm.GetState(kUserdataWrapperName) != DmDeviceState::INVALID) {
1338         // This will report failure for us. If we do fail to get the path,
1339         // we leave the device unwrapped.
1340         dm.GetDmDevicePathByName(kUserdataWrapperName, &entry->blk_device);
1341         return;
1342     }
1343 
1344     unique_fd fd(open(block_device.c_str(), O_RDONLY | O_CLOEXEC));
1345     if (fd < 0) {
1346         PLOG(ERROR) << "open failed: " << entry->blk_device;
1347         return;
1348     }
1349 
1350     auto dev_str = android::base::StringPrintf("%u:%u", major(dev), minor(dev));
1351     uint64_t sectors = get_block_device_size(fd) / 512;
1352 
1353     android::dm::DmTable table;
1354     table.Emplace<DmTargetLinear>(0, sectors, dev_str, 0);
1355 
1356     std::string dm_path;
1357     if (!dm.CreateDevice(kUserdataWrapperName, table, &dm_path, 20s)) {
1358         LOG(ERROR) << "Failed to create userdata wrapper device";
1359         return;
1360     }
1361     entry->blk_device = dm_path;
1362 }
1363 
1364 // When using Virtual A/B, partitions can be backed by /data and mapped with
1365 // device-mapper in first-stage init. This can happen when merging an OTA or
1366 // when using adb remount to house "scratch". In this case, /data cannot be
1367 // mounted directly off the userdata block device, and e2fsck will refuse to
1368 // scan it, because the kernel reports the block device as in-use.
1369 //
1370 // As a workaround, when mounting /data, we create a trivial dm-linear wrapper
1371 // if the underlying block device already has dependencies. Note that we make
1372 // an exception for metadata-encrypted devices, since dm-default-key is already
1373 // a wrapper.
WrapUserdataIfNeeded(FstabEntry * entry,const std::string & actual_block_device={})1374 static void WrapUserdataIfNeeded(FstabEntry* entry, const std::string& actual_block_device = {}) {
1375     const auto& block_device =
1376             actual_block_device.empty() ? entry->blk_device : actual_block_device;
1377     if (entry->mount_point != "/data" || !entry->metadata_key_dir.empty() ||
1378         android::base::StartsWith(block_device, "/dev/block/dm-")) {
1379         return;
1380     }
1381 
1382     struct stat st;
1383     if (stat(block_device.c_str(), &st) < 0) {
1384         PLOG(ERROR) << "stat failed: " << block_device;
1385         return;
1386     }
1387 
1388     std::string path = android::base::StringPrintf("/sys/dev/block/%u:%u/holders",
1389                                                    major(st.st_rdev), minor(st.st_rdev));
1390     std::unique_ptr<DIR, decltype(&closedir)> dir(opendir(path.c_str()), closedir);
1391     if (!dir) {
1392         PLOG(ERROR) << "opendir failed: " << path;
1393         return;
1394     }
1395 
1396     struct dirent* d;
1397     bool has_holders = false;
1398     while ((d = readdir(dir.get())) != nullptr) {
1399         if (strcmp(d->d_name, ".") != 0 && strcmp(d->d_name, "..") != 0) {
1400             has_holders = true;
1401             break;
1402         }
1403     }
1404 
1405     if (has_holders) {
1406         WrapUserdata(entry, st.st_rdev, block_device);
1407     }
1408 }
1409 
IsMountPointMounted(const std::string & mount_point)1410 static bool IsMountPointMounted(const std::string& mount_point) {
1411     // Check if this is already mounted.
1412     Fstab fstab;
1413     if (!ReadFstabFromFile("/proc/mounts", &fstab)) {
1414         return false;
1415     }
1416     return GetEntryForMountPoint(&fstab, mount_point) != nullptr;
1417 }
1418 
// When multiple fstab records share the same mount_point, it will try to mount each
// one in turn, and ignore any duplicates after a first successful mount.
//
// |mount_mode| filters which entries are considered (MOUNT_MODE_EARLY, MOUNT_MODE_LATE,
// MOUNT_MODE_ONLY_USERDATA are the values tested below).
//
// Returns a MountAllResult. Its code is FS_MGR_MNTALL_FAIL on error and another
// FS_MGR_MNTALL_* value otherwise; its second member reports whether /data was mounted
// by this call.
MountAllResult fs_mgr_mount_all(Fstab* fstab, int mount_mode) {
    // Encryption status accumulated over all entries; handed to set_type_property() and,
    // when no error was counted, returned as the result code.
    int encryptable = FS_MGR_MNTALL_DEV_NOT_ENCRYPTABLE;
    int error_count = 0;
    CheckpointManager checkpoint_manager;
    AvbUniquePtr avb_handle(nullptr);
    // Set when a failed mount is examined below; still visible when the same entry is
    // replayed after a format, where it is forwarded to "cryptfs encryptFstab".
    bool wiped = false;

    bool userdata_mounted = false;
    if (fstab->empty()) {
        return {FS_MGR_MNTALL_FAIL, userdata_mounted};
    }

    bool scratch_can_be_mounted = true;

    // Keep i int to prevent unsigned integer overflow from (i = top_idx - 1),
    // where top_idx is 0. It will give SIGABRT
    for (int i = 0; i < static_cast<int>(fstab->size()); i++) {
        auto& current_entry = (*fstab)[i];

        // If a filesystem should have been mounted in the first stage, we
        // ignore it here. With one exception, if the filesystem is
        // formattable, then it can only be formatted in the second stage,
        // so we allow it to mount here.
        if (current_entry.fs_mgr_flags.first_stage_mount &&
            (!current_entry.fs_mgr_flags.formattable ||
             IsMountPointMounted(current_entry.mount_point))) {
            continue;
        }

        // Don't mount entries that are managed by vold or not for the mount mode.
        if (current_entry.fs_mgr_flags.vold_managed || current_entry.fs_mgr_flags.recovery_only ||
            ((mount_mode == MOUNT_MODE_LATE) && !current_entry.fs_mgr_flags.late_mount) ||
            ((mount_mode == MOUNT_MODE_EARLY) && current_entry.fs_mgr_flags.late_mount)) {
            continue;
        }

        // Skip swap and raw partition entries such as boot, recovery, etc.
        if (current_entry.fs_type == "swap" || current_entry.fs_type == "emmc" ||
            current_entry.fs_type == "mtd") {
            continue;
        }

        // Skip mounting the root partition, as it will already have been mounted.
        if (current_entry.mount_point == "/" || current_entry.mount_point == "/system") {
            if ((current_entry.flags & MS_RDONLY) != 0) {
                fs_mgr_set_blk_ro(current_entry.blk_device);
            }
            continue;
        }

        // Terrible hack to make it possible to remount /data.
        // TODO: refactor fs_mgr_mount_all and get rid of this.
        if (mount_mode == MOUNT_MODE_ONLY_USERDATA && current_entry.mount_point != "/data") {
            continue;
        }

        // Translate LABEL= file system labels into block devices.
        if (is_extfs(current_entry.fs_type)) {
            if (!TranslateExtLabels(&current_entry)) {
                LERROR << "Could not translate label to block device";
                continue;
            }
        }

        if (current_entry.fs_mgr_flags.logical) {
            if (!fs_mgr_update_logical_partition(&current_entry)) {
                LERROR << "Could not set up logical partition, skipping!";
                continue;
            }
        }

        WrapUserdataIfNeeded(&current_entry);

        if (!checkpoint_manager.Update(&current_entry)) {
            continue;
        }

        // Entries flagged "wait" get up to 20s for their block device to show up.
        if (current_entry.fs_mgr_flags.wait && !WaitForFile(current_entry.blk_device, 20s)) {
            LERROR << "Skipping '" << current_entry.blk_device << "' during mount_all";
            continue;
        }

        if (current_entry.fs_mgr_flags.avb) {
            // The AvbHandle is opened on first use and then shared by later entries.
            if (!avb_handle) {
                avb_handle = AvbHandle::Open();
                if (!avb_handle) {
                    LERROR << "Failed to open AvbHandle";
                    set_type_property(encryptable);
                    return {FS_MGR_MNTALL_FAIL, userdata_mounted};
                }
            }
            if (avb_handle->SetUpAvbHashtree(&current_entry, true /* wait_for_verity_dev */) ==
                AvbHashtreeResult::kFail) {
                LERROR << "Failed to set up AVB on partition: " << current_entry.mount_point
                       << ", skipping!";
                // Skips mounting the device.
                continue;
            }
        } else if (!current_entry.avb_keys.empty()) {
            if (AvbHandle::SetUpStandaloneAvbHashtree(&current_entry) == AvbHashtreeResult::kFail) {
                LERROR << "Failed to set up AVB on standalone partition: "
                       << current_entry.mount_point << ", skipping!";
                // Skips mounting the device.
                continue;
            }
        }

        int last_idx_inspected;
        // Remember where this group of records started so a format can replay it.
        int top_idx = i;
        int attempted_idx = -1;

        // NOTE(review): attempted_idx is used as an index right after this call, so
        // mount_with_alternatives() is presumably expected to always set it to a valid
        // value -- confirm against its definition.
        bool mret = mount_with_alternatives(*fstab, i, &last_idx_inspected, &attempted_idx);
        auto& attempted_entry = (*fstab)[attempted_idx];
        // Continue the outer loop after the last record inspected by the call above.
        i = last_idx_inspected;
        // Save errno now; the calls below may overwrite it.
        int mount_errno = errno;

        // Handle success and deal with encryptability.
        if (mret) {
            int status = handle_encryptable(attempted_entry);

            if (status == FS_MGR_MNTALL_FAIL) {
                // Fatal error - no point continuing.
                return {status, userdata_mounted};
            }

            if (status != FS_MGR_MNTALL_DEV_NOT_ENCRYPTABLE) {
                if (encryptable != FS_MGR_MNTALL_DEV_NOT_ENCRYPTABLE) {
                    // Log and continue
                    LERROR << "Only one encryptable/encrypted partition supported";
                }
                encryptable = status;
                if (status == FS_MGR_MNTALL_DEV_NEEDS_METADATA_ENCRYPTION) {
                    fs_mgr_set_blk_ro(attempted_entry.blk_device, false);
                    if (!call_vdc({"cryptfs", "encryptFstab", attempted_entry.blk_device,
                                   attempted_entry.mount_point, wiped ? "true" : "false",
                                   attempted_entry.fs_type,
                                   attempted_entry.fs_mgr_flags.is_zoned ? "true" : "false",
                                   android::base::Join(attempted_entry.user_devices, ' ')},
                                  nullptr)) {
                        LERROR << "Encryption failed";
                        set_type_property(encryptable);
                        return {FS_MGR_MNTALL_FAIL, userdata_mounted};
                    }
                }
            }

            if (current_entry.mount_point == "/data") {
                userdata_mounted = true;
            }

            MountOverlayfs(attempted_entry, &scratch_can_be_mounted);

            // Success!  Go get the next one.
            continue;
        }

        // Mounting failed, understand why and retry.
        wiped = partition_wiped(current_entry.blk_device.c_str());
        if (mount_errno != EBUSY && mount_errno != EACCES &&
            current_entry.fs_mgr_flags.formattable && wiped) {
            // current_entry and attempted_entry point at the same partition, but sometimes
            // at two different lines in the fstab.  Use current_entry for formatting
            // as that is the preferred one.
            LERROR << __FUNCTION__ << "(): " << realpath(current_entry.blk_device)
                   << " is wiped and " << current_entry.mount_point << " " << current_entry.fs_type
                   << " is formattable. Format it.";

            checkpoint_manager.Revert(&current_entry);

            // EncryptInplace will be used when vdc gives an error or needs to format partitions
            // other than /data
            if (should_use_metadata_encryption(current_entry) &&
                current_entry.mount_point == "/data") {

                // vdc->Format requires "ro.crypto.type" to set an encryption flag
                encryptable = FS_MGR_MNTALL_DEV_IS_METADATA_ENCRYPTED;
                set_type_property(encryptable);

                if (!call_vdc({"cryptfs", "encryptFstab", current_entry.blk_device,
                               current_entry.mount_point, "true" /* shouldFormat */,
                               current_entry.fs_type,
                               current_entry.fs_mgr_flags.is_zoned ? "true" : "false",
                               android::base::Join(current_entry.user_devices, ' ')},
                              nullptr)) {
                    // Fall through to the plain format below.
                    LERROR << "Encryption failed";
                } else {
                    userdata_mounted = true;
                    continue;
                }
            }

            if (fs_mgr_do_format(current_entry) == 0) {
                // Let's replay the mount actions.
                // The loop's i++ brings i back to top_idx.
                i = top_idx - 1;
                continue;
            } else {
                LERROR << __FUNCTION__ << "(): Format failed. "
                       << "Suggest recovery...";
                encryptable = FS_MGR_MNTALL_DEV_NEEDS_RECOVERY;
                continue;
            }
        }

        // mount(2) returned an error, handle the encryptable/formattable case.
        if (mount_errno != EBUSY && mount_errno != EACCES &&
            should_use_metadata_encryption(attempted_entry)) {
            if (!call_vdc({"cryptfs", "mountFstab", attempted_entry.blk_device,
                           attempted_entry.mount_point,
                           current_entry.fs_mgr_flags.is_zoned ? "true" : "false",
                           android::base::Join(current_entry.user_devices, ' ')},
                          nullptr)) {
                ++error_count;
            } else if (current_entry.mount_point == "/data") {
                userdata_mounted = true;
            }
            encryptable = FS_MGR_MNTALL_DEV_IS_METADATA_ENCRYPTED;
            continue;
        } else {
            // fs_options might be null so we cannot use PERROR << directly.
            // Use StringPrintf to output "(null)" instead.
            if (attempted_entry.fs_mgr_flags.no_fail) {
                // no_fail entries are logged but do not count as an error.
                PERROR << android::base::StringPrintf(
                        "Ignoring failure to mount an un-encryptable or wiped "
                        "partition on %s at %s options: %s",
                        attempted_entry.blk_device.c_str(), attempted_entry.mount_point.c_str(),
                        attempted_entry.fs_options.c_str());
            } else {
                PERROR << android::base::StringPrintf(
                        "Failed to mount an un-encryptable or wiped partition "
                        "on %s at %s options: %s",
                        attempted_entry.blk_device.c_str(), attempted_entry.mount_point.c_str(),
                        attempted_entry.fs_options.c_str());
                ++error_count;
            }
            continue;
        }
    }
    // Publish the filesystem type /data ended up with, as listed in /proc/mounts.
    if (userdata_mounted) {
        Fstab mounted_fstab;
        if (!ReadFstabFromFile("/proc/mounts", &mounted_fstab)) {
            LOG(ERROR) << "Could't load fstab from /proc/mounts , unable to set ro.fstype.data . "
                          "init.rc actions depending on this prop would not run, boot might fail.";
        } else {
            for (const auto& entry : mounted_fstab) {
                if (entry.mount_point == "/data") {
                    android::base::SetProperty("ro.fstype.data", entry.fs_type);
                }
            }
        }
    }

    set_type_property(encryptable);

    // Any counted mount error turns the overall result into a failure.
    if (error_count) {
        return {FS_MGR_MNTALL_FAIL, userdata_mounted};
    } else {
        return {encryptable, userdata_mounted};
    }
}
1681 
fs_mgr_umount_all(android::fs_mgr::Fstab * fstab)1682 int fs_mgr_umount_all(android::fs_mgr::Fstab* fstab) {
1683     AvbUniquePtr avb_handle(nullptr);
1684     int ret = FsMgrUmountStatus::SUCCESS;
1685     for (auto& current_entry : *fstab) {
1686         if (!IsMountPointMounted(current_entry.mount_point)) {
1687             continue;
1688         }
1689 
1690         if (umount(current_entry.mount_point.c_str()) == -1) {
1691             PERROR << "Failed to umount " << current_entry.mount_point;
1692             ret |= FsMgrUmountStatus::ERROR_UMOUNT;
1693             continue;
1694         }
1695 
1696         if (current_entry.fs_mgr_flags.logical) {
1697             if (!fs_mgr_update_logical_partition(&current_entry)) {
1698                 LERROR << "Could not get logical partition blk_device, skipping!";
1699                 ret |= FsMgrUmountStatus::ERROR_DEVICE_MAPPER;
1700                 continue;
1701             }
1702         }
1703 
1704         if (current_entry.fs_mgr_flags.avb || !current_entry.avb_keys.empty()) {
1705             if (!AvbHandle::TearDownAvbHashtree(&current_entry, true /* wait */)) {
1706                 LERROR << "Failed to tear down AVB on mount point: " << current_entry.mount_point;
1707                 ret |= FsMgrUmountStatus::ERROR_VERITY;
1708                 continue;
1709             }
1710         }
1711     }
1712     return ret;
1713 }
1714 
GetMillisProperty(const std::string & name,std::chrono::milliseconds default_value)1715 static std::chrono::milliseconds GetMillisProperty(const std::string& name,
1716                                                    std::chrono::milliseconds default_value) {
1717     auto value = GetUintProperty(name, static_cast<uint64_t>(default_value.count()));
1718     return std::chrono::milliseconds(std::move(value));
1719 }
1720 
fs_mgr_unmount_all_data_mounts(const std::string & data_block_device)1721 static bool fs_mgr_unmount_all_data_mounts(const std::string& data_block_device) {
1722     LINFO << __FUNCTION__ << "(): about to umount everything on top of " << data_block_device;
1723     Timer t;
1724     auto timeout = GetMillisProperty("init.userspace_reboot.userdata_remount.timeoutmillis", 5s);
1725     while (true) {
1726         bool umount_done = true;
1727         Fstab proc_mounts;
1728         if (!ReadFstabFromFile("/proc/mounts", &proc_mounts)) {
1729             LERROR << __FUNCTION__ << "(): Can't read /proc/mounts";
1730             return false;
1731         }
1732         // Now proceed with other bind mounts on top of /data.
1733         for (const auto& entry : proc_mounts) {
1734             std::string block_device;
1735             if (StartsWith(entry.blk_device, "/dev/block") &&
1736                 !Realpath(entry.blk_device, &block_device)) {
1737                 PWARNING << __FUNCTION__ << "(): failed to realpath " << entry.blk_device;
1738                 block_device = entry.blk_device;
1739             }
1740             if (data_block_device == block_device) {
1741                 if (umount2(entry.mount_point.c_str(), 0) != 0) {
1742                     PERROR << __FUNCTION__ << "(): Failed to umount " << entry.mount_point;
1743                     umount_done = false;
1744                 }
1745             }
1746         }
1747         if (umount_done) {
1748             LINFO << __FUNCTION__ << "(): Unmounting /data took " << t;
1749             return true;
1750         }
1751         if (t.duration() > timeout) {
1752             LERROR << __FUNCTION__ << "(): Timed out unmounting all mounts on "
1753                    << data_block_device;
1754             Fstab remaining_mounts;
1755             if (!ReadFstabFromFile("/proc/mounts", &remaining_mounts)) {
1756                 LERROR << __FUNCTION__ << "(): Can't read /proc/mounts";
1757             } else {
1758                 LERROR << __FUNCTION__ << "(): Following mounts remaining";
1759                 for (const auto& e : remaining_mounts) {
1760                     LERROR << __FUNCTION__ << "(): mount point: " << e.mount_point
1761                            << " block device: " << e.blk_device;
1762                 }
1763             }
1764             return false;
1765         }
1766         std::this_thread::sleep_for(50ms);
1767     }
1768 }
1769 
UnwindDmDeviceStack(const std::string & block_device,std::vector<std::string> * dm_stack)1770 static bool UnwindDmDeviceStack(const std::string& block_device,
1771                                 std::vector<std::string>* dm_stack) {
1772     if (!StartsWith(block_device, "/dev/block/")) {
1773         LWARNING << block_device << " is not a block device";
1774         return false;
1775     }
1776     std::string current = block_device;
1777     DeviceMapper& dm = DeviceMapper::Instance();
1778     while (true) {
1779         dm_stack->push_back(current);
1780         if (!dm.IsDmBlockDevice(current)) {
1781             break;
1782         }
1783         auto parent = dm.GetParentBlockDeviceByPath(current);
1784         if (!parent) {
1785             return false;
1786         }
1787         current = *parent;
1788     }
1789     return true;
1790 }
1791 
fs_mgr_get_mounted_entry_for_userdata(Fstab * fstab,const std::string & data_block_device)1792 FstabEntry* fs_mgr_get_mounted_entry_for_userdata(Fstab* fstab,
1793                                                   const std::string& data_block_device) {
1794     std::vector<std::string> dm_stack;
1795     if (!UnwindDmDeviceStack(data_block_device, &dm_stack)) {
1796         LERROR << "Failed to unwind dm-device stack for " << data_block_device;
1797         return nullptr;
1798     }
1799     for (auto& entry : *fstab) {
1800         if (entry.mount_point != "/data") {
1801             continue;
1802         }
1803         std::string block_device;
1804         if (entry.fs_mgr_flags.logical) {
1805             if (!fs_mgr_update_logical_partition(&entry)) {
1806                 LERROR << "Failed to update logic partition " << entry.blk_device;
1807                 continue;
1808             }
1809             block_device = entry.blk_device;
1810         } else if (!Realpath(entry.blk_device, &block_device)) {
1811             PWARNING << "Failed to realpath " << entry.blk_device;
1812             block_device = entry.blk_device;
1813         }
1814         if (std::find(dm_stack.begin(), dm_stack.end(), block_device) != dm_stack.end()) {
1815             return &entry;
1816         }
1817     }
1818     LERROR << "Didn't find entry that was used to mount /data onto " << data_block_device;
1819     return nullptr;
1820 }
1821 
1822 // TODO(b/143970043): return different error codes based on which step failed.
fs_mgr_remount_userdata_into_checkpointing(Fstab * fstab)1823 int fs_mgr_remount_userdata_into_checkpointing(Fstab* fstab) {
1824     Fstab proc_mounts;
1825     if (!ReadFstabFromFile("/proc/mounts", &proc_mounts)) {
1826         LERROR << "Can't read /proc/mounts";
1827         return -1;
1828     }
1829     auto mounted_entry = GetEntryForMountPoint(&proc_mounts, "/data");
1830     if (mounted_entry == nullptr) {
1831         LERROR << "/data is not mounted";
1832         return -1;
1833     }
1834     std::string block_device;
1835     if (!Realpath(mounted_entry->blk_device, &block_device)) {
1836         PERROR << "Failed to realpath " << mounted_entry->blk_device;
1837         return -1;
1838     }
1839     auto fstab_entry = fs_mgr_get_mounted_entry_for_userdata(fstab, block_device);
1840     if (fstab_entry == nullptr) {
1841         LERROR << "Can't find /data in fstab";
1842         return -1;
1843     }
1844     bool force_umount = GetBoolProperty("sys.init.userdata_remount.force_umount", false);
1845     if (force_umount) {
1846         LINFO << "Will force an umount of userdata even if it's not required";
1847     }
1848     if (!force_umount && !SupportsCheckpoint(fstab_entry)) {
1849         LINFO << "Userdata doesn't support checkpointing. Nothing to do";
1850         return 0;
1851     }
1852     CheckpointManager checkpoint_manager;
1853     if (!force_umount && !checkpoint_manager.NeedsCheckpoint()) {
1854         LINFO << "Checkpointing not needed. Don't remount";
1855         return 0;
1856     }
1857     if (!force_umount && fstab_entry->fs_mgr_flags.checkpoint_fs) {
1858         // Userdata is f2fs, simply remount it.
1859         if (!checkpoint_manager.Update(fstab_entry)) {
1860             LERROR << "Failed to remount userdata in checkpointing mode";
1861             return -1;
1862         }
1863         if (mount(block_device.c_str(), fstab_entry->mount_point.c_str(), "none",
1864                   MS_REMOUNT | fstab_entry->flags, fstab_entry->fs_options.c_str()) != 0) {
1865             PERROR << "Failed to remount userdata in checkpointing mode";
1866             return -1;
1867         }
1868     } else {
1869         LINFO << "Unmounting /data before remounting into checkpointing mode";
1870         if (!fs_mgr_unmount_all_data_mounts(block_device)) {
1871             LERROR << "Failed to umount /data";
1872             return -1;
1873         }
1874         DeviceMapper& dm = DeviceMapper::Instance();
1875         while (dm.IsDmBlockDevice(block_device)) {
1876             auto next_device = dm.GetParentBlockDeviceByPath(block_device);
1877             auto name = dm.GetDmDeviceNameByPath(block_device);
1878             if (!name) {
1879                 LERROR << "Failed to get dm-name for " << block_device;
1880                 return -1;
1881             }
1882             LINFO << "Deleting " << block_device << " named " << *name;
1883             if (!dm.DeleteDevice(*name, 3s)) {
1884                 return -1;
1885             }
1886             if (!next_device) {
1887                 LERROR << "Failed to find parent device for " << block_device;
1888             }
1889             block_device = *next_device;
1890         }
1891         LINFO << "Remounting /data";
1892         // TODO(b/143970043): remove this hack after fs_mgr_mount_all is refactored.
1893         auto result = fs_mgr_mount_all(fstab, MOUNT_MODE_ONLY_USERDATA);
1894         return result.code == FS_MGR_MNTALL_FAIL ? -1 : 0;
1895     }
1896     return 0;
1897 }
1898 
1899 // wrapper to __mount() and expects a fully prepared fstab_rec,
1900 // unlike fs_mgr_do_mount which does more things with avb / verity etc.
fs_mgr_do_mount_one(const FstabEntry & entry,const std::string & alt_mount_point)1901 int fs_mgr_do_mount_one(const FstabEntry& entry, const std::string& alt_mount_point) {
1902     // First check the filesystem if requested.
1903     if (entry.fs_mgr_flags.wait && !WaitForFile(entry.blk_device, 20s)) {
1904         LERROR << "Skipping mounting '" << entry.blk_device << "'";
1905     }
1906 
1907     auto& mount_point = alt_mount_point.empty() ? entry.mount_point : alt_mount_point;
1908 
1909     // Run fsck if needed
1910     int ret = prepare_fs_for_mount(entry.blk_device, entry, mount_point);
1911     // Wiped case doesn't require to try __mount below.
1912     if (ret & FS_STAT_INVALID_MAGIC) {
1913       return FS_MGR_DOMNT_FAILED;
1914     }
1915 
1916     ret = __mount(entry.blk_device, mount_point, entry);
1917     if (ret) {
1918       ret = (errno == EBUSY) ? FS_MGR_DOMNT_BUSY : FS_MGR_DOMNT_FAILED;
1919     }
1920 
1921     return ret;
1922 }
1923 
// If multiple fstab entries are to be mounted on "n_name", it will try to mount each one
// in turn, and stop on 1st success, or no more match.
//
// |fstab|            entries to search; a null pointer yields FS_MGR_DOMNT_FAILED.
// |n_name|           mount point to look for (compared with fs_match()).
// |n_blk_device|     block device that is waited for, checked and mounted.
// |needs_checkpoint| and |needs_encrypt| are forwarded to the CheckpointManager.
//
// Returns FS_MGR_DOMNT_SUCCESS on the first successful mount, FS_MGR_DOMNT_BUSY when no
// mount succeeded and the first recorded mount failure had errno == EBUSY, and
// FS_MGR_DOMNT_FAILED otherwise.
int fs_mgr_do_mount(Fstab* fstab, const std::string& n_name, const std::string& n_blk_device,
                    int needs_checkpoint, bool needs_encrypt) {
    int mount_errors = 0;
    // errno of the first failed __mount() call across all candidate entries.
    int first_mount_errno = 0;
    std::string mount_point;
    CheckpointManager checkpoint_manager(needs_checkpoint, true, needs_encrypt);
    AvbUniquePtr avb_handle(nullptr);

    if (!fstab) {
        return FS_MGR_DOMNT_FAILED;
    }

    for (auto& fstab_entry : *fstab) {
        if (!fs_match(fstab_entry.mount_point, n_name)) {
            continue;
        }

        // We found our match.
        // If this is swap or a raw partition, report an error.
        if (fstab_entry.fs_type == "swap" || fstab_entry.fs_type == "emmc" ||
            fstab_entry.fs_type == "mtd") {
            LERROR << "Cannot mount filesystem of type " << fstab_entry.fs_type << " on "
                   << n_blk_device;
            return FS_MGR_DOMNT_FAILED;
        }

        if (fstab_entry.fs_mgr_flags.logical) {
            if (!fs_mgr_update_logical_partition(&fstab_entry)) {
                LERROR << "Could not set up logical partition, skipping!";
                continue;
            }
        }

        WrapUserdataIfNeeded(&fstab_entry, n_blk_device);

        if (!checkpoint_manager.Update(&fstab_entry, n_blk_device)) {
            LERROR << "Could not set up checkpoint partition, skipping!";
            continue;
        }

        // Wait (up to 20s) for the block device to appear if the entry requests it.
        if (fstab_entry.fs_mgr_flags.wait && !WaitForFile(n_blk_device, 20s)) {
            LERROR << "Skipping mounting '" << n_blk_device << "'";
            continue;
        }

        // Now mount it where requested.
        mount_point = fstab_entry.mount_point;

        int fs_stat = prepare_fs_for_mount(n_blk_device, fstab_entry, mount_point);

        if (fstab_entry.fs_mgr_flags.avb) {
            // The AvbHandle is opened on first use and then shared by later entries.
            if (!avb_handle) {
                avb_handle = AvbHandle::Open();
                if (!avb_handle) {
                    LERROR << "Failed to open AvbHandle";
                    return FS_MGR_DOMNT_FAILED;
                }
            }
            if (avb_handle->SetUpAvbHashtree(&fstab_entry, true /* wait_for_verity_dev */) ==
                AvbHashtreeResult::kFail) {
                LERROR << "Failed to set up AVB on partition: " << fstab_entry.mount_point
                       << ", skipping!";
                // Skips mounting the device.
                continue;
            }
        } else if (!fstab_entry.avb_keys.empty()) {
            if (AvbHandle::SetUpStandaloneAvbHashtree(&fstab_entry) == AvbHashtreeResult::kFail) {
                LERROR << "Failed to set up AVB on standalone partition: "
                       << fstab_entry.mount_point << ", skipping!";
                // Skips mounting the device.
                continue;
            }
        }

        // Two attempts at most: a plain mount, then one more after check_fs() has run.
        int retry_count = 2;
        while (retry_count-- > 0) {
            if (!__mount(n_blk_device, mount_point, fstab_entry)) {
                fs_stat &= ~FS_STAT_FULL_MOUNT_FAILED;
                log_fs_stat(fstab_entry.blk_device, fs_stat);
                return FS_MGR_DOMNT_SUCCESS;
            } else {
                if (retry_count <= 0) break;  // run check_fs only once
                // Read errno before logging or check_fs() can overwrite it.
                if (!first_mount_errno) first_mount_errno = errno;
                mount_errors++;
                PERROR << "Cannot mount filesystem on " << n_blk_device << " at " << mount_point
                       << " with fstype " << fstab_entry.fs_type;
                fs_stat |= FS_STAT_FULL_MOUNT_FAILED;
                // try again after fsck
                check_fs(n_blk_device, fstab_entry.fs_type, mount_point, &fs_stat);
            }
        }
        log_fs_stat(fstab_entry.blk_device, fs_stat);
    }

    // Reach here means the mount attempt fails.
    if (mount_errors) {
        PERROR << "Cannot mount filesystem on " << n_blk_device << " at " << mount_point;
        if (first_mount_errno == EBUSY) return FS_MGR_DOMNT_BUSY;
    } else {
        // No mount failure was recorded (no matching entry, or every match was skipped
        // before mounting); say so and return an error.
        LERROR << "Cannot find mount point " << n_name << " in fstab";
    }
    return FS_MGR_DOMNT_FAILED;
}
2031 
ConfigureIoScheduler(const std::string & device_path)2032 static bool ConfigureIoScheduler(const std::string& device_path) {
2033     if (!StartsWith(device_path, "/dev/")) {
2034         LERROR << __func__ << ": invalid argument " << device_path;
2035         return false;
2036     }
2037 
2038     const std::string iosched_path =
2039             StringPrintf("/sys/block/%s/queue/scheduler", Basename(device_path).c_str());
2040     unique_fd iosched_fd(open(iosched_path.c_str(), O_RDWR | O_CLOEXEC));
2041     if (iosched_fd.get() == -1) {
2042         PERROR << __func__ << ": failed to open " << iosched_path;
2043         return false;
2044     }
2045 
2046     // Kernels before v4.1 only support 'noop'. Kernels [v4.1, v5.0) support
2047     // 'noop' and 'none'. Kernels v5.0 and later only support 'none'.
2048     static constexpr const std::array<std::string_view, 2> kNoScheduler = {"none", "noop"};
2049 
2050     for (const std::string_view& scheduler : kNoScheduler) {
2051         int ret = write(iosched_fd.get(), scheduler.data(), scheduler.size());
2052         if (ret > 0) {
2053             return true;
2054         }
2055     }
2056 
2057     PERROR << __func__ << ": failed to write to " << iosched_path;
2058     return false;
2059 }
2060 
InstallZramDevice(const std::string & device)2061 static bool InstallZramDevice(const std::string& device) {
2062     if (!android::base::WriteStringToFile(device, ZRAM_BACK_DEV)) {
2063         PERROR << "Cannot write " << device << " in: " << ZRAM_BACK_DEV;
2064         return false;
2065     }
2066     LINFO << "Success to set " << device << " to " << ZRAM_BACK_DEV;
2067     return true;
2068 }
2069 
PrepareZramBackingDevice(off64_t size)2070 static bool PrepareZramBackingDevice(off64_t size) {
2071 
2072     constexpr const char* file_path = "/data/per_boot/zram_swap";
2073     if (size == 0) return true;
2074 
2075     // Prepare target path
2076     unique_fd target_fd(TEMP_FAILURE_RETRY(open(file_path, O_RDWR | O_CREAT | O_CLOEXEC, 0600)));
2077     if (target_fd.get() == -1) {
2078         PERROR << "Cannot open target path: " << file_path;
2079         return false;
2080     }
2081     if (fallocate(target_fd.get(), 0, 0, size) < 0) {
2082         PERROR << "Cannot truncate target path: " << file_path;
2083         return false;
2084     }
2085 
2086     // Allocate loop device and attach it to file_path.
2087     LoopControl loop_control;
2088     std::string loop_device;
2089     if (!loop_control.Attach(target_fd.get(), 5s, &loop_device)) {
2090         return false;
2091     }
2092 
2093     ConfigureIoScheduler(loop_device);
2094 
2095     if (auto ret = ConfigureQueueDepth(loop_device, "/"); !ret.ok()) {
2096         LOG(DEBUG) << "Failed to config queue depth: " << ret.error().message();
2097     }
2098 
2099     // set block size & direct IO
2100     unique_fd loop_fd(TEMP_FAILURE_RETRY(open(loop_device.c_str(), O_RDWR | O_CLOEXEC)));
2101     if (loop_fd.get() == -1) {
2102         PERROR << "Cannot open " << loop_device;
2103         return false;
2104     }
2105     if (!LoopControl::SetAutoClearStatus(loop_fd.get())) {
2106         PERROR << "Failed set LO_FLAGS_AUTOCLEAR for " << loop_device;
2107     }
2108     if (!LoopControl::EnableDirectIo(loop_fd.get())) {
2109         return false;
2110     }
2111 
2112     return InstallZramDevice(loop_device);
2113 }
2114 
fs_mgr_swapon_all(const Fstab & fstab)2115 bool fs_mgr_swapon_all(const Fstab& fstab) {
2116     bool ret = true;
2117     for (const auto& entry : fstab) {
2118         // Skip non-swap entries.
2119         if (entry.fs_type != "swap") {
2120             continue;
2121         }
2122 
2123         if (entry.zram_size > 0) {
2124             if (!PrepareZramBackingDevice(entry.zram_backingdev_size)) {
2125                 LERROR << "Failure of zram backing device file for '" << entry.blk_device << "'";
2126             }
2127             // A zram_size was specified, so we need to configure the
2128             // device.  There is no point in having multiple zram devices
2129             // on a system (all the memory comes from the same pool) so
2130             // we can assume the device number is 0.
2131             if (entry.max_comp_streams >= 0) {
2132                 auto zram_mcs_fp = std::unique_ptr<FILE, decltype(&fclose)>{
2133                         fopen(ZRAM_CONF_MCS, "re"), fclose};
2134                 if (zram_mcs_fp == nullptr) {
2135                     LERROR << "Unable to open zram conf comp device " << ZRAM_CONF_MCS;
2136                     ret = false;
2137                     continue;
2138                 }
2139                 fprintf(zram_mcs_fp.get(), "%d\n", entry.max_comp_streams);
2140             }
2141 
2142             auto zram_fp =
2143                     std::unique_ptr<FILE, decltype(&fclose)>{fopen(ZRAM_CONF_DEV, "re+"), fclose};
2144             if (zram_fp == nullptr) {
2145                 LERROR << "Unable to open zram conf device " << ZRAM_CONF_DEV;
2146                 ret = false;
2147                 continue;
2148             }
2149             fprintf(zram_fp.get(), "%" PRId64 "\n", entry.zram_size);
2150         }
2151 
2152         if (entry.fs_mgr_flags.wait && !WaitForFile(entry.blk_device, 20s)) {
2153             LERROR << "Skipping mkswap for '" << entry.blk_device << "'";
2154             ret = false;
2155             continue;
2156         }
2157 
2158         // Initialize the swap area.
2159         const char* mkswap_argv[2] = {
2160                 MKSWAP_BIN,
2161                 entry.blk_device.c_str(),
2162         };
2163         int err = logwrap_fork_execvp(ARRAY_SIZE(mkswap_argv), mkswap_argv, nullptr, false,
2164                                       LOG_KLOG, false, nullptr);
2165         if (err) {
2166             LERROR << "mkswap failed for " << entry.blk_device;
2167             ret = false;
2168             continue;
2169         }
2170 
2171         /* If -1, then no priority was specified in fstab, so don't set
2172          * SWAP_FLAG_PREFER or encode the priority */
2173         int flags = 0;
2174         if (entry.swap_prio >= 0) {
2175             flags = (entry.swap_prio << SWAP_FLAG_PRIO_SHIFT) & SWAP_FLAG_PRIO_MASK;
2176             flags |= SWAP_FLAG_PREFER;
2177         } else {
2178             flags = 0;
2179         }
2180         err = swapon(entry.blk_device.c_str(), flags);
2181         if (err) {
2182             LERROR << "swapon failed for " << entry.blk_device;
2183             ret = false;
2184         }
2185     }
2186 
2187     return ret;
2188 }
2189 
fs_mgr_is_verity_enabled(const FstabEntry & entry)2190 bool fs_mgr_is_verity_enabled(const FstabEntry& entry) {
2191     if (!entry.fs_mgr_flags.avb) {
2192         return false;
2193     }
2194 
2195     DeviceMapper& dm = DeviceMapper::Instance();
2196 
2197     std::string mount_point = GetVerityDeviceName(entry);
2198     if (dm.GetState(mount_point) == DmDeviceState::INVALID) {
2199         return false;
2200     }
2201 
2202     std::vector<DeviceMapper::TargetInfo> table;
2203     if (!dm.GetTableStatus(mount_point, &table) || table.empty() || table[0].data.empty()) {
2204         return false;
2205     }
2206 
2207     auto status = table[0].data.c_str();
2208     if (*status == 'C' || *status == 'V') {
2209         return true;
2210     }
2211 
2212     return false;
2213 }
2214 
fs_mgr_get_hashtree_info(const android::fs_mgr::FstabEntry & entry)2215 std::optional<HashtreeInfo> fs_mgr_get_hashtree_info(const android::fs_mgr::FstabEntry& entry) {
2216     if (!entry.fs_mgr_flags.avb) {
2217         return {};
2218     }
2219     DeviceMapper& dm = DeviceMapper::Instance();
2220     std::string device = GetVerityDeviceName(entry);
2221 
2222     std::vector<DeviceMapper::TargetInfo> table;
2223     if (dm.GetState(device) == DmDeviceState::INVALID || !dm.GetTableInfo(device, &table)) {
2224         return {};
2225     }
2226     for (const auto& target : table) {
2227         if (strcmp(target.spec.target_type, "verity") != 0) {
2228             continue;
2229         }
2230 
2231         // The format is stable for dm-verity version 0 & 1. And the data is expected to have
2232         // the fixed format:
2233         // <version> <dev> <hash_dev> <data_block_size> <hash_block_size> <num_data_blocks>
2234         // <hash_start_block> <algorithm> <digest> <salt>
2235         // Details in https://www.kernel.org/doc/html/latest/admin-guide/device-mapper/verity.html
2236 
2237         std::vector<std::string> tokens = android::base::Split(target.data, " \t\r\n");
2238         if (tokens[0] != "0" && tokens[0] != "1") {
2239             LOG(WARNING) << "Unrecognized device mapper version in " << target.data;
2240         }
2241 
2242         // Hashtree algorithm & root digest are the 8th & 9th token in the output.
2243         return HashtreeInfo{
2244                 .algorithm = android::base::Trim(tokens[7]),
2245                 .root_digest = android::base::Trim(tokens[8]),
2246                 .check_at_most_once = target.data.find("check_at_most_once") != std::string::npos};
2247     }
2248 
2249     return {};
2250 }
2251 
fs_mgr_verity_is_check_at_most_once(const android::fs_mgr::FstabEntry & entry)2252 bool fs_mgr_verity_is_check_at_most_once(const android::fs_mgr::FstabEntry& entry) {
2253     auto hashtree_info = fs_mgr_get_hashtree_info(entry);
2254     if (!hashtree_info) return false;
2255     return hashtree_info->check_at_most_once;
2256 }
2257 
fs_mgr_get_super_partition_name(int slot)2258 std::string fs_mgr_get_super_partition_name(int slot) {
2259     // Devices upgrading to dynamic partitions are allowed to specify a super
2260     // partition name. This includes cuttlefish, which is a non-A/B device.
2261     std::string super_partition;
2262     if (fs_mgr_get_boot_config("force_super_partition", &super_partition)) {
2263         return super_partition;
2264     }
2265     if (fs_mgr_get_boot_config("super_partition", &super_partition)) {
2266         if (fs_mgr_get_slot_suffix().empty()) {
2267             return super_partition;
2268         }
2269         std::string suffix;
2270         if (slot == 0) {
2271             suffix = "_a";
2272         } else if (slot == 1) {
2273             suffix = "_b";
2274         } else if (slot == -1) {
2275             suffix = fs_mgr_get_slot_suffix();
2276         }
2277         return super_partition + suffix;
2278     }
2279     return LP_METADATA_DEFAULT_PARTITION_NAME;
2280 }
2281 
fs_mgr_create_canonical_mount_point(const std::string & mount_point)2282 bool fs_mgr_create_canonical_mount_point(const std::string& mount_point) {
2283     auto saved_errno = errno;
2284     auto ok = true;
2285     auto created_mount_point = !mkdir(mount_point.c_str(), 0755);
2286     std::string real_mount_point;
2287     if (!Realpath(mount_point, &real_mount_point)) {
2288         ok = false;
2289         PERROR << "failed to realpath(" << mount_point << ")";
2290     } else if (mount_point != real_mount_point) {
2291         ok = false;
2292         LERROR << "mount point is not canonical: realpath(" << mount_point << ") -> "
2293                << real_mount_point;
2294     }
2295     if (!ok && created_mount_point) {
2296         rmdir(mount_point.c_str());
2297     }
2298     errno = saved_errno;
2299     return ok;
2300 }
2301 
fs_mgr_mount_overlayfs_fstab_entry(const FstabEntry & entry)2302 bool fs_mgr_mount_overlayfs_fstab_entry(const FstabEntry& entry) {
2303     const auto overlayfs_check_result = android::fs_mgr::CheckOverlayfs();
2304     if (!overlayfs_check_result.supported) {
2305         LERROR << __FUNCTION__ << "(): kernel does not support overlayfs";
2306         return false;
2307     }
2308 
2309 #if ALLOW_ADBD_DISABLE_VERITY == 0
2310     // Allowlist the mount point if user build.
2311     static const std::vector<const std::string> kAllowedPaths = {
2312             "/odm",         "/odm_dlkm",   "/oem",    "/product",
2313             "/system_dlkm", "/system_ext", "/vendor", "/vendor_dlkm",
2314     };
2315     static const std::vector<const std::string> kAllowedPrefixes = {
2316             "/mnt/product/",
2317             "/mnt/vendor/",
2318     };
2319     if (std::none_of(kAllowedPaths.begin(), kAllowedPaths.end(),
2320                      [&entry](const auto& path) -> bool {
2321                          return entry.mount_point == path ||
2322                                 StartsWith(entry.mount_point, path + "/");
2323                      }) &&
2324         std::none_of(kAllowedPrefixes.begin(), kAllowedPrefixes.end(),
2325                      [&entry](const auto& prefix) -> bool {
2326                          return entry.mount_point != prefix &&
2327                                 StartsWith(entry.mount_point, prefix);
2328                      })) {
2329         LERROR << __FUNCTION__
2330                << "(): mount point is forbidden on user build: " << entry.mount_point;
2331         return false;
2332     }
2333 #endif  // ALLOW_ADBD_DISABLE_VERITY == 0
2334 
2335     if (!fs_mgr_create_canonical_mount_point(entry.mount_point)) {
2336         return false;
2337     }
2338 
2339     auto lowerdir = entry.lowerdir;
2340     if (entry.fs_mgr_flags.overlayfs_remove_missing_lowerdir) {
2341         bool removed_any = false;
2342         std::vector<std::string> lowerdirs;
2343         for (const auto& dir : android::base::Split(entry.lowerdir, ":")) {
2344             if (access(dir.c_str(), F_OK)) {
2345                 PWARNING << __FUNCTION__ << "(): remove missing lowerdir '" << dir << "'";
2346                 removed_any = true;
2347             } else {
2348                 lowerdirs.push_back(dir);
2349             }
2350         }
2351         if (removed_any) {
2352             lowerdir = android::base::Join(lowerdirs, ":");
2353         }
2354     }
2355 
2356     const auto options = "lowerdir=" + lowerdir + overlayfs_check_result.mount_flags;
2357 
2358     // Use "overlay-" + entry.blk_device as the mount() source, so that adb-remout-test don't
2359     // confuse this with adb remount overlay, whose device name is "overlay".
2360     // Overlayfs is a pseudo filesystem, so the source device is a symbolic value and isn't used to
2361     // back the filesystem. However the device name would be shown in /proc/mounts.
2362     auto source = "overlay-" + entry.blk_device;
2363     auto report = "__mount(source=" + source + ",target=" + entry.mount_point + ",type=overlay," +
2364                   options + ")=";
2365     auto ret = mount(source.c_str(), entry.mount_point.c_str(), "overlay", MS_RDONLY | MS_NOATIME,
2366                      options.c_str());
2367     if (ret) {
2368         PERROR << report << ret;
2369         return false;
2370     }
2371     LINFO << report << ret;
2372     return true;
2373 }
2374 
fs_mgr_load_verity_state(int * mode)2375 bool fs_mgr_load_verity_state(int* mode) {
2376     // unless otherwise specified, use EIO mode.
2377     *mode = VERITY_MODE_EIO;
2378 
2379     // The bootloader communicates verity mode via the kernel commandline
2380     std::string verity_mode;
2381     if (!fs_mgr_get_boot_config("veritymode", &verity_mode)) {
2382         return false;
2383     }
2384 
2385     if (verity_mode == "enforcing") {
2386         *mode = VERITY_MODE_DEFAULT;
2387     } else if (verity_mode == "logging") {
2388         *mode = VERITY_MODE_LOGGING;
2389     }
2390 
2391     return true;
2392 }
2393 
fs_mgr_filesystem_available(const std::string & filesystem)2394 bool fs_mgr_filesystem_available(const std::string& filesystem) {
2395     std::string filesystems;
2396     if (!android::base::ReadFileToString("/proc/filesystems", &filesystems)) return false;
2397     return filesystems.find("\t" + filesystem + "\n") != std::string::npos;
2398 }
2399 
fs_mgr_get_context(const std::string & mount_point)2400 std::string fs_mgr_get_context(const std::string& mount_point) {
2401     char* ctx = nullptr;
2402     if (getfilecon(mount_point.c_str(), &ctx) == -1) {
2403         PERROR << "getfilecon " << mount_point;
2404         return "";
2405     }
2406 
2407     std::string context(ctx);
2408     free(ctx);
2409     return context;
2410 }
2411 
2412 namespace android {
2413 namespace fs_mgr {
2414 
CheckOverlayfs()2415 OverlayfsCheckResult CheckOverlayfs() {
2416     if (!fs_mgr_filesystem_available("overlay")) {
2417         return {.supported = false};
2418     }
2419     struct utsname uts;
2420     if (uname(&uts) == -1) {
2421         return {.supported = false};
2422     }
2423     int major, minor;
2424     if (sscanf(uts.release, "%d.%d", &major, &minor) != 2) {
2425         return {.supported = false};
2426     }
2427     // Overlayfs available in the kernel, and patched for override_creds?
2428     if (access("/sys/module/overlay/parameters/override_creds", F_OK) == 0) {
2429         auto mount_flags = ",override_creds=off"s;
2430         if (major > 5 || (major == 5 && minor >= 15)) {
2431             mount_flags += ",userxattr"s;
2432         }
2433         return {.supported = true, .mount_flags = mount_flags};
2434     }
2435     if (major < 4 || (major == 4 && minor <= 3)) {
2436         return {.supported = true};
2437     }
2438     return {.supported = false};
2439 }
2440 
2441 }  // namespace fs_mgr
2442 }  // namespace android
2443