1 /*
2  * Copyright (C) 2021 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 mod file;
18 mod mount;
19 
20 use anyhow::{anyhow, bail, Result};
21 use fuse::filesystem::{
22     Context, DirEntry, DirectoryIterator, Entry, FileSystem, FsOptions, GetxattrReply,
23     SetattrValid, ZeroCopyReader, ZeroCopyWriter,
24 };
25 use fuse::sys::OpenOptions as FuseOpenOptions;
26 use log::{error, trace, warn};
27 use std::collections::{btree_map, BTreeMap};
28 use std::convert::{TryFrom, TryInto};
29 use std::ffi::{CStr, CString, OsStr};
30 use std::io;
31 use std::mem::{zeroed, MaybeUninit};
32 use std::option::Option;
33 use std::os::unix::ffi::OsStrExt;
34 use std::path::{Component, Path, PathBuf};
35 use std::sync::atomic::{AtomicU64, Ordering};
36 use std::sync::{Arc, RwLock};
37 use std::time::Duration;
38 
39 use crate::common::{divide_roundup, ChunkedSizeIter, CHUNK_SIZE};
40 use crate::file::{
41     validate_basename, Attr, InMemoryDir, RandomWrite, ReadByChunk, RemoteDirEditor,
42     RemoteFileEditor, RemoteFileReader,
43 };
44 use crate::fsstat::RemoteFsStatsReader;
45 use crate::fsverity::VerifiedFileEditor;
46 
47 pub use self::file::LazyVerifiedReadonlyFile;
48 pub use self::mount::mount_and_enter_message_loop;
49 use self::mount::MAX_WRITE_BYTES;
50 
/// Identifier of a filesystem entry. It is the key type of the inode table in `AuthFs`.
pub type Inode = u64;
/// Identifier of an opened directory. It is the key type of `DirHandleTable`.
type Handle = u64;

/// Maximum time for a file's metadata to be cached by the kernel. Since any file and directory
/// changes (if not read-only) have to go through AuthFS to be trusted, the timeout can be maximum.
const DEFAULT_METADATA_TIMEOUT: Duration = Duration::MAX;

/// Inode number of the filesystem root. `AuthFs::new` registers it as a `ReadonlyDirectory`, and
/// newly allocated inode numbers start right after it.
const ROOT_INODE: Inode = 1;
59 
/// `AuthFsEntry` defines the filesystem entry types supported by AuthFS.
///
/// Every inode in the filesystem is backed by exactly one of these variants.
pub enum AuthFsEntry {
    /// A read-only directory (writable during initialization). Root directory is an example.
    ReadonlyDirectory { dir: InMemoryDir },
    /// A file type that is verified against fs-verity signature (thus read-only). The file is
    /// served from a remote server.
    VerifiedReadonly { reader: LazyVerifiedReadonlyFile },
    /// A file type that is a read-only passthrough from a file on a remote server. `file_size` is
    /// the size reported in the file's stat and used to bound reads.
    UnverifiedReadonly { reader: RemoteFileReader, file_size: u64 },
    /// A file type that is initially empty, and the content is stored on a remote server. File
    /// integrity is guaranteed with private Merkle tree.
    VerifiedNew { editor: VerifiedFileEditor<RemoteFileEditor>, attr: Attr },
    /// A directory type that is initially empty. One can create new files (`VerifiedNew`) and new
    /// directories (`VerifiedNewDirectory` itself) with integrity guaranteed within the VM.
    VerifiedNewDirectory { dir: RemoteDirEditor, attr: Attr },
}
76 
77 impl AuthFsEntry {
expect_empty_deletable_directory(&self) -> io::Result<()>78     fn expect_empty_deletable_directory(&self) -> io::Result<()> {
79         match self {
80             AuthFsEntry::VerifiedNewDirectory { dir, .. } => {
81                 if dir.number_of_entries() == 0 {
82                     Ok(())
83                 } else {
84                     Err(io::Error::from_raw_os_error(libc::ENOTEMPTY))
85                 }
86             }
87             AuthFsEntry::ReadonlyDirectory { .. } => {
88                 Err(io::Error::from_raw_os_error(libc::EACCES))
89             }
90             _ => Err(io::Error::from_raw_os_error(libc::ENOTDIR)),
91         }
92     }
93 }
94 
/// Per-inode bookkeeping stored in the inode table: the entry itself plus the data needed to
/// decide when an unlinked entry can actually be dropped.
struct InodeState {
    /// Actual inode entry.
    entry: AuthFsEntry,

    /// Number of `Handle`s (i.e. file descriptors) that are currently referring to this inode.
    ///
    /// Technically, this does not matter to readonly entries, since they live forever. The
    /// reference count is only needed for managing the lifetime of writable entries like
    /// `VerifiedNew` and `VerifiedNewDirectory`. That is, when an entry is deleted, the actual
    /// entry needs to stay alive until the reference count reaches zero.
    ///
    /// Note: This is not to be confused with hardlinks, which AuthFS doesn't currently implement.
    handle_ref_count: AtomicU64,

    /// Whether the inode is already unlinked, i.e. should be removed, once `handle_ref_count` is
    /// down to zero.
    unlinked: bool,
}
113 
114 impl InodeState {
new(entry: AuthFsEntry) -> Self115     fn new(entry: AuthFsEntry) -> Self {
116         InodeState { entry, handle_ref_count: AtomicU64::new(0), unlinked: false }
117     }
118 
new_with_ref_count(entry: AuthFsEntry, handle_ref_count: u64) -> Self119     fn new_with_ref_count(entry: AuthFsEntry, handle_ref_count: u64) -> Self {
120         InodeState { entry, handle_ref_count: AtomicU64::new(handle_ref_count), unlinked: false }
121     }
122 }
123 
/// Data type that a directory implementation should be able to present its entry to `AuthFs`.
#[derive(Clone)]
pub struct AuthFsDirEntry {
    /// Inode number of the entry.
    pub inode: Inode,
    /// Name of the entry within its directory.
    pub name: CString,
    /// Whether the entry is a directory. `readdir` reports `DT_DIR` when set, `DT_REG` otherwise.
    pub is_dir: bool,
}
131 
/// A snapshot of a directory's entries for supporting the `readdir` operation.
///
/// The `readdir` implementation is required by FUSE to not return any entries that have been
/// returned previously (while it's fine to not return new entries). A snapshot is the easiest way
/// to be compliant. See `fuse::filesystem::readdir` for more details.
///
/// A `DirEntriesSnapshot` is created on `opendir`, and is associated with the returned
/// `Handle`/FD. The snapshot is deleted when the handle is released in `releasedir`.
type DirEntriesSnapshot = Vec<AuthFsDirEntry>;
141 
/// An iterator for reading from `DirEntriesSnapshot`.
pub struct DirEntriesSnapshotIterator {
    /// A shared reference to the `DirEntriesSnapshot` held in `AuthFs`.
    snapshot: Arc<DirEntriesSnapshot>,

    /// A value determined by `Self` to identify the last entry returned. 0 is a reserved value by
    /// FUSE to mean reading from the beginning; any other value is the 1-based position of the
    /// last returned entry in the snapshot.
    prev_offset: usize,
}
151 
152 impl DirectoryIterator for DirEntriesSnapshotIterator {
next(&mut self) -> Option<DirEntry>153     fn next(&mut self) -> Option<DirEntry> {
154         // This iterator should not be the only reference to the snapshot. The snapshot should
155         // still be hold in `dir_handle_table`, i.e. when the FD is not yet closed.
156         //
157         // This code is unreachable when `readdir` is called with a closed FD. Only when the FD is
158         // not yet closed, `DirEntriesSnapshotIterator` can be created (but still short-lived
159         // during `readdir`).
160         debug_assert!(Arc::strong_count(&self.snapshot) >= 2);
161 
162         // Since 0 is reserved, let's use 1-based index for the offset. This allows us to
163         // resume from the previous read in the snapshot easily.
164         let current_offset = if self.prev_offset == 0 {
165             1 // first element in the vector
166         } else {
167             self.prev_offset + 1 // next element in the vector
168         };
169         if current_offset > self.snapshot.len() {
170             None
171         } else {
172             let AuthFsDirEntry { inode, name, is_dir } = &self.snapshot[current_offset - 1];
173             let entry = DirEntry {
174                 offset: current_offset as u64,
175                 ino: *inode,
176                 name,
177                 type_: if *is_dir { libc::DT_DIR.into() } else { libc::DT_REG.into() },
178             };
179             self.prev_offset = current_offset;
180             Some(entry)
181         }
182     }
183 }
184 
/// Maps the handle of an opened directory to the snapshot of entries taken when it was opened.
type DirHandleTable = BTreeMap<Handle, Arc<DirEntriesSnapshot>>;
186 
/// State of the AuthFS filesystem: the inode table, the table of opened directory handles, and
/// the counters used to allocate new inode and handle numbers.
///
/// AuthFS needs to be `Sync` to be used with the `fuse` crate.
pub struct AuthFs {
    /// Table for `Inode` to `InodeState` lookup.
    inode_table: RwLock<BTreeMap<Inode, InodeState>>,

    /// The next available inode number.
    next_inode: AtomicU64,

    /// Table for `Handle` to `Arc<DirEntriesSnapshot>` lookup. On `opendir`, a new directory handle
    /// is created and the snapshot of the current directory is created. This is not super
    /// efficient, but is the simplest way to be compliant to the FUSE contract (see
    /// `fuse::filesystem::readdir`).
    ///
    /// Currently, no code locks `dir_handle_table` and `inode_table` at the same time to avoid
    /// deadlock.
    dir_handle_table: RwLock<DirHandleTable>,

    /// The next available handle number.
    next_handle: AtomicU64,

    /// A reader to access the remote filesystem stats, which is supposed to be of "the" output
    /// directory. We assume all outputs are stored in the same partition.
    remote_fs_stats_reader: RemoteFsStatsReader,
}
211 
212 // Implementation for preparing an `AuthFs` instance, before starting to serve.
// TODO(victorhsieh): Consider implementing a builder to separate the mutable initialization from
// the immutable / interiorly mutable serving phase.
215 impl AuthFs {
new(remote_fs_stats_reader: RemoteFsStatsReader) -> AuthFs216     pub fn new(remote_fs_stats_reader: RemoteFsStatsReader) -> AuthFs {
217         let mut inode_table = BTreeMap::new();
218         inode_table.insert(
219             ROOT_INODE,
220             InodeState::new(AuthFsEntry::ReadonlyDirectory { dir: InMemoryDir::new() }),
221         );
222 
223         AuthFs {
224             inode_table: RwLock::new(inode_table),
225             next_inode: AtomicU64::new(ROOT_INODE + 1),
226             dir_handle_table: RwLock::new(BTreeMap::new()),
227             next_handle: AtomicU64::new(1),
228             remote_fs_stats_reader,
229         }
230     }
231 
232     /// Add an `AuthFsEntry` as `basename` to the filesystem root.
add_entry_at_root_dir( &mut self, basename: PathBuf, entry: AuthFsEntry, ) -> Result<Inode>233     pub fn add_entry_at_root_dir(
234         &mut self,
235         basename: PathBuf,
236         entry: AuthFsEntry,
237     ) -> Result<Inode> {
238         validate_basename(&basename)?;
239         self.add_entry_at_ro_dir_by_path(ROOT_INODE, &basename, entry)
240     }
241 
242     /// Add an `AuthFsEntry` by path from the `ReadonlyDirectory` represented by `dir_inode`. The
243     /// path must be a related path. If some ancestor directories do not exist, they will be
244     /// created (also as `ReadonlyDirectory`) automatically.
add_entry_at_ro_dir_by_path( &mut self, dir_inode: Inode, path: &Path, entry: AuthFsEntry, ) -> Result<Inode>245     pub fn add_entry_at_ro_dir_by_path(
246         &mut self,
247         dir_inode: Inode,
248         path: &Path,
249         entry: AuthFsEntry,
250     ) -> Result<Inode> {
251         // 1. Make sure the parent directories all exist. Derive the entry's parent inode.
252         let parent_path =
253             path.parent().ok_or_else(|| anyhow!("No parent directory: {:?}", path))?;
254         let parent_inode =
255             parent_path.components().try_fold(dir_inode, |current_dir_inode, path_component| {
256                 match path_component {
257                     Component::RootDir => bail!("Absolute path is not supported"),
258                     Component::Normal(name) => {
259                         let inode_table = self.inode_table.get_mut().unwrap();
260                         // Locate the internal directory structure.
261                         let current_dir_entry = &mut inode_table
262                             .get_mut(&current_dir_inode)
263                             .ok_or_else(|| {
264                                 anyhow!("Unknown directory inode {}", current_dir_inode)
265                             })?
266                             .entry;
267                         let dir = match current_dir_entry {
268                             AuthFsEntry::ReadonlyDirectory { dir } => dir,
269                             _ => unreachable!("Not a ReadonlyDirectory"),
270                         };
271                         // Return directory inode. Create first if not exists.
272                         if let Some(existing_inode) = dir.lookup_inode(name.as_ref()) {
273                             Ok(existing_inode)
274                         } else {
275                             let new_inode = self.next_inode.fetch_add(1, Ordering::Relaxed);
276                             let new_dir_entry =
277                                 AuthFsEntry::ReadonlyDirectory { dir: InMemoryDir::new() };
278 
279                             // Actually update the tables.
280                             dir.add_dir(name.as_ref(), new_inode)?;
281                             if inode_table
282                                 .insert(new_inode, InodeState::new(new_dir_entry))
283                                 .is_some()
284                             {
285                                 bail!("Unexpected to find a duplicated inode");
286                             }
287                             Ok(new_inode)
288                         }
289                     }
290                     _ => Err(anyhow!("Path is not canonical: {:?}", path)),
291                 }
292             })?;
293 
294         // 2. Insert the entry to the parent directory, as well as the inode table.
295         let inode_table = self.inode_table.get_mut().unwrap();
296         let inode_state = inode_table.get_mut(&parent_inode).expect("previously returned inode");
297         match &mut inode_state.entry {
298             AuthFsEntry::ReadonlyDirectory { dir } => {
299                 let basename =
300                     path.file_name().ok_or_else(|| anyhow!("Bad file name: {:?}", path))?;
301                 let new_inode = self.next_inode.fetch_add(1, Ordering::Relaxed);
302 
303                 // Actually update the tables.
304                 dir.add_file(basename.as_ref(), new_inode)?;
305                 if inode_table.insert(new_inode, InodeState::new(entry)).is_some() {
306                     bail!("Unexpected to find a duplicated inode");
307                 }
308                 Ok(new_inode)
309             }
310             _ => unreachable!("Not a ReadonlyDirectory"),
311         }
312     }
313 }
314 
315 // Implementation for serving requests.
316 impl AuthFs {
317     /// Handles the file associated with `inode` if found. This function returns whatever
318     /// `handle_fn` returns.
handle_inode<F, R>(&self, inode: &Inode, handle_fn: F) -> io::Result<R> where F: FnOnce(&AuthFsEntry) -> io::Result<R>,319     fn handle_inode<F, R>(&self, inode: &Inode, handle_fn: F) -> io::Result<R>
320     where
321         F: FnOnce(&AuthFsEntry) -> io::Result<R>,
322     {
323         let inode_table = self.inode_table.read().unwrap();
324         handle_inode_locked(&inode_table, inode, |inode_state| handle_fn(&inode_state.entry))
325     }
326 
327     /// Adds a new entry `name` created by `create_fn` at `parent_inode`, with an initial ref count
328     /// of one.
329     ///
330     /// The operation involves two updates: adding the name with a new allocated inode to the
331     /// parent directory, and insert the new inode and the actual `AuthFsEntry` to the global inode
332     /// table.
333     ///
334     /// `create_fn` receives the parent directory, through which it can create the new entry at and
335     /// register the new inode to. Its returned entry is then added to the inode table.
create_new_entry_with_ref_count<F>( &self, parent_inode: Inode, name: &CStr, create_fn: F, ) -> io::Result<Inode> where F: FnOnce(&mut AuthFsEntry, &Path, Inode) -> io::Result<AuthFsEntry>,336     fn create_new_entry_with_ref_count<F>(
337         &self,
338         parent_inode: Inode,
339         name: &CStr,
340         create_fn: F,
341     ) -> io::Result<Inode>
342     where
343         F: FnOnce(&mut AuthFsEntry, &Path, Inode) -> io::Result<AuthFsEntry>,
344     {
345         let mut inode_table = self.inode_table.write().unwrap();
346         let (new_inode, new_file_entry) = handle_inode_mut_locked(
347             &mut inode_table,
348             &parent_inode,
349             |InodeState { entry, .. }| {
350                 let new_inode = self.next_inode.fetch_add(1, Ordering::Relaxed);
351                 let basename: &Path = cstr_to_path(name);
352                 let new_file_entry = create_fn(entry, basename, new_inode)?;
353                 Ok((new_inode, new_file_entry))
354             },
355         )?;
356 
357         if let btree_map::Entry::Vacant(entry) = inode_table.entry(new_inode) {
358             entry.insert(InodeState::new_with_ref_count(new_file_entry, 1));
359             Ok(new_inode)
360         } else {
361             unreachable!("Unexpected duplication of inode {}", new_inode);
362         }
363     }
364 
open_dir_store_snapshot( &self, dir_entries: Vec<AuthFsDirEntry>, ) -> io::Result<(Option<Handle>, FuseOpenOptions)>365     fn open_dir_store_snapshot(
366         &self,
367         dir_entries: Vec<AuthFsDirEntry>,
368     ) -> io::Result<(Option<Handle>, FuseOpenOptions)> {
369         let handle = self.next_handle.fetch_add(1, Ordering::Relaxed);
370         let mut dir_handle_table = self.dir_handle_table.write().unwrap();
371         if let btree_map::Entry::Vacant(value) = dir_handle_table.entry(handle) {
372             value.insert(Arc::new(dir_entries));
373             Ok((Some(handle), FuseOpenOptions::empty()))
374         } else {
375             unreachable!("Unexpected to see new handle {} to existing in the table", handle);
376         }
377     }
378 }
379 
check_access_mode(flags: u32, mode: libc::c_int) -> io::Result<()>380 fn check_access_mode(flags: u32, mode: libc::c_int) -> io::Result<()> {
381     if (flags & libc::O_ACCMODE as u32) == mode as u32 {
382         Ok(())
383     } else {
384         Err(io::Error::from_raw_os_error(libc::EACCES))
385     }
386 }
387 
// `blk_size` returns `CHUNK_SIZE` converted to the integer type that `create_stat` assigns to
// `st_blksize`. The return type is selected per target: `c_int` on 64-bit aarch64/riscv64 and
// `c_long` everywhere else (presumably matching how `libc::stat64` declares `st_blksize` on each
// target; confirm against the libc crate).
cfg_if::cfg_if! {
    if #[cfg(all(any(target_arch = "aarch64", target_arch = "riscv64"),
                 target_pointer_width = "64"))] {
        fn blk_size() -> libc::c_int { CHUNK_SIZE as libc::c_int }
    } else {
        fn blk_size() -> libc::c_long { CHUNK_SIZE as libc::c_long }
    }
}
396 
/// How the permission bits of a stat are chosen by `create_stat` and `create_dir_stat`.
#[allow(clippy::enum_variant_names)]
enum AccessMode {
    /// Use the fixed read-only permission bits hard-coded in the stat helpers.
    ReadOnly,
    /// Use the given mode bits, which the stat helpers OR with the file type bits.
    Variable(u32),
}
402 
create_stat( ino: libc::ino_t, file_size: u64, access_mode: AccessMode, ) -> io::Result<libc::stat64>403 fn create_stat(
404     ino: libc::ino_t,
405     file_size: u64,
406     access_mode: AccessMode,
407 ) -> io::Result<libc::stat64> {
408     // SAFETY: stat64 is a plan C struct without pointer.
409     let mut st = unsafe { MaybeUninit::<libc::stat64>::zeroed().assume_init() };
410 
411     st.st_ino = ino;
412     st.st_mode = match access_mode {
413         AccessMode::ReadOnly => {
414             // Until needed, let's just grant the owner access.
415             libc::S_IFREG | libc::S_IRUSR
416         }
417         AccessMode::Variable(mode) => libc::S_IFREG | mode,
418     };
419     st.st_nlink = 1;
420     st.st_uid = 0;
421     st.st_gid = 0;
422     st.st_size = libc::off64_t::try_from(file_size)
423         .map_err(|_| io::Error::from_raw_os_error(libc::EFBIG))?;
424     st.st_blksize = blk_size();
425     // Per man stat(2), st_blocks is "Number of 512B blocks allocated".
426     st.st_blocks = libc::c_longlong::try_from(divide_roundup(file_size, 512))
427         .map_err(|_| io::Error::from_raw_os_error(libc::EFBIG))?;
428     Ok(st)
429 }
430 
create_dir_stat( ino: libc::ino_t, file_number: u16, access_mode: AccessMode, ) -> io::Result<libc::stat64>431 fn create_dir_stat(
432     ino: libc::ino_t,
433     file_number: u16,
434     access_mode: AccessMode,
435 ) -> io::Result<libc::stat64> {
436     // SAFETY: stat64 is a plan C struct without pointer.
437     let mut st = unsafe { MaybeUninit::<libc::stat64>::zeroed().assume_init() };
438 
439     st.st_ino = ino;
440     st.st_mode = match access_mode {
441         AccessMode::ReadOnly => {
442             // Until needed, let's just grant the owner access and search to group and others.
443             libc::S_IFDIR | libc::S_IXUSR | libc::S_IRUSR | libc::S_IXGRP | libc::S_IXOTH
444         }
445         AccessMode::Variable(mode) => libc::S_IFDIR | mode,
446     };
447 
448     // 2 extra for . and ..
449     st.st_nlink = file_number
450         .checked_add(2)
451         .ok_or_else(|| io::Error::from_raw_os_error(libc::EOVERFLOW))?
452         .into();
453 
454     st.st_uid = 0;
455     st.st_gid = 0;
456     Ok(st)
457 }
458 
/// Returns the index of the `CHUNK_SIZE`-sized chunk that contains byte `offset`.
fn offset_to_chunk_index(offset: u64) -> u64 {
    offset / CHUNK_SIZE
}
462 
read_chunks<W: io::Write, T: ReadByChunk>( mut w: W, file: &T, file_size: u64, offset: u64, size: u32, ) -> io::Result<usize>463 fn read_chunks<W: io::Write, T: ReadByChunk>(
464     mut w: W,
465     file: &T,
466     file_size: u64,
467     offset: u64,
468     size: u32,
469 ) -> io::Result<usize> {
470     let remaining = file_size.saturating_sub(offset);
471     let size_to_read = std::cmp::min(size as usize, remaining as usize);
472     let total = ChunkedSizeIter::new(size_to_read, offset, CHUNK_SIZE as usize).try_fold(
473         0,
474         |total, (current_offset, planned_data_size)| {
475             // TODO(victorhsieh): There might be a non-trivial way to avoid this copy. For example,
476             // instead of accepting a buffer, the writer could expose the final destination buffer
477             // for the reader to write to. It might not be generally applicable though, e.g. with
478             // virtio transport, the buffer may not be continuous.
479             let mut buf = [0u8; CHUNK_SIZE as usize];
480             let read_size = file.read_chunk(offset_to_chunk_index(current_offset), &mut buf)?;
481             if read_size < planned_data_size {
482                 return Err(io::Error::from_raw_os_error(libc::ENODATA));
483             }
484 
485             let begin = (current_offset % CHUNK_SIZE) as usize;
486             let end = begin + planned_data_size;
487             let s = w.write(&buf[begin..end])?;
488             if s != planned_data_size {
489                 return Err(io::Error::from_raw_os_error(libc::EIO));
490             }
491             Ok(total + s)
492         },
493     )?;
494 
495     Ok(total)
496 }
497 
498 impl FileSystem for AuthFs {
499     type Inode = Inode;
500     type Handle = Handle;
501     type DirIter = DirEntriesSnapshotIterator;
502 
    /// Returns the buffer size limit of this filesystem, which is `MAX_WRITE_BYTES`.
    fn max_buffer_size(&self) -> u32 {
        MAX_WRITE_BYTES
    }
506 
    /// Returns the options to enable for this filesystem. The set of options the other side is
    /// capable of (`_capable`) is not consulted; `WRITEBACK_CACHE` is always requested.
    fn init(&self, _capable: FsOptions) -> io::Result<FsOptions> {
        // Enable writeback cache for better performance especially since our bandwidth to the
        // backend service is limited.
        Ok(FsOptions::WRITEBACK_CACHE)
    }
512 
lookup(&self, _ctx: Context, parent: Inode, name: &CStr) -> io::Result<Entry>513     fn lookup(&self, _ctx: Context, parent: Inode, name: &CStr) -> io::Result<Entry> {
514         let inode_table = self.inode_table.read().unwrap();
515 
516         // Look up the entry's inode number in parent directory.
517         let inode =
518             handle_inode_locked(&inode_table, &parent, |inode_state| match &inode_state.entry {
519                 AuthFsEntry::ReadonlyDirectory { dir } => {
520                     let path = cstr_to_path(name);
521                     dir.lookup_inode(path).ok_or_else(|| io::Error::from_raw_os_error(libc::ENOENT))
522                 }
523                 AuthFsEntry::VerifiedNewDirectory { dir, .. } => {
524                     let path = cstr_to_path(name);
525                     dir.find_inode(path)
526                 }
527                 _ => Err(io::Error::from_raw_os_error(libc::ENOTDIR)),
528             })?;
529 
530         // Create the entry's stat if found.
531         let st = handle_inode_locked(
532             &inode_table,
533             &inode,
534             |InodeState { entry, handle_ref_count, .. }| {
535                 let st = match entry {
536                     AuthFsEntry::ReadonlyDirectory { dir } => {
537                         create_dir_stat(inode, dir.number_of_entries(), AccessMode::ReadOnly)
538                     }
539                     AuthFsEntry::UnverifiedReadonly { file_size, .. } => {
540                         create_stat(inode, *file_size, AccessMode::ReadOnly)
541                     }
542                     AuthFsEntry::VerifiedReadonly { reader } => {
543                         create_stat(inode, reader.file_size()?, AccessMode::ReadOnly)
544                     }
545                     AuthFsEntry::VerifiedNew { editor, attr, .. } => {
546                         create_stat(inode, editor.size(), AccessMode::Variable(attr.mode()))
547                     }
548                     AuthFsEntry::VerifiedNewDirectory { dir, attr } => create_dir_stat(
549                         inode,
550                         dir.number_of_entries(),
551                         AccessMode::Variable(attr.mode()),
552                     ),
553                 }?;
554                 if handle_ref_count.fetch_add(1, Ordering::Relaxed) == u64::MAX {
555                     panic!("Handle reference count overflow");
556                 }
557                 Ok(st)
558             },
559         )?;
560 
561         Ok(Entry {
562             inode,
563             generation: 0,
564             attr: st,
565             entry_timeout: DEFAULT_METADATA_TIMEOUT,
566             attr_timeout: DEFAULT_METADATA_TIMEOUT,
567         })
568     }
569 
forget(&self, _ctx: Context, inode: Self::Inode, count: u64)570     fn forget(&self, _ctx: Context, inode: Self::Inode, count: u64) {
571         let mut inode_table = self.inode_table.write().unwrap();
572         let delete_now = handle_inode_mut_locked(
573             &mut inode_table,
574             &inode,
575             |InodeState { handle_ref_count, unlinked, .. }| {
576                 let current = handle_ref_count.get_mut();
577                 if count > *current {
578                     error!(
579                         "Trying to decrease refcount of inode {} by {} (> current {})",
580                         inode, count, *current
581                     );
582                     panic!(); // log to logcat with error!
583                 }
584                 *current -= count;
585                 Ok(*unlinked && *current == 0)
586             },
587         );
588 
589         match delete_now {
590             Ok(true) => {
591                 let _ignored = inode_table.remove(&inode).expect("Removed an existing entry");
592             }
593             Ok(false) => { /* Let the inode stay */ }
594             Err(e) => {
595                 warn!(
596                     "Unexpected failure when tries to forget an inode {} by refcount {}: {:?}",
597                     inode, count, e
598                 );
599             }
600         }
601     }
602 
getattr( &self, _ctx: Context, inode: Inode, _handle: Option<Handle>, ) -> io::Result<(libc::stat64, Duration)>603     fn getattr(
604         &self,
605         _ctx: Context,
606         inode: Inode,
607         _handle: Option<Handle>,
608     ) -> io::Result<(libc::stat64, Duration)> {
609         self.handle_inode(&inode, |config| {
610             Ok((
611                 match config {
612                     AuthFsEntry::ReadonlyDirectory { dir } => {
613                         create_dir_stat(inode, dir.number_of_entries(), AccessMode::ReadOnly)
614                     }
615                     AuthFsEntry::UnverifiedReadonly { file_size, .. } => {
616                         create_stat(inode, *file_size, AccessMode::ReadOnly)
617                     }
618                     AuthFsEntry::VerifiedReadonly { reader } => {
619                         create_stat(inode, reader.file_size()?, AccessMode::ReadOnly)
620                     }
621                     AuthFsEntry::VerifiedNew { editor, attr, .. } => {
622                         create_stat(inode, editor.size(), AccessMode::Variable(attr.mode()))
623                     }
624                     AuthFsEntry::VerifiedNewDirectory { dir, attr } => create_dir_stat(
625                         inode,
626                         dir.number_of_entries(),
627                         AccessMode::Variable(attr.mode()),
628                     ),
629                 }?,
630                 DEFAULT_METADATA_TIMEOUT,
631             ))
632         })
633     }
634 
open( &self, _ctx: Context, inode: Self::Inode, flags: u32, ) -> io::Result<(Option<Self::Handle>, FuseOpenOptions)>635     fn open(
636         &self,
637         _ctx: Context,
638         inode: Self::Inode,
639         flags: u32,
640     ) -> io::Result<(Option<Self::Handle>, FuseOpenOptions)> {
641         // Since file handle is not really used in later operations (which use Inode directly),
642         // return None as the handle.
643         self.handle_inode(&inode, |config| {
644             match config {
645                 AuthFsEntry::VerifiedReadonly { .. } | AuthFsEntry::UnverifiedReadonly { .. } => {
646                     check_access_mode(flags, libc::O_RDONLY)?;
647                 }
648                 AuthFsEntry::VerifiedNew { .. } => {
649                     // TODO(victorhsieh): Imeplement ACL check using the attr and ctx. Always allow
650                     // for now.
651                 }
652                 AuthFsEntry::ReadonlyDirectory { .. }
653                 | AuthFsEntry::VerifiedNewDirectory { .. } => {
654                     // TODO(victorhsieh): implement when needed.
655                     return Err(io::Error::from_raw_os_error(libc::ENOSYS));
656                 }
657             }
658             // Always cache the file content. There is currently no need to support direct I/O or
659             // avoid the cache buffer. Memory mapping is only possible with cache enabled.
660             Ok((None, FuseOpenOptions::KEEP_CACHE))
661         })
662     }
663 
create( &self, _ctx: Context, parent: Self::Inode, name: &CStr, mode: u32, _flags: u32, umask: u32, _security_ctx: Option<&CStr>, ) -> io::Result<(Entry, Option<Self::Handle>, FuseOpenOptions)>664     fn create(
665         &self,
666         _ctx: Context,
667         parent: Self::Inode,
668         name: &CStr,
669         mode: u32,
670         _flags: u32,
671         umask: u32,
672         _security_ctx: Option<&CStr>,
673     ) -> io::Result<(Entry, Option<Self::Handle>, FuseOpenOptions)> {
674         let new_inode = self.create_new_entry_with_ref_count(
675             parent,
676             name,
677             |parent_entry, basename, new_inode| match parent_entry {
678                 AuthFsEntry::VerifiedNewDirectory { dir, .. } => {
679                     if dir.has_entry(basename) {
680                         return Err(io::Error::from_raw_os_error(libc::EEXIST));
681                     }
682                     let mode = mode & !umask;
683                     let (new_file, new_attr) = dir.create_file(basename, new_inode, mode)?;
684                     Ok(AuthFsEntry::VerifiedNew { editor: new_file, attr: new_attr })
685                 }
686                 _ => Err(io::Error::from_raw_os_error(libc::EBADF)),
687             },
688         )?;
689 
690         Ok((
691             Entry {
692                 inode: new_inode,
693                 generation: 0,
694                 attr: create_stat(new_inode, /* file_size */ 0, AccessMode::Variable(mode))?,
695                 entry_timeout: DEFAULT_METADATA_TIMEOUT,
696                 attr_timeout: DEFAULT_METADATA_TIMEOUT,
697             },
698             // See also `open`.
699             /* handle */ None,
700             FuseOpenOptions::KEEP_CACHE,
701         ))
702     }
703 
read<W: io::Write + ZeroCopyWriter>( &self, _ctx: Context, inode: Inode, _handle: Handle, w: W, size: u32, offset: u64, _lock_owner: Option<u64>, _flags: u32, ) -> io::Result<usize>704     fn read<W: io::Write + ZeroCopyWriter>(
705         &self,
706         _ctx: Context,
707         inode: Inode,
708         _handle: Handle,
709         w: W,
710         size: u32,
711         offset: u64,
712         _lock_owner: Option<u64>,
713         _flags: u32,
714     ) -> io::Result<usize> {
715         self.handle_inode(&inode, |config| {
716             match config {
717                 AuthFsEntry::VerifiedReadonly { reader } => {
718                     read_chunks(w, reader, reader.file_size()?, offset, size)
719                 }
720                 AuthFsEntry::UnverifiedReadonly { reader, file_size } => {
721                     read_chunks(w, reader, *file_size, offset, size)
722                 }
723                 AuthFsEntry::VerifiedNew { editor, .. } => {
724                     // Note that with FsOptions::WRITEBACK_CACHE, it's possible for the kernel to
725                     // request a read even if the file is open with O_WRONLY.
726                     read_chunks(w, editor, editor.size(), offset, size)
727                 }
728                 AuthFsEntry::ReadonlyDirectory { .. }
729                 | AuthFsEntry::VerifiedNewDirectory { .. } => {
730                     Err(io::Error::from_raw_os_error(libc::EISDIR))
731                 }
732             }
733         })
734     }
735 
write<R: io::Read + ZeroCopyReader>( &self, _ctx: Context, inode: Self::Inode, _handle: Self::Handle, mut r: R, size: u32, offset: u64, _lock_owner: Option<u64>, _delayed_write: bool, _flags: u32, ) -> io::Result<usize>736     fn write<R: io::Read + ZeroCopyReader>(
737         &self,
738         _ctx: Context,
739         inode: Self::Inode,
740         _handle: Self::Handle,
741         mut r: R,
742         size: u32,
743         offset: u64,
744         _lock_owner: Option<u64>,
745         _delayed_write: bool,
746         _flags: u32,
747     ) -> io::Result<usize> {
748         self.handle_inode(&inode, |config| match config {
749             AuthFsEntry::VerifiedNew { editor, .. } => {
750                 let mut buf = vec![0; size as usize];
751                 r.read_exact(&mut buf)?;
752                 editor.write_at(&buf, offset)
753             }
754             AuthFsEntry::VerifiedReadonly { .. } | AuthFsEntry::UnverifiedReadonly { .. } => {
755                 Err(io::Error::from_raw_os_error(libc::EPERM))
756             }
757             AuthFsEntry::ReadonlyDirectory { .. } | AuthFsEntry::VerifiedNewDirectory { .. } => {
758                 Err(io::Error::from_raw_os_error(libc::EISDIR))
759             }
760         })
761     }
762 
setattr( &self, _ctx: Context, inode: Inode, in_attr: libc::stat64, _handle: Option<Handle>, valid: SetattrValid, ) -> io::Result<(libc::stat64, Duration)>763     fn setattr(
764         &self,
765         _ctx: Context,
766         inode: Inode,
767         in_attr: libc::stat64,
768         _handle: Option<Handle>,
769         valid: SetattrValid,
770     ) -> io::Result<(libc::stat64, Duration)> {
771         let mut inode_table = self.inode_table.write().unwrap();
772         handle_inode_mut_locked(&mut inode_table, &inode, |InodeState { entry, .. }| match entry {
773             AuthFsEntry::VerifiedNew { editor, attr } => {
774                 check_unsupported_setattr_request(valid)?;
775 
776                 // Initialize the default stat.
777                 let mut new_attr =
778                     create_stat(inode, editor.size(), AccessMode::Variable(attr.mode()))?;
779                 // `valid` indicates what fields in `attr` are valid. Update to return correctly.
780                 if valid.contains(SetattrValid::SIZE) {
781                     // st_size is i64, but the cast should be safe since kernel should not give a
782                     // negative size.
783                     debug_assert!(in_attr.st_size >= 0);
784                     new_attr.st_size = in_attr.st_size;
785                     editor.resize(in_attr.st_size as u64)?;
786                 }
787                 if valid.contains(SetattrValid::MODE) {
788                     attr.set_mode(in_attr.st_mode)?;
789                     new_attr.st_mode = in_attr.st_mode;
790                 }
791                 Ok((new_attr, DEFAULT_METADATA_TIMEOUT))
792             }
793             AuthFsEntry::VerifiedNewDirectory { dir, attr } => {
794                 check_unsupported_setattr_request(valid)?;
795                 if valid.contains(SetattrValid::SIZE) {
796                     return Err(io::Error::from_raw_os_error(libc::EISDIR));
797                 }
798 
799                 // Initialize the default stat.
800                 let mut new_attr = create_dir_stat(
801                     inode,
802                     dir.number_of_entries(),
803                     AccessMode::Variable(attr.mode()),
804                 )?;
805                 if valid.contains(SetattrValid::MODE) {
806                     attr.set_mode(in_attr.st_mode)?;
807                     new_attr.st_mode = in_attr.st_mode;
808                 }
809                 Ok((new_attr, DEFAULT_METADATA_TIMEOUT))
810             }
811             _ => Err(io::Error::from_raw_os_error(libc::EPERM)),
812         })
813     }
814 
getxattr( &self, _ctx: Context, inode: Self::Inode, name: &CStr, size: u32, ) -> io::Result<GetxattrReply>815     fn getxattr(
816         &self,
817         _ctx: Context,
818         inode: Self::Inode,
819         name: &CStr,
820         size: u32,
821     ) -> io::Result<GetxattrReply> {
822         self.handle_inode(&inode, |config| {
823             match config {
824                 AuthFsEntry::VerifiedNew { editor, .. } => {
825                     // FUSE ioctl is limited, thus we can't implement fs-verity ioctls without a kernel
826                     // change (see b/196635431). Until it's possible, use xattr to expose what we need
827                     // as an authfs specific API.
828                     if name != CStr::from_bytes_with_nul(b"authfs.fsverity.digest\0").unwrap() {
829                         return Err(io::Error::from_raw_os_error(libc::ENODATA));
830                     }
831 
832                     if size == 0 {
833                         // Per protocol, when size is 0, return the value size.
834                         Ok(GetxattrReply::Count(editor.get_fsverity_digest_size() as u32))
835                     } else {
836                         let digest = editor.calculate_fsverity_digest()?;
837                         if digest.len() > size as usize {
838                             Err(io::Error::from_raw_os_error(libc::ERANGE))
839                         } else {
840                             Ok(GetxattrReply::Value(digest.to_vec()))
841                         }
842                     }
843                 }
844                 _ => Err(io::Error::from_raw_os_error(libc::ENODATA)),
845             }
846         })
847     }
848 
mkdir( &self, _ctx: Context, parent: Self::Inode, name: &CStr, mode: u32, umask: u32, _security_ctx: Option<&CStr>, ) -> io::Result<Entry>849     fn mkdir(
850         &self,
851         _ctx: Context,
852         parent: Self::Inode,
853         name: &CStr,
854         mode: u32,
855         umask: u32,
856         _security_ctx: Option<&CStr>,
857     ) -> io::Result<Entry> {
858         let new_inode = self.create_new_entry_with_ref_count(
859             parent,
860             name,
861             |parent_entry, basename, new_inode| match parent_entry {
862                 AuthFsEntry::VerifiedNewDirectory { dir, .. } => {
863                     if dir.has_entry(basename) {
864                         return Err(io::Error::from_raw_os_error(libc::EEXIST));
865                     }
866                     let mode = mode & !umask;
867                     let (new_dir, new_attr) = dir.mkdir(basename, new_inode, mode)?;
868                     Ok(AuthFsEntry::VerifiedNewDirectory { dir: new_dir, attr: new_attr })
869                 }
870                 AuthFsEntry::ReadonlyDirectory { .. } => {
871                     Err(io::Error::from_raw_os_error(libc::EACCES))
872                 }
873                 _ => Err(io::Error::from_raw_os_error(libc::EBADF)),
874             },
875         )?;
876 
877         Ok(Entry {
878             inode: new_inode,
879             generation: 0,
880             attr: create_dir_stat(new_inode, /* file_number */ 0, AccessMode::Variable(mode))?,
881             entry_timeout: DEFAULT_METADATA_TIMEOUT,
882             attr_timeout: DEFAULT_METADATA_TIMEOUT,
883         })
884     }
885 
unlink(&self, _ctx: Context, parent: Self::Inode, name: &CStr) -> io::Result<()>886     fn unlink(&self, _ctx: Context, parent: Self::Inode, name: &CStr) -> io::Result<()> {
887         let mut inode_table = self.inode_table.write().unwrap();
888         handle_inode_mut_locked(
889             &mut inode_table,
890             &parent,
891             |InodeState { entry, unlinked, .. }| match entry {
892                 AuthFsEntry::VerifiedNewDirectory { dir, .. } => {
893                     let basename: &Path = cstr_to_path(name);
894                     // Delete the file from in both the local and remote directories.
895                     let _inode = dir.delete_file(basename)?;
896                     *unlinked = true;
897                     Ok(())
898                 }
899                 AuthFsEntry::ReadonlyDirectory { .. } => {
900                     Err(io::Error::from_raw_os_error(libc::EACCES))
901                 }
902                 AuthFsEntry::VerifiedNew { .. } => {
903                     // Deleting a entry in filesystem root is not currently supported.
904                     Err(io::Error::from_raw_os_error(libc::ENOSYS))
905                 }
906                 AuthFsEntry::UnverifiedReadonly { .. } | AuthFsEntry::VerifiedReadonly { .. } => {
907                     Err(io::Error::from_raw_os_error(libc::ENOTDIR))
908                 }
909             },
910         )
911     }
912 
rmdir(&self, _ctx: Context, parent: Self::Inode, name: &CStr) -> io::Result<()>913     fn rmdir(&self, _ctx: Context, parent: Self::Inode, name: &CStr) -> io::Result<()> {
914         let mut inode_table = self.inode_table.write().unwrap();
915 
916         // Check before actual removal, with readonly borrow.
917         handle_inode_locked(&inode_table, &parent, |inode_state| match &inode_state.entry {
918             AuthFsEntry::VerifiedNewDirectory { dir, .. } => {
919                 let basename: &Path = cstr_to_path(name);
920                 let existing_inode = dir.find_inode(basename)?;
921                 handle_inode_locked(&inode_table, &existing_inode, |inode_state| {
922                     inode_state.entry.expect_empty_deletable_directory()
923                 })
924             }
925             AuthFsEntry::ReadonlyDirectory { .. } => {
926                 Err(io::Error::from_raw_os_error(libc::EACCES))
927             }
928             _ => Err(io::Error::from_raw_os_error(libc::ENOTDIR)),
929         })?;
930 
931         // Look up again, this time with mutable borrow. This needs to be done separately because
932         // the previous lookup needs to borrow multiple entry references in the table.
933         handle_inode_mut_locked(
934             &mut inode_table,
935             &parent,
936             |InodeState { entry, unlinked, .. }| match entry {
937                 AuthFsEntry::VerifiedNewDirectory { dir, .. } => {
938                     let basename: &Path = cstr_to_path(name);
939                     let _inode = dir.force_delete_directory(basename)?;
940                     *unlinked = true;
941                     Ok(())
942                 }
943                 _ => unreachable!("Mismatched entry type that is just checked"),
944             },
945         )
946     }
947 
opendir( &self, _ctx: Context, inode: Self::Inode, _flags: u32, ) -> io::Result<(Option<Self::Handle>, FuseOpenOptions)>948     fn opendir(
949         &self,
950         _ctx: Context,
951         inode: Self::Inode,
952         _flags: u32,
953     ) -> io::Result<(Option<Self::Handle>, FuseOpenOptions)> {
954         let entries = self.handle_inode(&inode, |config| match config {
955             AuthFsEntry::VerifiedNewDirectory { dir, .. } => dir.retrieve_entries(),
956             AuthFsEntry::ReadonlyDirectory { dir } => dir.retrieve_entries(),
957             _ => Err(io::Error::from_raw_os_error(libc::ENOTDIR)),
958         })?;
959         self.open_dir_store_snapshot(entries)
960     }
961 
readdir( &self, _ctx: Context, _inode: Self::Inode, handle: Self::Handle, _size: u32, offset: u64, ) -> io::Result<Self::DirIter>962     fn readdir(
963         &self,
964         _ctx: Context,
965         _inode: Self::Inode,
966         handle: Self::Handle,
967         _size: u32,
968         offset: u64,
969     ) -> io::Result<Self::DirIter> {
970         let dir_handle_table = self.dir_handle_table.read().unwrap();
971         if let Some(entry) = dir_handle_table.get(&handle) {
972             Ok(DirEntriesSnapshotIterator {
973                 snapshot: entry.clone(),
974                 prev_offset: offset.try_into().unwrap(),
975             })
976         } else {
977             Err(io::Error::from_raw_os_error(libc::EBADF))
978         }
979     }
980 
releasedir( &self, _ctx: Context, inode: Self::Inode, _flags: u32, handle: Self::Handle, ) -> io::Result<()>981     fn releasedir(
982         &self,
983         _ctx: Context,
984         inode: Self::Inode,
985         _flags: u32,
986         handle: Self::Handle,
987     ) -> io::Result<()> {
988         let mut dir_handle_table = self.dir_handle_table.write().unwrap();
989         if dir_handle_table.remove(&handle).is_none() {
990             unreachable!("Unknown directory handle {}, inode {}", handle, inode);
991         }
992         Ok(())
993     }
994 
statfs(&self, _ctx: Context, _inode: Self::Inode) -> io::Result<libc::statvfs64>995     fn statfs(&self, _ctx: Context, _inode: Self::Inode) -> io::Result<libc::statvfs64> {
996         let remote_stat = self.remote_fs_stats_reader.statfs()?;
997 
998         // SAFETY: We are zero-initializing a struct with only POD fields. Not all fields matter to
999         // FUSE. See also:
1000         // https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/fs/fuse/inode.c?h=v5.15#n460
1001         let mut st: libc::statvfs64 = unsafe { zeroed() };
1002 
1003         // Use the remote stat as a template, since it'd matter the most to consider the writable
1004         // files/directories that are written to the remote.
1005         st.f_bsize = remote_stat.block_size;
1006         st.f_frsize = remote_stat.fragment_size;
1007         st.f_blocks = remote_stat.block_numbers;
1008         st.f_bavail = remote_stat.block_available;
1009         st.f_favail = remote_stat.inodes_available;
1010         st.f_namemax = remote_stat.max_filename;
1011         // Assuming we are not privileged to use all free spaces on the remote server, set the free
1012         // blocks/fragment to the same available amount.
1013         st.f_bfree = st.f_bavail;
1014         st.f_ffree = st.f_favail;
1015         // Number of inodes on the filesystem
1016         st.f_files = self.inode_table.read().unwrap().len() as u64;
1017 
1018         Ok(st)
1019     }
1020 }
1021 
handle_inode_locked<F, R>( inode_table: &BTreeMap<Inode, InodeState>, inode: &Inode, handle_fn: F, ) -> io::Result<R> where F: FnOnce(&InodeState) -> io::Result<R>,1022 fn handle_inode_locked<F, R>(
1023     inode_table: &BTreeMap<Inode, InodeState>,
1024     inode: &Inode,
1025     handle_fn: F,
1026 ) -> io::Result<R>
1027 where
1028     F: FnOnce(&InodeState) -> io::Result<R>,
1029 {
1030     if let Some(inode_state) = inode_table.get(inode) {
1031         handle_fn(inode_state)
1032     } else {
1033         Err(io::Error::from_raw_os_error(libc::ENOENT))
1034     }
1035 }
1036 
handle_inode_mut_locked<F, R>( inode_table: &mut BTreeMap<Inode, InodeState>, inode: &Inode, handle_fn: F, ) -> io::Result<R> where F: FnOnce(&mut InodeState) -> io::Result<R>,1037 fn handle_inode_mut_locked<F, R>(
1038     inode_table: &mut BTreeMap<Inode, InodeState>,
1039     inode: &Inode,
1040     handle_fn: F,
1041 ) -> io::Result<R>
1042 where
1043     F: FnOnce(&mut InodeState) -> io::Result<R>,
1044 {
1045     if let Some(inode_state) = inode_table.get_mut(inode) {
1046         handle_fn(inode_state)
1047     } else {
1048         Err(io::Error::from_raw_os_error(libc::ENOENT))
1049     }
1050 }
1051 
check_unsupported_setattr_request(valid: SetattrValid) -> io::Result<()>1052 fn check_unsupported_setattr_request(valid: SetattrValid) -> io::Result<()> {
1053     if valid.contains(SetattrValid::UID) {
1054         warn!("Changing st_uid is not currently supported");
1055         return Err(io::Error::from_raw_os_error(libc::ENOSYS));
1056     }
1057     if valid.contains(SetattrValid::GID) {
1058         warn!("Changing st_gid is not currently supported");
1059         return Err(io::Error::from_raw_os_error(libc::ENOSYS));
1060     }
1061     if valid.intersects(
1062         SetattrValid::CTIME
1063             | SetattrValid::ATIME
1064             | SetattrValid::ATIME_NOW
1065             | SetattrValid::MTIME
1066             | SetattrValid::MTIME_NOW,
1067     ) {
1068         trace!("Ignoring ctime/atime/mtime change as authfs does not maintain timestamp currently");
1069     }
1070     Ok(())
1071 }
1072 
/// Borrows the bytes of `cstr` (without the trailing NUL) as a `Path`, without copying or
/// validating the encoding.
fn cstr_to_path(cstr: &CStr) -> &Path {
    let raw_bytes = cstr.to_bytes();
    Path::new(OsStr::from_bytes(raw_bytes))
}
1076