1 /*
2  * Copyright (C) 2008 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "Zygote"
18 #define ATRACE_TAG ATRACE_TAG_DALVIK
19 
20 #include "com_android_internal_os_Zygote.h"
21 
22 #include <async_safe/log.h>
23 
24 // sys/mount.h has to come before linux/fs.h due to redefinition of MS_RDONLY, MS_BIND, etc
25 #include <sys/mount.h>
26 #include <linux/fs.h>
27 #include <sys/types.h>
28 #include <dirent.h>
29 
30 #include <algorithm>
31 #include <array>
32 #include <atomic>
33 #include <functional>
34 #include <iterator>
35 #include <list>
36 #include <optional>
37 #include <sstream>
38 #include <string>
39 #include <string_view>
40 #include <unordered_set>
41 
42 #include <android/fdsan.h>
43 #include <arpa/inet.h>
44 #include <fcntl.h>
45 #include <grp.h>
46 #include <inttypes.h>
47 #include <malloc.h>
48 #include <mntent.h>
49 #include <paths.h>
50 #include <signal.h>
51 #include <stdio.h>
52 #include <stdlib.h>
53 #include <sys/auxv.h>
54 #include <sys/capability.h>
55 #include <sys/cdefs.h>
56 #include <sys/eventfd.h>
57 #include <sys/personality.h>
58 #include <sys/prctl.h>
59 #include <sys/resource.h>
60 #include <sys/socket.h>
61 #include <sys/stat.h>
62 #define _REALLY_INCLUDE_SYS__SYSTEM_PROPERTIES_H_
63 #include <sys/_system_properties.h>
64 #include <sys/time.h>
65 #include <sys/types.h>
66 #include <sys/un.h>
67 #include <sys/wait.h>
68 #include <unistd.h>
69 
70 #include <android-base/file.h>
71 #include <android-base/logging.h>
72 #include <android-base/properties.h>
73 #include <android-base/stringprintf.h>
74 #include <android-base/unique_fd.h>
75 #include <bionic/malloc.h>
76 #include <bionic/mte.h>
77 #include <cutils/fs.h>
78 #include <cutils/multiuser.h>
79 #include <cutils/sockets.h>
80 #include <private/android_filesystem_config.h>
81 #include <processgroup/processgroup.h>
82 #include <processgroup/sched_policy.h>
83 #include <seccomp_policy.h>
84 #include <selinux/android.h>
85 #include <stats_socket.h>
86 #include <utils/String8.h>
87 #include <utils/Trace.h>
88 
89 #include <nativehelper/JNIHelp.h>
90 #include <nativehelper/ScopedLocalRef.h>
91 #include <nativehelper/ScopedPrimitiveArray.h>
92 #include <nativehelper/ScopedUtfChars.h>
93 #include "core_jni_helpers.h"
94 #include "fd_utils.h"
95 #include "filesystem_utils.h"
96 
97 #include "nativebridge/native_bridge.h"
98 
99 #if defined(__BIONIC__)
100 extern "C" void android_reset_stack_guards();
101 #endif
102 
103 namespace {
104 
105 // TODO (chriswailes): Add a function to initialize native Zygote data.
106 // TODO (chriswailes): Fix mixed indentation style (2 and 4 spaces).
107 
108 using namespace std::placeholders;
109 
110 using android::String8;
111 using android::base::ReadFileToString;
112 using android::base::StringAppendF;
113 using android::base::StringPrintf;
114 using android::base::WriteStringToFile;
115 using android::base::GetBoolProperty;
116 
117 using android::zygote::ZygoteFailure;
118 
119 using Mode = android_mallopt_gwp_asan_options_t::Mode;
120 
121 // This type is duplicated in fd_utils.h
122 typedef const std::function<void(std::string)>& fail_fn_t;
123 
// PID that SigChldHandler compares every reaped child against; when a reaped
// child matches, the zygote kills itself. Starts at 0; the code that stores
// the real PID is not in this part of the file.
static pid_t gSystemServerPid = 0;

// Name of a system property. NOTE(review): presumably toggles vold app data
// isolation, judging by the name - confirm at its point of use.
static constexpr const char* kVoldAppDataIsolation = "persist.sys.vold_app_data_isolation_enabled";
// Slash-separated name of the managed Zygote class.
static const char kZygoteClassName[] = "com/android/internal/os/Zygote";
// NOTE(review): presumably cached JNI handles for kZygoteClassName and two of
// its methods; their initialization is not visible in this part of the file.
static jclass gZygoteClass;
static jmethodID gCallPostForkSystemServerHooks;
static jmethodID gCallPostForkChildHooks;

// Slash-separated name of the managed ZygoteInit class.
static constexpr const char* kZygoteInitClassName = "com/android/internal/os/ZygoteInit";
// NOTE(review): presumably cached JNI handles for kZygoteInitClassName and two
// of its methods; their initialization is not visible in this part of the file.
static jclass gZygoteInitClass;
static jmethodID gGetOrCreateSystemServerClassLoader;
static jmethodID gPrefetchStandaloneSystemServerJars;

// When false, SetUpSeccompFilter() logs a message and installs no filter.
static bool gIsSecurityEnforced = true;
138 
139 /**
140  * True if the app process is running in its mount namespace.
141  */
142 static bool gInAppMountNamespace = false;
143 
144 /**
145  * The maximum number of characters (not including a null terminator) that a
146  * process name may contain.
147  */
148 static constexpr size_t MAX_NAME_LENGTH = 15;
149 
150 /**
151  * The file descriptor for the Zygote socket opened by init.
152  */
153 
154 static int gZygoteSocketFD = -1;
155 
156 /**
157  * The file descriptor for the unspecialized app process (USAP) pool socket opened by init.
158  */
159 
160 static int gUsapPoolSocketFD = -1;
161 
162 /**
163  * The number of USAPs currently in this Zygote's pool.
164  */
165 static std::atomic_uint32_t gUsapPoolCount = 0;
166 
167 /**
168  * Event file descriptor used to communicate reaped USAPs to the
169  * ZygoteServer.
170  */
171 static int gUsapPoolEventFD = -1;
172 
173 /**
174  * The socket file descriptor used to send notifications to the
175  * system_server.
176  */
177 static int gSystemServerSocketFd = -1;
178 
179 static constexpr int DEFAULT_DATA_DIR_PERMISSION = 0751;
180 
181 static constexpr const uint64_t UPPER_HALF_WORD_MASK = 0xFFFF'FFFF'0000'0000;
182 static constexpr const uint64_t LOWER_HALF_WORD_MASK = 0x0000'0000'FFFF'FFFF;
183 
184 static constexpr const char* kCurProfileDirPath = "/data/misc/profiles/cur";
185 static constexpr const char* kRefProfileDirPath = "/data/misc/profiles/ref";
186 
187 /**
188  * The maximum value that the gUSAPPoolSizeMax variable may take.  This value
189  * is a mirror of ZygoteServer.USAP_POOL_SIZE_MAX_LIMIT
190  */
191 static constexpr int USAP_POOL_SIZE_MAX_LIMIT = 100;
192 
193 /** The numeric value for the maximum priority a process may possess. */
194 static constexpr int PROCESS_PRIORITY_MAX = -20;
195 
196 /** The numeric value for the minimum priority a process may possess. */
197 static constexpr int PROCESS_PRIORITY_MIN = 19;
198 
199 /** The numeric value for the normal priority a process should have. */
200 static constexpr int PROCESS_PRIORITY_DEFAULT = 0;
201 
202 /** Exponential back off parameters for storage dir check. */
203 static constexpr unsigned int STORAGE_DIR_CHECK_RETRY_MULTIPLIER = 2;
204 static constexpr unsigned int STORAGE_DIR_CHECK_INIT_INTERVAL_US = 50;
205 static constexpr unsigned int STORAGE_DIR_CHECK_MAX_INTERVAL_US = 1000;
206 /**
207  * Lower bound time we allow storage dir check to sleep.
208  * If it exceeds 2s, PROC_START_TIMEOUT_MSG will kill the starting app anyway,
209  * so it's fine to assume max retries is 5 mins.
210  */
211 static constexpr int STORAGE_DIR_CHECK_TIMEOUT_US = 1000 * 1000 * 60 * 5;
212 
213 static void WaitUntilDirReady(const std::string& target, fail_fn_t fail_fn);
214 
/**
 * A helper class containing accounting information for USAPs.
 *
 * Each entry holds the PID of a USAP together with the read end of its
 * control pipe.  Both values are stored in a single std::atomic so that an
 * entry can be inspected and updated from a signal handler as well as from
 * regular code without tearing.
 */
class UsapTableEntry {
 public:
  struct EntryStorage {
    int32_t pid;
    int32_t read_pipe_fd;

    bool operator!=(const EntryStorage& other) const {
      return pid != other.pid || read_pipe_fd != other.read_pipe_fd;
    }
  };

 private:
  /** Marker stored in entries that are not in use. */
  static constexpr EntryStorage INVALID_ENTRY_VALUE = {-1, -1};

  std::atomic<EntryStorage> mStorage;
  static_assert(decltype(mStorage)::is_always_lock_free);  // Accessed from signal handler.

 public:
  /** Creates an entry that is initially invalid (unused). */
  constexpr UsapTableEntry() : mStorage(INVALID_ENTRY_VALUE) {}

  /**
   * If the provided PID matches the one stored in this entry, the entry will
   * be invalidated and the associated file descriptor will be closed.  If the
   * PIDs don't match nothing will happen.
   *
   * @param pid The ID of the process whose entry we want to clear.
   * @return True if the entry was cleared by this call; false otherwise
   */
  bool ClearForPID(int32_t pid) {
    EntryStorage storage = mStorage.load();

    if (storage.pid != pid) {
      return false;
    }

    /*
     * There are three possible outcomes from this compare-and-exchange:
     *   1) It succeeds, in which case we close the FD
     *   2) It fails and the new value is INVALID_ENTRY_VALUE, in which case
     *      the entry has already been cleared.
     *   3) It fails and the new value isn't INVALID_ENTRY_VALUE, in which
     *      case the entry has already been cleared and re-used.
     *
     * In all three cases the goal of the caller has been met, but only in
     * the first case do we need to decrement the pool count.
     */
    if (!mStorage.compare_exchange_strong(storage, INVALID_ENTRY_VALUE)) {
      return false;
    }

    close(storage.read_pipe_fd);
    return true;
  }

  /**
   * Invalidates the entry and closes its file descriptor if the entry was in
   * use.
   *
   * The stored value is swapped out with a single atomic exchange, so the
   * descriptor is closed only by the caller that actually removed it.  A
   * separate load followed by a store would let a concurrent ClearForPID()
   * (for example from the signal handler) close the same descriptor too.
   */
  void Clear() {
    const EntryStorage previous = mStorage.exchange(INVALID_ENTRY_VALUE);

    if (previous != INVALID_ENTRY_VALUE) {
      close(previous.read_pipe_fd);
    }
  }

  /**
   * Marks the entry as unused without closing the stored file descriptor.
   */
  void Invalidate() {
    mStorage.store(INVALID_ENTRY_VALUE);
  }

  /**
   * @return A copy of the data stored in this entry, or std::nullopt if the
   * entry is currently invalid.
   */
  std::optional<EntryStorage> GetValues() const {
    const EntryStorage storage = mStorage.load();

    if (storage != INVALID_ENTRY_VALUE) {
      return storage;
    }
    return std::nullopt;
  }

  /**
   * Sets the entry to the given values if it is currently invalid.
   *
   * @param pid  The process ID for the new entry.
   * @param read_pipe_fd  The read end of the USAP control pipe for this
   * process.
   * @return True if the entry was set; false otherwise.
   */
  bool SetIfInvalid(int32_t pid, int32_t read_pipe_fd) {
    EntryStorage new_value_storage;

    new_value_storage.pid = pid;
    new_value_storage.read_pipe_fd = read_pipe_fd;

    EntryStorage expected = INVALID_ENTRY_VALUE;

    return mStorage.compare_exchange_strong(expected, new_value_storage);
  }
};
318 
319 /**
320  * A table containing information about the USAPs currently in the pool.
321  *
322  * Multiple threads may be attempting to modify the table, either from the
323  * signal handler or from the ZygoteServer poll loop.  Atomic loads/stores in
 * the UsapTableEntry class prevent data races during these concurrent
325  * operations.
326  */
327 static std::array<UsapTableEntry, USAP_POOL_SIZE_MAX_LIMIT> gUsapTable;
328 
329 /**
330  * The list of open zygote file descriptors.
331  */
332 static FileDescriptorTable* gOpenFdTable = nullptr;
333 
// Values accepted as the mount_mode argument of MountEmulatedStorage().
// Must match values in com.android.internal.os.Zygote.
// The values should be consistent with IVold.aidl
enum MountExternalKind {
    MOUNT_EXTERNAL_NONE             = 0,
    MOUNT_EXTERNAL_DEFAULT          = 1,
    MOUNT_EXTERNAL_INSTALLER        = 2,
    MOUNT_EXTERNAL_PASS_THROUGH     = 3,
    MOUNT_EXTERNAL_ANDROID_WRITABLE = 4,
    // One past the last valid mode; used for range checking.
    MOUNT_EXTERNAL_COUNT            = 5,
};
344 
// Bit flags and small bit fields packed into a 32-bit word.
// Must match values in com.android.internal.os.Zygote.
enum RuntimeFlags : uint32_t {
    DEBUG_ENABLE_JDWP = 1 << 0,
    PROFILE_SYSTEM_SERVER = 1 << 14,
    PROFILE_FROM_SHELL = 1 << 15,

    // Two-bit field in bits 19-20.
    MEMORY_TAG_LEVEL_MASK = 3 << 19,
    MEMORY_TAG_LEVEL_TBI = 1 << 19,
    MEMORY_TAG_LEVEL_ASYNC = 2 << 19,
    MEMORY_TAG_LEVEL_SYNC = 3 << 19,

    // Two-bit field in bits 21-22.
    GWP_ASAN_LEVEL_MASK = 3 << 21,
    GWP_ASAN_LEVEL_NEVER = 0 << 21,
    GWP_ASAN_LEVEL_LOTTERY = 1 << 21,
    GWP_ASAN_LEVEL_ALWAYS = 2 << 21,
    GWP_ASAN_LEVEL_DEFAULT = 3 << 21,

    NATIVE_HEAP_ZERO_INIT_ENABLED = 1 << 23,
    PROFILEABLE = 1 << 24,
    DEBUG_ENABLE_PTRACE = 1 << 25,
};
363 
// Type tag carried in the header of messages that the zygote sends to
// system_server on its own initiative.
enum UnsolicitedZygoteMessageTypes : uint32_t {
    UNSOLICITED_ZYGOTE_MESSAGE_TYPE_RESERVED = 0,
    // Used by sendSigChildStatus() for child-status notifications.
    UNSOLICITED_ZYGOTE_MESSAGE_TYPE_SIGCHLD = 1,
};
368 
// Datagram that sendSigChildStatus() sends to the system_server socket for
// every child reaped by the SIGCHLD handler.
struct UnsolicitedZygoteMessageSigChld {
    struct {
        // Set to UNSOLICITED_ZYGOTE_MESSAGE_TYPE_SIGCHLD by the sender.
        UnsolicitedZygoteMessageTypes type;
    } header;
    struct {
        // PID returned by waitpid() for the reaped child.
        pid_t pid;
        // si_uid taken from the siginfo_t handed to the signal handler.
        uid_t uid;
        // Raw wait status as filled in by waitpid().
        int status;
    } payload;
};
379 
380 // Keep sync with services/core/java/com/android/server/am/ProcessList.java
381 static constexpr struct sockaddr_un kSystemServerSockAddr =
382         {.sun_family = AF_LOCAL, .sun_path = "/data/system/unsolzygotesocket"};
383 
384 // Forward declaration so we don't have to move the signal handler.
385 static bool RemoveUsapTableEntry(pid_t usap_pid);
386 
RuntimeAbort(JNIEnv * env,int line,const char * msg)387 static void RuntimeAbort(JNIEnv* env, int line, const char* msg) {
388   std::ostringstream oss;
389   oss << __FILE__ << ":" << line << ": " << msg;
390   env->FatalError(oss.str().c_str());
391 }
392 
393 // Create the socket which is going to be used to send unsolicited message
394 // to system_server, the socket will be closed post forking a child process.
395 // It's expected to be called at each zygote's initialization.
initUnsolSocketToSystemServer()396 static void initUnsolSocketToSystemServer() {
397     gSystemServerSocketFd = socket(AF_LOCAL, SOCK_DGRAM | SOCK_NONBLOCK, 0);
398     if (gSystemServerSocketFd >= 0) {
399         ALOGV("Zygote:systemServerSocketFD = %d", gSystemServerSocketFd);
400     } else {
401         ALOGE("Unable to create socket file descriptor to connect to system_server");
402     }
403 }
404 
sendSigChildStatus(const pid_t pid,const uid_t uid,const int status)405 static void sendSigChildStatus(const pid_t pid, const uid_t uid, const int status) {
406     int socketFd = gSystemServerSocketFd;
407     if (socketFd >= 0) {
408         // fill the message buffer
409         struct UnsolicitedZygoteMessageSigChld data =
410                 {.header = {.type = UNSOLICITED_ZYGOTE_MESSAGE_TYPE_SIGCHLD},
411                  .payload = {.pid = pid, .uid = uid, .status = status}};
412         if (TEMP_FAILURE_RETRY(
413                     sendto(socketFd, &data, sizeof(data), 0,
414                            reinterpret_cast<const struct sockaddr*>(&kSystemServerSockAddr),
415                            sizeof(kSystemServerSockAddr))) == -1) {
416             async_safe_format_log(ANDROID_LOG_ERROR, LOG_TAG,
417                                   "Zygote failed to write to system_server FD: %s",
418                                   strerror(errno));
419         }
420     }
421 }
422 
423 // This signal handler is for zygote mode, since the zygote must reap its children
424 NO_STACK_PROTECTOR
SigChldHandler(int,siginfo_t * info,void *)425 static void SigChldHandler(int /*signal_number*/, siginfo_t* info, void* /*ucontext*/) {
426     pid_t pid;
427     int status;
428     int64_t usaps_removed = 0;
429 
430     // It's necessary to save and restore the errno during this function.
431     // Since errno is stored per thread, changing it here modifies the errno
432     // on the thread on which this signal handler executes. If a signal occurs
433     // between a call and an errno check, it's possible to get the errno set
434     // here.
435     // See b/23572286 for extra information.
436     int saved_errno = errno;
437 
438     while ((pid = waitpid(-1, &status, WNOHANG)) > 0) {
439         // Notify system_server that we received a SIGCHLD
440         sendSigChildStatus(pid, info->si_uid, status);
441         // Log process-death status that we care about.
442         if (WIFEXITED(status)) {
443             async_safe_format_log(ANDROID_LOG_INFO, LOG_TAG, "Process %d exited cleanly (%d)", pid,
444                                   WEXITSTATUS(status));
445 
446             // Check to see if the PID is in the USAP pool and remove it if it is.
447             if (RemoveUsapTableEntry(pid)) {
448                 ++usaps_removed;
449             }
450         } else if (WIFSIGNALED(status)) {
451             async_safe_format_log(ANDROID_LOG_INFO, LOG_TAG,
452                                   "Process %d exited due to signal %d (%s)%s", pid,
453                                   WTERMSIG(status), strsignal(WTERMSIG(status)),
454                                   WCOREDUMP(status) ? "; core dumped" : "");
455 
456             // If the process exited due to a signal other than SIGTERM, check to see
457             // if the PID is in the USAP pool and remove it if it is.  If the process
458             // was closed by the Zygote using SIGTERM then the USAP pool entry will
459             // have already been removed (see nativeEmptyUsapPool()).
460             if (WTERMSIG(status) != SIGTERM && RemoveUsapTableEntry(pid)) {
461                 ++usaps_removed;
462             }
463         }
464 
465         // If the just-crashed process is the system_server, bring down zygote
466         // so that it is restarted by init and system server will be restarted
467         // from there.
468         if (pid == gSystemServerPid) {
469             async_safe_format_log(ANDROID_LOG_ERROR, LOG_TAG,
470                                   "Exit zygote because system server (pid %d) has terminated", pid);
471             kill(getpid(), SIGKILL);
472         }
473     }
474 
475     // Note that we shouldn't consider ECHILD an error because
476     // the secondary zygote might have no children left to wait for.
477     if (pid < 0 && errno != ECHILD) {
478         async_safe_format_log(ANDROID_LOG_WARN, LOG_TAG, "Zygote SIGCHLD error in waitpid: %s",
479                               strerror(errno));
480     }
481 
482     if (usaps_removed > 0) {
483         if (TEMP_FAILURE_RETRY(write(gUsapPoolEventFD, &usaps_removed, sizeof(usaps_removed))) ==
484             -1) {
485             // If this write fails something went terribly wrong.  We will now kill
486             // the zygote and let the system bring it back up.
487             async_safe_format_log(ANDROID_LOG_ERROR, LOG_TAG,
488                                   "Zygote failed to write to USAP pool event FD: %s",
489                                   strerror(errno));
490             kill(getpid(), SIGKILL);
491         }
492     }
493 
494     errno = saved_errno;
495 }
496 
497 // Configures the SIGCHLD/SIGHUP handlers for the zygote process. This is
498 // configured very late, because earlier in the runtime we may fork() and
499 // exec() other processes, and we want to waitpid() for those rather than
500 // have them be harvested immediately.
501 //
502 // Ignore SIGHUP because all processes forked by the zygote are in the same
503 // process group as the zygote and we don't want to be notified if we become
504 // an orphaned group and have one or more stopped processes. This is not a
505 // theoretical concern :
506 // - we can become an orphaned group if one of our direct descendants forks
507 //   and is subsequently killed before its children.
508 // - crash_dump routinely STOPs the process it's tracing.
509 //
510 // See issues b/71965619 and b/25567761 for further details.
511 //
512 // This ends up being called repeatedly before each fork(), but there's
513 // no real harm in that.
SetSignalHandlers()514 static void SetSignalHandlers() {
515     struct sigaction sig_chld = {.sa_flags = SA_SIGINFO, .sa_sigaction = SigChldHandler};
516 
517     if (sigaction(SIGCHLD, &sig_chld, nullptr) < 0) {
518         ALOGW("Error setting SIGCHLD handler: %s", strerror(errno));
519     }
520 
521   struct sigaction sig_hup = {};
522   sig_hup.sa_handler = SIG_IGN;
523   if (sigaction(SIGHUP, &sig_hup, nullptr) < 0) {
524     ALOGW("Error setting SIGHUP handler: %s", strerror(errno));
525   }
526 }
527 
528 // Sets the SIGCHLD handler back to default behavior in zygote children.
UnsetChldSignalHandler()529 static void UnsetChldSignalHandler() {
530   struct sigaction sa;
531   memset(&sa, 0, sizeof(sa));
532   sa.sa_handler = SIG_DFL;
533 
534   if (sigaction(SIGCHLD, &sa, nullptr) < 0) {
535     ALOGW("Error unsetting SIGCHLD handler: %s", strerror(errno));
536   }
537 }
538 
539 // Calls POSIX setgroups() using the int[] object as an argument.
540 // A nullptr argument is tolerated.
SetGids(JNIEnv * env,jintArray managed_gids,jboolean is_child_zygote,fail_fn_t fail_fn)541 static void SetGids(JNIEnv* env, jintArray managed_gids, jboolean is_child_zygote,
542                     fail_fn_t fail_fn) {
543   if (managed_gids == nullptr) {
544     if (is_child_zygote) {
545       // For child zygotes like webview and app zygote, we want to clear out
546       // any supplemental groups the parent zygote had.
547       if (setgroups(0, NULL) == -1) {
548         fail_fn(CREATE_ERROR("Failed to remove supplementary groups for child zygote"));
549       }
550     }
551     return;
552   }
553 
554   ScopedIntArrayRO gids(env, managed_gids);
555   if (gids.get() == nullptr) {
556     fail_fn(CREATE_ERROR("Getting gids int array failed"));
557   }
558 
559   if (setgroups(gids.size(), reinterpret_cast<const gid_t*>(&gids[0])) == -1) {
560     fail_fn(CREATE_ERROR("setgroups failed: %s, gids.size=%zu", strerror(errno), gids.size()));
561   }
562 }
563 
ensureInAppMountNamespace(fail_fn_t fail_fn)564 static void ensureInAppMountNamespace(fail_fn_t fail_fn) {
565   if (gInAppMountNamespace) {
566     // In app mount namespace already
567     return;
568   }
569   if (unshare(CLONE_NEWNS) == -1) {
570     fail_fn(CREATE_ERROR("Failed to unshare(): %s", strerror(errno)));
571   }
572   gInAppMountNamespace = true;
573 }
574 
575 // Sets the resource limits via setrlimit(2) for the values in the
576 // two-dimensional array of integers that's passed in. The second dimension
577 // contains a tuple of length 3: (resource, rlim_cur, rlim_max). nullptr is
578 // treated as an empty array.
SetRLimits(JNIEnv * env,jobjectArray managed_rlimits,fail_fn_t fail_fn)579 static void SetRLimits(JNIEnv* env, jobjectArray managed_rlimits, fail_fn_t fail_fn) {
580   if (managed_rlimits == nullptr) {
581     return;
582   }
583 
584   rlimit rlim;
585   memset(&rlim, 0, sizeof(rlim));
586 
587   for (int i = 0; i < env->GetArrayLength(managed_rlimits); ++i) {
588     ScopedLocalRef<jobject>
589         managed_rlimit_object(env, env->GetObjectArrayElement(managed_rlimits, i));
590     ScopedIntArrayRO rlimit_handle(env, reinterpret_cast<jintArray>(managed_rlimit_object.get()));
591 
592     if (rlimit_handle.size() != 3) {
593       fail_fn(CREATE_ERROR("rlimits array must have a second dimension of size 3"));
594     }
595 
596     rlim.rlim_cur = rlimit_handle[1];
597     rlim.rlim_max = rlimit_handle[2];
598 
599     if (setrlimit(rlimit_handle[0], &rlim) == -1) {
600       fail_fn(CREATE_ERROR("setrlimit(%d, {%ld, %ld}) failed",
601                            rlimit_handle[0], rlim.rlim_cur, rlim.rlim_max));
602     }
603   }
604 }
605 
EnableDebugger()606 static void EnableDebugger() {
607   // To let a non-privileged gdbserver attach to this
608   // process, we must set our dumpable flag.
609   if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) == -1) {
610     ALOGE("prctl(PR_SET_DUMPABLE) failed");
611   }
612 
613   // A non-privileged native debugger should be able to attach to the debuggable app, even if Yama
614   // is enabled (see kernel/Documentation/security/Yama.txt).
615   if (prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0) == -1) {
616     // if Yama is off prctl(PR_SET_PTRACER) returns EINVAL - don't log in this
617     // case since it's expected behaviour.
618     if (errno != EINVAL) {
619       ALOGE("prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY) failed");
620     }
621   }
622 
623   // Set the core dump size to zero unless wanted (see also coredump_setup in build/envsetup.sh).
624   if (!GetBoolProperty("persist.zygote.core_dump", false)) {
625     // Set the soft limit on core dump size to 0 without changing the hard limit.
626     rlimit rl;
627     if (getrlimit(RLIMIT_CORE, &rl) == -1) {
628       ALOGE("getrlimit(RLIMIT_CORE) failed");
629     } else {
630       rl.rlim_cur = 0;
631       if (setrlimit(RLIMIT_CORE, &rl) == -1) {
632         ALOGE("setrlimit(RLIMIT_CORE) failed");
633       }
634     }
635   }
636 }
637 
PreApplicationInit()638 static void PreApplicationInit() {
639   // The child process sets this to indicate it's not the zygote.
640   android_mallopt(M_SET_ZYGOTE_CHILD, nullptr, 0);
641 
642   // Set the jemalloc decay time to 1.
643   mallopt(M_DECAY_TIME, 1);
644 }
645 
SetUpSeccompFilter(uid_t uid,bool is_child_zygote)646 static void SetUpSeccompFilter(uid_t uid, bool is_child_zygote) {
647   if (!gIsSecurityEnforced) {
648     ALOGI("seccomp disabled by setenforce 0");
649     return;
650   }
651 
652   // Apply system or app filter based on uid.
653   if (uid >= AID_APP_START) {
654     if (is_child_zygote) {
655       set_app_zygote_seccomp_filter();
656     } else {
657       set_app_seccomp_filter();
658     }
659   } else {
660     set_system_seccomp_filter();
661   }
662 }
663 
EnableKeepCapabilities(fail_fn_t fail_fn)664 static void EnableKeepCapabilities(fail_fn_t fail_fn) {
665   if (prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0) == -1) {
666     fail_fn(CREATE_ERROR("prctl(PR_SET_KEEPCAPS) failed: %s", strerror(errno)));
667   }
668 }
669 
DropCapabilitiesBoundingSet(fail_fn_t fail_fn,jlong bounding_capabilities)670 static void DropCapabilitiesBoundingSet(fail_fn_t fail_fn, jlong bounding_capabilities) {
671   for (int i = 0; prctl(PR_CAPBSET_READ, i, 0, 0, 0) >= 0; i++) {;
672     if ((1LL << i) & bounding_capabilities) continue;
673     if (prctl(PR_CAPBSET_DROP, i, 0, 0, 0) == -1) {
674       if (errno == EINVAL) {
675         ALOGE("prctl(PR_CAPBSET_DROP) failed with EINVAL. Please verify "
676               "your kernel is compiled with file capabilities support");
677       } else {
678         fail_fn(CREATE_ERROR("prctl(PR_CAPBSET_DROP, %d) failed: %s", i, strerror(errno)));
679       }
680     }
681   }
682 }
683 
MatchGid(JNIEnv * env,jintArray gids,jint gid,jint gid_to_find)684 static bool MatchGid(JNIEnv* env, jintArray gids, jint gid, jint gid_to_find) {
685   if (gid == gid_to_find) return true;
686 
687   if (gids == nullptr) return false;
688 
689   jsize gids_num = env->GetArrayLength(gids);
690   ScopedIntArrayRO native_gid_proxy(env, gids);
691 
692   if (native_gid_proxy.get() == nullptr) {
693     RuntimeAbort(env, __LINE__, "Bad gids array");
694   }
695 
696   for (int gids_index = 0; gids_index < gids_num; ++gids_index) {
697     if (native_gid_proxy[gids_index] == gid_to_find) {
698       return true;
699     }
700   }
701 
702   return false;
703 }
704 
SetInheritable(uint64_t inheritable,fail_fn_t fail_fn)705 static void SetInheritable(uint64_t inheritable, fail_fn_t fail_fn) {
706   __user_cap_header_struct capheader;
707   memset(&capheader, 0, sizeof(capheader));
708   capheader.version = _LINUX_CAPABILITY_VERSION_3;
709   capheader.pid = 0;
710 
711   __user_cap_data_struct capdata[2];
712   if (capget(&capheader, &capdata[0]) == -1) {
713     fail_fn(CREATE_ERROR("capget failed: %s", strerror(errno)));
714   }
715 
716   capdata[0].inheritable = inheritable;
717   capdata[1].inheritable = inheritable >> 32;
718 
719   if (capset(&capheader, &capdata[0]) == -1) {
720     fail_fn(CREATE_ERROR("capset(inh=%" PRIx64 ") failed: %s", inheritable, strerror(errno)));
721   }
722 }
723 
SetCapabilities(uint64_t permitted,uint64_t effective,uint64_t inheritable,fail_fn_t fail_fn)724 static void SetCapabilities(uint64_t permitted, uint64_t effective, uint64_t inheritable,
725                             fail_fn_t fail_fn) {
726   __user_cap_header_struct capheader;
727   memset(&capheader, 0, sizeof(capheader));
728   capheader.version = _LINUX_CAPABILITY_VERSION_3;
729   capheader.pid = 0;
730 
731   __user_cap_data_struct capdata[2];
732   memset(&capdata, 0, sizeof(capdata));
733   capdata[0].effective = effective;
734   capdata[1].effective = effective >> 32;
735   capdata[0].permitted = permitted;
736   capdata[1].permitted = permitted >> 32;
737   capdata[0].inheritable = inheritable;
738   capdata[1].inheritable = inheritable >> 32;
739 
740   if (capset(&capheader, &capdata[0]) == -1) {
741     fail_fn(CREATE_ERROR("capset(perm=%" PRIx64 ", eff=%" PRIx64 ", inh=%" PRIx64 ") "
742                          "failed: %s", permitted, effective, inheritable, strerror(errno)));
743   }
744 }
745 
SetSchedulerPolicy(fail_fn_t fail_fn,bool is_top_app)746 static void SetSchedulerPolicy(fail_fn_t fail_fn, bool is_top_app) {
747   SchedPolicy policy = is_top_app ? SP_TOP_APP : SP_DEFAULT;
748 
749   if (is_top_app && cpusets_enabled()) {
750     errno = -set_cpuset_policy(0, policy);
751     if (errno != 0) {
752       fail_fn(CREATE_ERROR("set_cpuset_policy(0, %d) failed: %s", policy, strerror(errno)));
753     }
754   }
755 
756   errno = -set_sched_policy(0, policy);
757   if (errno != 0) {
758     fail_fn(CREATE_ERROR("set_sched_policy(0, %d) failed: %s", policy, strerror(errno)));
759   }
760 
761   // We are going to lose the permission to set scheduler policy during the specialization, so make
762   // sure that we don't cache the fd of cgroup path that may cause sepolicy violation by writing
763   // value to the cached fd directly when creating new thread.
764   DropTaskProfilesResourceCaching();
765 }
766 
UnmountTree(const char * path)767 static int UnmountTree(const char* path) {
768   ATRACE_CALL();
769 
770   size_t path_len = strlen(path);
771 
772   FILE* fp = setmntent("/proc/mounts", "r");
773   if (fp == nullptr) {
774     ALOGE("Error opening /proc/mounts: %s", strerror(errno));
775     return -errno;
776   }
777 
778   // Some volumes can be stacked on each other, so force unmount in
779   // reverse order to give us the best chance of success.
780   std::list<std::string> to_unmount;
781   mntent* mentry;
782   while ((mentry = getmntent(fp)) != nullptr) {
783     if (strncmp(mentry->mnt_dir, path, path_len) == 0) {
784       to_unmount.push_front(std::string(mentry->mnt_dir));
785     }
786   }
787   endmntent(fp);
788 
789   for (const auto& path : to_unmount) {
790     if (umount2(path.c_str(), MNT_DETACH)) {
791       ALOGW("Failed to unmount %s: %s", path.c_str(), strerror(errno));
792     }
793   }
794   return 0;
795 }
796 
PrepareDir(const std::string & dir,mode_t mode,uid_t uid,gid_t gid,fail_fn_t fail_fn)797 static void PrepareDir(const std::string& dir, mode_t mode, uid_t uid, gid_t gid,
798                       fail_fn_t fail_fn) {
799   if (fs_prepare_dir(dir.c_str(), mode, uid, gid) != 0) {
800     fail_fn(CREATE_ERROR("fs_prepare_dir failed on %s: %s",
801                          dir.c_str(), strerror(errno)));
802   }
803 }
804 
PrepareDirIfNotPresent(const std::string & dir,mode_t mode,uid_t uid,gid_t gid,fail_fn_t fail_fn)805 static void PrepareDirIfNotPresent(const std::string& dir, mode_t mode, uid_t uid, gid_t gid,
806                       fail_fn_t fail_fn) {
807   struct stat sb;
808   if (TEMP_FAILURE_RETRY(stat(dir.c_str(), &sb)) != -1) {
809     // Directory exists already
810     return;
811   }
812   PrepareDir(dir, mode, uid, gid, fail_fn);
813 }
814 
// Recursively bind mounts (MS_BIND | MS_REC) source_dir onto target_dir.
//
// Returns true on success and false if mount() failed; errno is left as set
// by mount().
static bool BindMount(const std::string& source_dir, const std::string& target_dir) {
  const int rc = TEMP_FAILURE_RETRY(mount(source_dir.c_str(), target_dir.c_str(), nullptr,
                                          MS_BIND | MS_REC, nullptr));
  return rc != -1;
}
819 
BindMount(const std::string & source_dir,const std::string & target_dir,fail_fn_t fail_fn)820 static void BindMount(const std::string& source_dir, const std::string& target_dir,
821                       fail_fn_t fail_fn) {
822   if (!BindMount(source_dir, target_dir)) {
823     fail_fn(CREATE_ERROR("Failed to mount %s to %s: %s",
824                          source_dir.c_str(), target_dir.c_str(), strerror(errno)));
825   }
826 }
827 
MountAppDataTmpFs(const std::string & target_dir,fail_fn_t fail_fn)828 static void MountAppDataTmpFs(const std::string& target_dir,
829                       fail_fn_t fail_fn) {
830   if (TEMP_FAILURE_RETRY(mount("tmpfs", target_dir.c_str(), "tmpfs",
831                                MS_NOSUID | MS_NODEV | MS_NOEXEC, "uid=0,gid=0,mode=0751")) == -1) {
832     fail_fn(CREATE_ERROR("Failed to mount tmpfs to %s: %s",
833                          target_dir.c_str(), strerror(errno)));
834   }
835 }
836 
837 // Create a private mount namespace and bind mount appropriate emulated
838 // storage for the given user.
MountEmulatedStorage(uid_t uid,jint mount_mode,bool force_mount_namespace,fail_fn_t fail_fn)839 static void MountEmulatedStorage(uid_t uid, jint mount_mode,
840         bool force_mount_namespace,
841         fail_fn_t fail_fn) {
842   // See storage config details at http://source.android.com/tech/storage/
843   ATRACE_CALL();
844 
845   if (mount_mode < 0 || mount_mode >= MOUNT_EXTERNAL_COUNT) {
846     fail_fn(CREATE_ERROR("Unknown mount_mode: %d", mount_mode));
847   }
848 
849   if (mount_mode == MOUNT_EXTERNAL_NONE && !force_mount_namespace) {
850     // Valid default of no storage visible
851     return;
852   }
853 
854   // Create a second private mount namespace for our process
855   ensureInAppMountNamespace(fail_fn);
856 
857   // Handle force_mount_namespace with MOUNT_EXTERNAL_NONE.
858   if (mount_mode == MOUNT_EXTERNAL_NONE) {
859     return;
860   }
861 
862   const userid_t user_id = multiuser_get_user_id(uid);
863   const std::string user_source = StringPrintf("/mnt/user/%d", user_id);
864   // Shell is neither AID_ROOT nor AID_EVERYBODY. Since it equally needs 'execute' access to
865   // /mnt/user/0 to 'adb shell ls /sdcard' for instance, we set the uid bit of /mnt/user/0 to
866   // AID_SHELL. This gives shell access along with apps running as group everybody (user 0 apps)
867   // These bits should be consistent with what is set in vold in
868   // Utils#MountUserFuse on FUSE volume mount
869   PrepareDir(user_source, 0710, user_id ? AID_ROOT : AID_SHELL,
870              multiuser_get_uid(user_id, AID_EVERYBODY), fail_fn);
871 
872   bool isAppDataIsolationEnabled = GetBoolProperty(kVoldAppDataIsolation, false);
873 
874   if (mount_mode == MOUNT_EXTERNAL_PASS_THROUGH) {
875       const std::string pass_through_source = StringPrintf("/mnt/pass_through/%d", user_id);
876       PrepareDir(pass_through_source, 0710, AID_ROOT, AID_MEDIA_RW, fail_fn);
877       BindMount(pass_through_source, "/storage", fail_fn);
878   } else if (mount_mode == MOUNT_EXTERNAL_INSTALLER) {
879       const std::string installer_source = StringPrintf("/mnt/installer/%d", user_id);
880       BindMount(installer_source, "/storage", fail_fn);
881   } else if (isAppDataIsolationEnabled && mount_mode == MOUNT_EXTERNAL_ANDROID_WRITABLE) {
882       const std::string writable_source = StringPrintf("/mnt/androidwritable/%d", user_id);
883       BindMount(writable_source, "/storage", fail_fn);
884   } else {
885       BindMount(user_source, "/storage", fail_fn);
886   }
887 }
888 
889 // Utility to close down the Zygote socket file descriptors while
890 // the child is still running as root with Zygote's privileges.  Each
891 // descriptor (if any) is closed via dup3(), replacing it with a valid
892 // (open) descriptor to /dev/null.
893 
DetachDescriptors(JNIEnv * env,const std::vector<int> & fds_to_close,fail_fn_t fail_fn)894 static void DetachDescriptors(JNIEnv* env,
895                               const std::vector<int>& fds_to_close,
896                               fail_fn_t fail_fn) {
897 
898   if (fds_to_close.size() > 0) {
899     android::base::unique_fd devnull_fd(open("/dev/null", O_RDWR | O_CLOEXEC));
900     if (devnull_fd == -1) {
901       fail_fn(std::string("Failed to open /dev/null: ").append(strerror(errno)));
902     }
903 
904     for (int fd : fds_to_close) {
905       ALOGV("Switching descriptor %d to /dev/null", fd);
906       if (TEMP_FAILURE_RETRY(dup3(devnull_fd, fd, O_CLOEXEC)) == -1) {
907         fail_fn(StringPrintf("Failed dup3() on descriptor %d: %s", fd, strerror(errno)));
908       }
909     }
910   }
911 }
912 
SetThreadName(const std::string & thread_name)913 void SetThreadName(const std::string& thread_name) {
914   bool hasAt = false;
915   bool hasDot = false;
916 
917   for (const char str_el : thread_name) {
918     if (str_el == '.') {
919       hasDot = true;
920     } else if (str_el == '@') {
921       hasAt = true;
922     }
923   }
924 
925   const char* name_start_ptr = thread_name.c_str();
926   if (thread_name.length() >= MAX_NAME_LENGTH && !hasAt && hasDot) {
927     name_start_ptr += thread_name.length() - MAX_NAME_LENGTH;
928   }
929 
930   // pthread_setname_np fails rather than truncating long strings.
931   char buf[16];       // MAX_TASK_COMM_LEN=16 is hard-coded into bionic
932   strlcpy(buf, name_start_ptr, sizeof(buf));
933   errno = pthread_setname_np(pthread_self(), buf);
934   if (errno != 0) {
935     ALOGW("Unable to set the name of current thread to '%s': %s", buf, strerror(errno));
936   }
937   // Update base::logging default tag.
938   android::base::SetDefaultTag(buf);
939 }
940 
941 /**
942  * A helper method for converting managed strings to native strings.  A fatal
943  * error is generated if a problem is encountered in extracting a non-null
944  * string.
945  *
946  * @param env  Managed runtime environment
947  * @param process_name  A native representation of the process name
948  * @param managed_process_name  A managed representation of the process name
949  * @param managed_string  The managed string to extract
950  *
951  * @return An empty option if the managed string is null.  A optional-wrapped
952  * string otherwise.
953  */
ExtractJString(JNIEnv * env,const char * process_name,jstring managed_process_name,jstring managed_string)954 static std::optional<std::string> ExtractJString(JNIEnv* env,
955                                                  const char* process_name,
956                                                  jstring managed_process_name,
957                                                  jstring managed_string) {
958   if (managed_string == nullptr) {
959     return std::nullopt;
960   } else {
961     ScopedUtfChars scoped_string_chars(env, managed_string);
962 
963     if (scoped_string_chars.c_str() != nullptr) {
964       return std::optional<std::string>(scoped_string_chars.c_str());
965     } else {
966       ZygoteFailure(env, process_name, managed_process_name, "Failed to extract JString.");
967     }
968   }
969 }
970 
971 /**
972  * A helper method for converting managed string arrays to native vectors.  A
973  * fatal error is generated if a problem is encountered in extracting a non-null array.
974  *
975  * @param env  Managed runtime environment
976  * @param process_name  A native representation of the process name
977  * @param managed_process_name  A managed representation of the process name
978  * @param managed_array  The managed integer array to extract
979  *
980  * @return An empty option if the managed array is null.  A optional-wrapped
981  * vector otherwise.
982  */
ExtractJIntArray(JNIEnv * env,const char * process_name,jstring managed_process_name,jintArray managed_array)983 static std::optional<std::vector<int>> ExtractJIntArray(JNIEnv* env,
984                                                         const char* process_name,
985                                                         jstring managed_process_name,
986                                                         jintArray managed_array) {
987   if (managed_array == nullptr) {
988     return std::nullopt;
989   } else {
990     ScopedIntArrayRO managed_array_handle(env, managed_array);
991 
992     if (managed_array_handle.get() != nullptr) {
993       std::vector<int> native_array;
994       native_array.reserve(managed_array_handle.size());
995 
996       for (size_t array_index = 0; array_index < managed_array_handle.size(); ++array_index) {
997         native_array.push_back(managed_array_handle[array_index]);
998       }
999 
1000       return std::move(native_array);
1001 
1002     } else {
1003       ZygoteFailure(env, process_name, managed_process_name, "Failed to extract JIntArray.");
1004     }
1005   }
1006 }
1007 
1008 /**
1009  * A utility function for blocking signals.
1010  *
1011  * @param signum  Signal number to block
1012  * @param fail_fn  Fatal error reporting function
1013  *
1014  * @see ZygoteFailure
1015  */
BlockSignal(int signum,fail_fn_t fail_fn)1016 static void BlockSignal(int signum, fail_fn_t fail_fn) {
1017   sigset_t sigs;
1018   sigemptyset(&sigs);
1019   sigaddset(&sigs, signum);
1020 
1021   if (sigprocmask(SIG_BLOCK, &sigs, nullptr) == -1) {
1022     fail_fn(CREATE_ERROR("Failed to block signal %s: %s", strsignal(signum), strerror(errno)));
1023   }
1024 }
1025 
1026 
1027 /**
1028  * A utility function for unblocking signals.
1029  *
1030  * @param signum  Signal number to unblock
1031  * @param fail_fn  Fatal error reporting function
1032  *
1033  * @see ZygoteFailure
1034  */
UnblockSignal(int signum,fail_fn_t fail_fn)1035 static void UnblockSignal(int signum, fail_fn_t fail_fn) {
1036   sigset_t sigs;
1037   sigemptyset(&sigs);
1038   sigaddset(&sigs, signum);
1039 
1040   if (sigprocmask(SIG_UNBLOCK, &sigs, nullptr) == -1) {
1041     fail_fn(CREATE_ERROR("Failed to un-block signal %s: %s", strsignal(signum), strerror(errno)));
1042   }
1043 }
1044 
ClearUsapTable()1045 static void ClearUsapTable() {
1046   for (UsapTableEntry& entry : gUsapTable) {
1047     entry.Clear();
1048   }
1049 
1050   gUsapPoolCount = 0;
1051 }
1052 
1053 // Create an app data directory over tmpfs overlayed CE / DE storage, and bind mount it
1054 // from the actual app data directory in data mirror.
createAndMountAppData(std::string_view package_name,std::string_view mirror_pkg_dir_name,std::string_view mirror_data_path,std::string_view actual_data_path,fail_fn_t fail_fn,bool call_fail_fn)1055 static bool createAndMountAppData(std::string_view package_name,
1056     std::string_view mirror_pkg_dir_name, std::string_view mirror_data_path,
1057     std::string_view actual_data_path, fail_fn_t fail_fn, bool call_fail_fn) {
1058 
1059   char mirrorAppDataPath[PATH_MAX];
1060   char actualAppDataPath[PATH_MAX];
1061   snprintf(mirrorAppDataPath, PATH_MAX, "%s/%s", mirror_data_path.data(),
1062       mirror_pkg_dir_name.data());
1063   snprintf(actualAppDataPath, PATH_MAX, "%s/%s", actual_data_path.data(), package_name.data());
1064 
1065   PrepareDir(actualAppDataPath, 0700, AID_ROOT, AID_ROOT, fail_fn);
1066 
1067   // Bind mount from original app data directory in mirror.
1068   if (call_fail_fn) {
1069     BindMount(mirrorAppDataPath, actualAppDataPath, fail_fn);
1070   } else if(!BindMount(mirrorAppDataPath, actualAppDataPath)) {
1071     ALOGW("Failed to mount %s to %s: %s",
1072           mirrorAppDataPath, actualAppDataPath, strerror(errno));
1073     return false;
1074   }
1075   return true;
1076 }
1077 
1078 // There is an app data directory over tmpfs overlaid CE / DE storage
1079 // bind mount it from the actual app data directory in data mirror.
mountAppData(std::string_view package_name,std::string_view mirror_pkg_dir_name,std::string_view mirror_data_path,std::string_view actual_data_path,fail_fn_t fail_fn)1080 static void mountAppData(std::string_view package_name,
1081     std::string_view mirror_pkg_dir_name, std::string_view mirror_data_path,
1082     std::string_view actual_data_path, fail_fn_t fail_fn) {
1083 
1084   char mirrorAppDataPath[PATH_MAX];
1085   char actualAppDataPath[PATH_MAX];
1086   snprintf(mirrorAppDataPath, PATH_MAX, "%s/%s", mirror_data_path.data(),
1087       mirror_pkg_dir_name.data());
1088   snprintf(actualAppDataPath, PATH_MAX, "%s/%s", actual_data_path.data(), package_name.data());
1089 
1090   // Bind mount from original app data directory in mirror.
1091   BindMount(mirrorAppDataPath, actualAppDataPath, fail_fn);
1092 }
1093 
1094 // Get the directory name stored in /data/data. If device is unlocked it should be the same as
1095 // package name, otherwise it will be an encrypted name but with same inode number.
getAppDataDirName(std::string_view parent_path,std::string_view package_name,long long ce_data_inode,fail_fn_t fail_fn)1096 static std::string getAppDataDirName(std::string_view parent_path, std::string_view package_name,
1097       long long ce_data_inode, fail_fn_t fail_fn) {
1098   // Check if directory exists
1099   char tmpPath[PATH_MAX];
1100   snprintf(tmpPath, PATH_MAX, "%s/%s", parent_path.data(), package_name.data());
1101   struct stat s;
1102   int err = stat(tmpPath, &s);
1103   if (err == 0) {
1104     // Directory exists, so return the directory name
1105     return package_name.data();
1106   } else {
1107     if (errno != ENOENT) {
1108       fail_fn(CREATE_ERROR("Unexpected error in getAppDataDirName: %s", strerror(errno)));
1109       return nullptr;
1110     }
1111     {
1112       // Directory doesn't exist, try to search the name from inode
1113       std::unique_ptr<DIR, decltype(&closedir)> dir(opendir(parent_path.data()), closedir);
1114       if (dir == nullptr) {
1115         fail_fn(CREATE_ERROR("Failed to opendir %s", parent_path.data()));
1116       }
1117       struct dirent* ent;
1118       while ((ent = readdir(dir.get()))) {
1119         if (static_cast<long long>(ent->d_ino) == ce_data_inode) {
1120             return ent->d_name;
1121         }
1122       }
1123     }
1124 
1125     // Fallback due to b/145989852, ce_data_inode stored in package manager may be corrupted
1126     // if ino_t is 32 bits.
1127     ino_t fixed_ce_data_inode = 0;
1128     if ((ce_data_inode & UPPER_HALF_WORD_MASK) == UPPER_HALF_WORD_MASK) {
1129       fixed_ce_data_inode = ce_data_inode & LOWER_HALF_WORD_MASK;
1130     } else if ((ce_data_inode & LOWER_HALF_WORD_MASK) == LOWER_HALF_WORD_MASK) {
1131       fixed_ce_data_inode = ((ce_data_inode >> 32) & LOWER_HALF_WORD_MASK);
1132     }
1133     if (fixed_ce_data_inode != 0) {
1134       std::unique_ptr<DIR, decltype(&closedir)> dir(opendir(parent_path.data()), closedir);
1135       if (dir == nullptr) {
1136         fail_fn(CREATE_ERROR("Failed to opendir %s", parent_path.data()));
1137       }
1138       struct dirent* ent;
1139       while ((ent = readdir(dir.get()))) {
1140         if (ent->d_ino == fixed_ce_data_inode) {
1141           long long d_ino = ent->d_ino;
1142           ALOGW("Fallback success inode %lld -> %lld", ce_data_inode, d_ino);
1143           return ent->d_name;
1144         }
1145       }
1146     }
1147     // Fallback done
1148     ALOGW("Unable to find %s:%lld in %s", package_name.data(), ce_data_inode, parent_path.data());
1149     return "";
1150   }
1151 }
1152 
1153 // Isolate app's data directory, by mounting a tmpfs on CE DE storage,
1154 // and create and bind mount app data in related_packages.
isolateAppDataPerPackage(int userId,std::string_view package_name,std::string_view volume_uuid,long long ce_data_inode,std::string_view actualCePath,std::string_view actualDePath,fail_fn_t fail_fn)1155 static void isolateAppDataPerPackage(int userId, std::string_view package_name,
1156     std::string_view volume_uuid, long long ce_data_inode, std::string_view actualCePath,
1157     std::string_view actualDePath, fail_fn_t fail_fn) {
1158 
1159   char mirrorCePath[PATH_MAX];
1160   char mirrorDePath[PATH_MAX];
1161   char mirrorCeParent[PATH_MAX];
1162   snprintf(mirrorCeParent, PATH_MAX, "/data_mirror/data_ce/%s", volume_uuid.data());
1163   snprintf(mirrorCePath, PATH_MAX, "%s/%d", mirrorCeParent, userId);
1164   snprintf(mirrorDePath, PATH_MAX, "/data_mirror/data_de/%s/%d", volume_uuid.data(), userId);
1165 
1166   createAndMountAppData(package_name, package_name, mirrorDePath, actualDePath, fail_fn,
1167                         true /*call_fail_fn*/);
1168 
1169   std::string ce_data_path = getAppDataDirName(mirrorCePath, package_name, ce_data_inode, fail_fn);
1170   if (ce_data_path.empty()) {
1171     ALOGE("Ignoring missing CE app data dir for %s\n", package_name.data());
1172     return;
1173   }
1174   if (!createAndMountAppData(package_name, ce_data_path, mirrorCePath, actualCePath, fail_fn,
1175                              false /*call_fail_fn*/)) {
1176     // CE might unlocks and the name is decrypted
1177     // get the name and mount again
1178     ce_data_path=getAppDataDirName(mirrorCePath, package_name, ce_data_inode, fail_fn);
1179     if (ce_data_path.empty()) {
1180       ALOGE("Ignoring missing CE app data dir for %s\n", package_name.data());
1181       return;
1182     }
1183     mountAppData(package_name, ce_data_path, mirrorCePath, actualCePath, fail_fn);
1184   }
1185 }
1186 
1187 // Relabel directory
relabelDir(const char * path,const char * context,fail_fn_t fail_fn)1188 static void relabelDir(const char* path, const char* context, fail_fn_t fail_fn) {
1189   if (setfilecon(path, context) != 0) {
1190     fail_fn(CREATE_ERROR("Failed to setfilecon %s %s", path, strerror(errno)));
1191   }
1192 }
1193 
1194 // Relabel the subdirectories and symlinks in the given directory, non-recursively.
relabelSubdirs(const char * path,const char * context,fail_fn_t fail_fn)1195 static void relabelSubdirs(const char* path, const char* context, fail_fn_t fail_fn) {
1196   DIR* dir = opendir(path);
1197   if (dir == nullptr) {
1198     fail_fn(CREATE_ERROR("Failed to opendir %s", path));
1199   }
1200   struct dirent* ent;
1201   while ((ent = readdir(dir))) {
1202     if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) continue;
1203     auto filePath = StringPrintf("%s/%s", path, ent->d_name);
1204     if (ent->d_type == DT_DIR) {
1205       relabelDir(filePath.c_str(), context, fail_fn);
1206     } else if (ent->d_type == DT_LNK) {
1207       if (lsetfilecon(filePath.c_str(), context) != 0) {
1208         fail_fn(CREATE_ERROR("Failed to lsetfilecon %s %s", filePath.c_str(), strerror(errno)));
1209       }
1210     } else {
1211       fail_fn(CREATE_ERROR("Unexpected type: %d %s", ent->d_type, filePath.c_str()));
1212     }
1213   }
1214   closedir(dir);
1215 }
1216 
1217 /**
1218  * Hide the CE and DE data directories of non-related apps.
1219  *
1220  * Without this, apps can detect if any app is installed by trying to "touch" the app's CE
1221  * or DE data directory, e.g. /data/data/com.whatsapp.  This fails with EACCES if the app
1222  * is installed, or ENOENT if it's not.  Traditional file permissions or SELinux can only
1223  * block accessing those directories but can't fix fingerprinting like this.
1224  *
1225  * Instead, we hide non-related apps' data directories from the filesystem entirely by
1226  * mounting tmpfs instances over their parent directories and bind-mounting in just the
1227  * needed app data directories.  This is done in a private mount namespace.
1228  *
1229  * Steps:
1230  * (1) Collect a list of all related apps (apps with same uid and allowlisted apps) data info
1231  *     (package name, data stored volume uuid, and inode number of its CE data directory)
1232  * (2) Mount tmpfs on /data/data and /data/user{,_de}, and on /mnt/expand/$volume/user{,_de}
1233  *     for all adoptable storage volumes.  This hides all app data directories.
1234  * (3) For each related app, create stubs for its data directories in the relevant tmpfs
1235  *     instances, then bind mount in the actual directories from /data_mirror.  This works
1236  *     for both the CE and DE directories.  DE storage is always unlocked, whereas the
1237  *     app's CE directory can be found via inode number if CE storage is locked.
1238  *
1239  * Example assuming user 0, app "com.android.foo", no shared uid, and no adoptable storage:
1240  * (1) Info = ["com.android.foo", "null" (volume uuid "null"=default), "123456" (inode number)]
1241  * (2) Mount tmpfs on /data/data, /data/user, and /data/user_de.
1242  * (3) For DE storage, create a directory /data/user_de/0/com.android.foo and bind mount
1243  *     /data_mirror/data_de/0/com.android.foo onto it.
1244  * (4) Do similar for CE storage.  But if the device is in direct boot mode, then CE
1245  *     storage will be locked, so the app's CE data directory won't exist at the usual
1246  *     path /data_mirror/data_ce/0/com.android.foo.  It will still exist in
1247  *     /data_mirror/data_ce/0, but its filename will be an unpredictable no-key name.  In
1248  *     this case, we use the inode number to find the right directory instead.  Note that
1249  *     the bind-mounted app CE data directory will remain locked.  It will be unlocked
1250  *     automatically if/when the user's CE storage is unlocked, since adding an encryption
1251  *     key takes effect on a whole filesystem instance including all its mounts.
1252  */
isolateAppData(JNIEnv * env,const std::vector<std::string> & merged_data_info_list,uid_t uid,const char * process_name,jstring managed_nice_name,fail_fn_t fail_fn)1253 static void isolateAppData(JNIEnv* env, const std::vector<std::string>& merged_data_info_list,
1254     uid_t uid, const char* process_name,
1255     jstring managed_nice_name, fail_fn_t fail_fn) {
1256 
1257   const userid_t userId = multiuser_get_user_id(uid);
1258 
1259   int size = merged_data_info_list.size();
1260 
1261   // Mount tmpfs on all possible data directories, so app no longer see the original apps data.
1262   char internalCePath[PATH_MAX];
1263   char internalLegacyCePath[PATH_MAX];
1264   char internalDePath[PATH_MAX];
1265   char externalPrivateMountPath[PATH_MAX];
1266 
1267   snprintf(internalCePath, PATH_MAX, "/data/user");
1268   snprintf(internalLegacyCePath, PATH_MAX, "/data/data");
1269   snprintf(internalDePath, PATH_MAX, "/data/user_de");
1270   snprintf(externalPrivateMountPath, PATH_MAX, "/mnt/expand");
1271 
1272   // Get the "u:object_r:system_userdir_file:s0" security context.  This can be
1273   // gotten from several different places; we use /data/user.
1274   char* dataUserdirContext = nullptr;
1275   if (getfilecon(internalCePath, &dataUserdirContext) < 0) {
1276     fail_fn(CREATE_ERROR("Unable to getfilecon on %s %s", internalCePath,
1277         strerror(errno)));
1278   }
1279   // Get the "u:object_r:system_data_file:s0" security context.  This can be
1280   // gotten from several different places; we use /data/misc.
1281   char* dataFileContext = nullptr;
1282   if (getfilecon("/data/misc", &dataFileContext) < 0) {
1283     fail_fn(CREATE_ERROR("Unable to getfilecon on /data/misc %s", strerror(errno)));
1284   }
1285 
1286   MountAppDataTmpFs(internalLegacyCePath, fail_fn);
1287   MountAppDataTmpFs(internalCePath, fail_fn);
1288   MountAppDataTmpFs(internalDePath, fail_fn);
1289 
1290   // Mount tmpfs on all external vols DE and CE storage
1291   DIR* dir = opendir(externalPrivateMountPath);
1292   if (dir == nullptr) {
1293     fail_fn(CREATE_ERROR("Failed to opendir %s", externalPrivateMountPath));
1294   }
1295   struct dirent* ent;
1296   while ((ent = readdir(dir))) {
1297     if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) continue;
1298     if (ent->d_type != DT_DIR) {
1299       fail_fn(CREATE_ERROR("Unexpected type: %d %s", ent->d_type, ent->d_name));
1300     }
1301     auto volPath = StringPrintf("%s/%s", externalPrivateMountPath, ent->d_name);
1302     auto cePath = StringPrintf("%s/user", volPath.c_str());
1303     auto dePath = StringPrintf("%s/user_de", volPath.c_str());
1304     // Wait until dir user is created.
1305     WaitUntilDirReady(cePath.c_str(), fail_fn);
1306     MountAppDataTmpFs(cePath.c_str(), fail_fn);
1307     // Wait until dir user_de is created.
1308     WaitUntilDirReady(dePath.c_str(), fail_fn);
1309     MountAppDataTmpFs(dePath.c_str(), fail_fn);
1310   }
1311   closedir(dir);
1312 
1313   // No bind mounting of app data should occur in the case of a sandbox process since SDK sandboxes
1314   // should not be able to read app data. Tmpfs was mounted however since a sandbox should not have
1315   // access to app data.
1316   appid_t appId = multiuser_get_app_id(uid);
1317   bool isSdkSandboxProcess =
1318           (appId >= AID_SDK_SANDBOX_PROCESS_START && appId <= AID_SDK_SANDBOX_PROCESS_END);
1319   if (!isSdkSandboxProcess) {
1320       // Prepare default dirs for user 0 as user 0 always exists.
1321       int result = symlink("/data/data", "/data/user/0");
1322       if (result != 0) {
1323           fail_fn(CREATE_ERROR("Failed to create symlink /data/user/0 %s", strerror(errno)));
1324       }
1325       PrepareDirIfNotPresent("/data/user_de/0", DEFAULT_DATA_DIR_PERMISSION, AID_ROOT, AID_ROOT,
1326                              fail_fn);
1327 
1328       for (int i = 0; i < size; i += 3) {
1329           std::string const& packageName = merged_data_info_list[i];
1330           std::string const& volUuid = merged_data_info_list[i + 1];
1331           std::string const& inode = merged_data_info_list[i + 2];
1332 
1333           std::string::size_type sz;
1334           long long ceDataInode = std::stoll(inode, &sz);
1335 
1336           std::string actualCePath, actualDePath;
1337           if (volUuid.compare("null") != 0) {
1338               // Volume that is stored in /mnt/expand
1339               char volPath[PATH_MAX];
1340               char volCePath[PATH_MAX];
1341               char volDePath[PATH_MAX];
1342               char volCeUserPath[PATH_MAX];
1343               char volDeUserPath[PATH_MAX];
1344 
1345               snprintf(volPath, PATH_MAX, "/mnt/expand/%s", volUuid.c_str());
1346               snprintf(volCePath, PATH_MAX, "%s/user", volPath);
1347               snprintf(volDePath, PATH_MAX, "%s/user_de", volPath);
1348               snprintf(volCeUserPath, PATH_MAX, "%s/%d", volCePath, userId);
1349               snprintf(volDeUserPath, PATH_MAX, "%s/%d", volDePath, userId);
1350 
1351               PrepareDirIfNotPresent(volPath, DEFAULT_DATA_DIR_PERMISSION, AID_ROOT, AID_ROOT,
1352                                      fail_fn);
1353               PrepareDirIfNotPresent(volCePath, DEFAULT_DATA_DIR_PERMISSION, AID_ROOT, AID_ROOT,
1354                                      fail_fn);
1355               PrepareDirIfNotPresent(volDePath, DEFAULT_DATA_DIR_PERMISSION, AID_ROOT, AID_ROOT,
1356                                      fail_fn);
1357               PrepareDirIfNotPresent(volCeUserPath, DEFAULT_DATA_DIR_PERMISSION, AID_ROOT, AID_ROOT,
1358                                      fail_fn);
1359               PrepareDirIfNotPresent(volDeUserPath, DEFAULT_DATA_DIR_PERMISSION, AID_ROOT, AID_ROOT,
1360                                      fail_fn);
1361 
1362               actualCePath = volCeUserPath;
1363               actualDePath = volDeUserPath;
1364           } else {
1365               // Internal volume that stored in /data
1366               char internalCeUserPath[PATH_MAX];
1367               char internalDeUserPath[PATH_MAX];
1368               snprintf(internalCeUserPath, PATH_MAX, "/data/user/%d", userId);
1369               snprintf(internalDeUserPath, PATH_MAX, "/data/user_de/%d", userId);
1370               // If it's not user 0, create /data/user/$USER.
1371               if (userId == 0) {
1372                   actualCePath = internalLegacyCePath;
1373               } else {
1374                   PrepareDirIfNotPresent(internalCeUserPath, DEFAULT_DATA_DIR_PERMISSION, AID_ROOT,
1375                                          AID_ROOT, fail_fn);
1376                   actualCePath = internalCeUserPath;
1377               }
1378               PrepareDirIfNotPresent(internalDeUserPath, DEFAULT_DATA_DIR_PERMISSION, AID_ROOT,
1379                                      AID_ROOT, fail_fn);
1380               actualDePath = internalDeUserPath;
1381           }
1382           isolateAppDataPerPackage(userId, packageName, volUuid, ceDataInode, actualCePath,
1383                                    actualDePath, fail_fn);
1384       }
1385   }
1386 
1387   // We set the label AFTER everything is done, as we are applying
1388   // the file operations on tmpfs. If we set the label when we mount
1389   // tmpfs, SELinux will not happy as we are changing system_data_files.
1390   // Relabel dir under /data/user, including /data/user/0
1391   relabelSubdirs(internalCePath, dataFileContext, fail_fn);
1392 
1393   // Relabel /data/user
1394   relabelDir(internalCePath, dataUserdirContext, fail_fn);
1395 
1396   // Relabel /data/data
1397   relabelDir(internalLegacyCePath, dataFileContext, fail_fn);
1398 
1399   // Relabel subdirectories of /data/user_de
1400   relabelSubdirs(internalDePath, dataFileContext, fail_fn);
1401 
1402   // Relabel /data/user_de
1403   relabelDir(internalDePath, dataUserdirContext, fail_fn);
1404 
1405   // Relabel CE and DE dirs under /mnt/expand
1406   dir = opendir(externalPrivateMountPath);
1407   if (dir == nullptr) {
1408     fail_fn(CREATE_ERROR("Failed to opendir %s", externalPrivateMountPath));
1409   }
1410   while ((ent = readdir(dir))) {
1411     if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) continue;
1412     auto volPath = StringPrintf("%s/%s", externalPrivateMountPath, ent->d_name);
1413     auto cePath = StringPrintf("%s/user", volPath.c_str());
1414     auto dePath = StringPrintf("%s/user_de", volPath.c_str());
1415 
1416     relabelSubdirs(cePath.c_str(), dataFileContext, fail_fn);
1417     relabelDir(cePath.c_str(), dataUserdirContext, fail_fn);
1418     relabelSubdirs(dePath.c_str(), dataFileContext, fail_fn);
1419     relabelDir(dePath.c_str(), dataUserdirContext, fail_fn);
1420   }
1421   closedir(dir);
1422 
1423   freecon(dataUserdirContext);
1424   freecon(dataFileContext);
1425 }
1426 
1427 /**
1428  * Without sdk sandbox data isolation, the sandbox could detect if another app is installed on the
1429  * system by "touching" other data directories like /data/misc_ce/0/sdksandbox/com.whatsapp, similar
1430  * to apps without app data isolation (see {@link #isolateAppData()}).
1431  *
1432  * To prevent this, tmpfs is mounted onto misc_ce and misc_de directories on all possible volumes in
1433  * a separate mount namespace. The sandbox directory path is then created containing the name of the
1434  * client app package associated with the sdk sandbox. The contents for this (sdk level storage and
1435  * shared sdk storage) are bind mounted from the sandbox data mirror.
1436  */
isolateSdkSandboxData(JNIEnv * env,jobjectArray pkg_data_info_list,uid_t uid,const char * process_name,jstring managed_nice_name,fail_fn_t fail_fn)1437 static void isolateSdkSandboxData(JNIEnv* env, jobjectArray pkg_data_info_list, uid_t uid,
1438                                   const char* process_name, jstring managed_nice_name,
1439                                   fail_fn_t fail_fn) {
1440     const userid_t userId = multiuser_get_user_id(uid);
1441 
1442     int size = (pkg_data_info_list != nullptr) ? env->GetArrayLength(pkg_data_info_list) : 0;
1443     // The sandbox should only have information of one associated client app (package, uuid, inode)
1444     if (size != 3) {
1445         fail_fn(CREATE_ERROR(
1446                 "Unable to isolate sandbox data, incorrect associated app information"));
1447     }
1448 
1449     auto extract_fn = [env, process_name, managed_nice_name,
1450                        pkg_data_info_list](int info_list_idx) {
1451         jstring jstr = (jstring)(env->GetObjectArrayElement(pkg_data_info_list, info_list_idx));
1452         return ExtractJString(env, process_name, managed_nice_name, jstr).value();
1453     };
1454     std::string packageName = extract_fn(0);
1455     std::string volUuid = extract_fn(1);
1456 
1457     char internalCePath[PATH_MAX];
1458     char internalDePath[PATH_MAX];
1459     char externalPrivateMountPath[PATH_MAX];
1460     snprintf(internalCePath, PATH_MAX, "/data/misc_ce");
1461     snprintf(internalDePath, PATH_MAX, "/data/misc_de");
1462     snprintf(externalPrivateMountPath, PATH_MAX, "/mnt/expand");
1463 
1464     char ceUserPath[PATH_MAX];
1465     char deUserPath[PATH_MAX];
1466     if (volUuid != "null") {
1467         snprintf(ceUserPath, PATH_MAX, "%s/%s/misc_ce/%d", externalPrivateMountPath,
1468                  volUuid.c_str(), userId);
1469         snprintf(deUserPath, PATH_MAX, "%s/%s/misc_de/%d", externalPrivateMountPath,
1470                  volUuid.c_str(), userId);
1471     } else {
1472         snprintf(ceUserPath, PATH_MAX, "%s/%d", internalCePath, userId);
1473         snprintf(deUserPath, PATH_MAX, "%s/%d", internalDePath, userId);
1474     }
1475 
1476     char ceSandboxPath[PATH_MAX];
1477     char deSandboxPath[PATH_MAX];
1478     snprintf(ceSandboxPath, PATH_MAX, "%s/sdksandbox", ceUserPath);
1479     snprintf(deSandboxPath, PATH_MAX, "%s/sdksandbox", deUserPath);
1480 
1481     // If the client app using the sandbox has been installed when the device is locked and the
1482     // sandbox starts up when the device is locked, sandbox storage might not have been created.
1483     // In that case, mount tmpfs for data isolation, but don't bind mount.
1484     bool bindMountCeSandboxDataDirs = true;
1485     bool bindMountDeSandboxDataDirs = true;
1486     if (access(ceSandboxPath, F_OK) != 0) {
1487         bindMountCeSandboxDataDirs = false;
1488     }
1489     if (access(deSandboxPath, F_OK) != 0) {
1490         bindMountDeSandboxDataDirs = false;
1491     }
1492 
1493     char* context = nullptr;
1494     char* userContext = nullptr;
1495     char* sandboxContext = nullptr;
1496     if (getfilecon(internalDePath, &context) < 0) {
1497         fail_fn(CREATE_ERROR("Unable to getfilecon on %s %s", internalDePath, strerror(errno)));
1498     }
1499     if (bindMountDeSandboxDataDirs) {
1500         if (getfilecon(deUserPath, &userContext) < 0) {
1501             fail_fn(CREATE_ERROR("Unable to getfilecon on %s %s", deUserPath, strerror(errno)));
1502         }
1503         if (getfilecon(deSandboxPath, &sandboxContext) < 0) {
1504             fail_fn(CREATE_ERROR("Unable to getfilecon on %s %s", deSandboxPath, strerror(errno)));
1505         }
1506     }
1507 
1508     MountAppDataTmpFs(internalCePath, fail_fn);
1509     MountAppDataTmpFs(internalDePath, fail_fn);
1510 
1511     // Mount tmpfs on all external volumes
1512     DIR* dir = opendir(externalPrivateMountPath);
1513     if (dir == nullptr) {
1514         fail_fn(CREATE_ERROR("Failed to opendir %s", externalPrivateMountPath));
1515     }
1516     struct dirent* ent;
1517     while ((ent = readdir(dir))) {
1518         if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) continue;
1519         if (ent->d_type != DT_DIR) {
1520             fail_fn(CREATE_ERROR("Unexpected type: %d %s", ent->d_type, ent->d_name));
1521         }
1522         auto volPath = StringPrintf("%s/%s", externalPrivateMountPath, ent->d_name);
1523         auto externalCePath = StringPrintf("%s/misc_ce", volPath.c_str());
1524         auto externalDePath = StringPrintf("%s/misc_de", volPath.c_str());
1525 
1526         WaitUntilDirReady(externalCePath.c_str(), fail_fn);
1527         MountAppDataTmpFs(externalCePath.c_str(), fail_fn);
1528         WaitUntilDirReady(externalDePath.c_str(), fail_fn);
1529         MountAppDataTmpFs(externalDePath.c_str(), fail_fn);
1530     }
1531     closedir(dir);
1532 
1533     char mirrorCeSandboxPath[PATH_MAX];
1534     char mirrorDeSandboxPath[PATH_MAX];
1535     snprintf(mirrorCeSandboxPath, PATH_MAX, "/data_mirror/misc_ce/%s/%d/sdksandbox",
1536              volUuid.c_str(), userId);
1537     snprintf(mirrorDeSandboxPath, PATH_MAX, "/data_mirror/misc_de/%s/%d/sdksandbox",
1538              volUuid.c_str(), userId);
1539 
1540     if (bindMountCeSandboxDataDirs) {
1541         PrepareDir(ceUserPath, DEFAULT_DATA_DIR_PERMISSION, AID_ROOT, AID_ROOT, fail_fn);
1542         PrepareDir(ceSandboxPath, DEFAULT_DATA_DIR_PERMISSION, AID_ROOT, AID_ROOT, fail_fn);
1543         // TODO(b/231322885): Use inode numbers to find the correct app path when the device locked.
1544         createAndMountAppData(packageName, packageName, mirrorCeSandboxPath, ceSandboxPath, fail_fn,
1545                               true /*call_fail_fn*/);
1546 
1547         relabelDir(ceSandboxPath, sandboxContext, fail_fn);
1548         relabelDir(ceUserPath, userContext, fail_fn);
1549     }
1550     if (bindMountDeSandboxDataDirs) {
1551         PrepareDir(deUserPath, DEFAULT_DATA_DIR_PERMISSION, AID_ROOT, AID_ROOT, fail_fn);
1552         PrepareDir(deSandboxPath, DEFAULT_DATA_DIR_PERMISSION, AID_ROOT, AID_ROOT, fail_fn);
1553         createAndMountAppData(packageName, packageName, mirrorDeSandboxPath, deSandboxPath, fail_fn,
1554                               true /*call_fail_fn*/);
1555 
1556         relabelDir(deSandboxPath, sandboxContext, fail_fn);
1557         relabelDir(deUserPath, userContext, fail_fn);
1558     }
1559 
1560     // We set the label AFTER everything is done, as we are applying
1561     // the file operations on tmpfs. If we set the label when we mount
1562     // tmpfs, SELinux will not happy as we are changing system_data_files.
1563     relabelDir(internalCePath, context, fail_fn);
1564     relabelDir(internalDePath, context, fail_fn);
1565 
1566     // Relabel CE and DE dirs under /mnt/expand
1567     dir = opendir(externalPrivateMountPath);
1568     if (dir == nullptr) {
1569         fail_fn(CREATE_ERROR("Failed to opendir %s", externalPrivateMountPath));
1570     }
1571     while ((ent = readdir(dir))) {
1572         if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) continue;
1573         auto volPath = StringPrintf("%s/%s", externalPrivateMountPath, ent->d_name);
1574         auto externalCePath = StringPrintf("%s/misc_ce", volPath.c_str());
1575         auto externalDePath = StringPrintf("%s/misc_de", volPath.c_str());
1576         relabelDir(externalCePath.c_str(), context, fail_fn);
1577         relabelDir(externalDePath.c_str(), context, fail_fn);
1578     }
1579     closedir(dir);
1580 
1581     if (bindMountDeSandboxDataDirs) {
1582         freecon(sandboxContext);
1583         freecon(userContext);
1584     }
1585     freecon(context);
1586 }
1587 
insertPackagesToMergedList(JNIEnv * env,std::vector<std::string> & merged_data_info_list,jobjectArray data_info_list,const char * process_name,jstring managed_nice_name,fail_fn_t fail_fn)1588 static void insertPackagesToMergedList(JNIEnv* env,
1589   std::vector<std::string>& merged_data_info_list,
1590   jobjectArray data_info_list, const char* process_name,
1591   jstring managed_nice_name, fail_fn_t fail_fn) {
1592 
1593   auto extract_fn = std::bind(ExtractJString, env, process_name, managed_nice_name, _1);
1594 
1595   int size = (data_info_list != nullptr) ? env->GetArrayLength(data_info_list) : 0;
1596   // Size should be a multiple of 3, as it contains list of <package_name, volume_uuid, inode>
1597   if ((size % 3) != 0) {
1598     fail_fn(CREATE_ERROR("Wrong data_info_list size %d", size));
1599   }
1600 
1601   for (int i = 0; i < size; i += 3) {
1602     jstring package_str = (jstring) (env->GetObjectArrayElement(data_info_list, i));
1603     std::string packageName = extract_fn(package_str).value();
1604     merged_data_info_list.push_back(packageName);
1605 
1606     jstring vol_str = (jstring) (env->GetObjectArrayElement(data_info_list, i + 1));
1607     std::string volUuid = extract_fn(vol_str).value();
1608     merged_data_info_list.push_back(volUuid);
1609 
1610     jstring inode_str = (jstring) (env->GetObjectArrayElement(data_info_list, i + 2));
1611     std::string inode = extract_fn(inode_str).value();
1612     merged_data_info_list.push_back(inode);
1613   }
1614 }
1615 
isolateAppData(JNIEnv * env,jobjectArray pkg_data_info_list,jobjectArray allowlisted_data_info_list,uid_t uid,const char * process_name,jstring managed_nice_name,fail_fn_t fail_fn)1616 static void isolateAppData(JNIEnv* env, jobjectArray pkg_data_info_list,
1617                            jobjectArray allowlisted_data_info_list, uid_t uid,
1618                            const char* process_name, jstring managed_nice_name, fail_fn_t fail_fn) {
1619     std::vector<std::string> merged_data_info_list;
1620     insertPackagesToMergedList(env, merged_data_info_list, pkg_data_info_list, process_name,
1621                                managed_nice_name, fail_fn);
1622     insertPackagesToMergedList(env, merged_data_info_list, allowlisted_data_info_list, process_name,
1623                                managed_nice_name, fail_fn);
1624 
1625     isolateAppData(env, merged_data_info_list, uid, process_name, managed_nice_name, fail_fn);
1626 }
1627 
1628 /**
1629  * Like isolateAppData(), isolate jit profile directories, so apps don't see what
1630  * other apps are installed by reading content inside /data/misc/profiles/cur.
1631  *
1632  * The implementation is similar to isolateAppData(), it creates a tmpfs
1633  * on /data/misc/profiles/cur, and bind mounts related package profiles to it.
1634  */
isolateJitProfile(JNIEnv * env,jobjectArray pkg_data_info_list,uid_t uid,const char * process_name,jstring managed_nice_name,fail_fn_t fail_fn)1635 static void isolateJitProfile(JNIEnv* env, jobjectArray pkg_data_info_list,
1636     uid_t uid, const char* process_name, jstring managed_nice_name,
1637     fail_fn_t fail_fn) {
1638 
1639   auto extract_fn = std::bind(ExtractJString, env, process_name, managed_nice_name, _1);
1640   const userid_t user_id = multiuser_get_user_id(uid);
1641 
1642   int size = (pkg_data_info_list != nullptr) ? env->GetArrayLength(pkg_data_info_list) : 0;
1643   // Size should be a multiple of 3, as it contains list of <package_name, volume_uuid, inode>
1644   if ((size % 3) != 0) {
1645     fail_fn(CREATE_ERROR("Wrong pkg_inode_list size %d", size));
1646   }
1647 
1648   // Mount (namespace) tmpfs on profile directory, so apps no longer access
1649   // the original profile directory anymore.
1650   MountAppDataTmpFs(kCurProfileDirPath, fail_fn);
1651   MountAppDataTmpFs(kRefProfileDirPath, fail_fn);
1652 
1653   // Sandbox processes do not have JIT profile, so no data needs to be bind mounted. However, it
1654   // should still not have access to JIT profile, so tmpfs is mounted.
1655   appid_t appId = multiuser_get_app_id(uid);
1656   if (appId >= AID_SDK_SANDBOX_PROCESS_START && appId <= AID_SDK_SANDBOX_PROCESS_END) {
1657       return;
1658   }
1659 
1660   // Create profile directory for this user.
1661   std::string actualCurUserProfile = StringPrintf("%s/%d", kCurProfileDirPath, user_id);
1662   PrepareDir(actualCurUserProfile, DEFAULT_DATA_DIR_PERMISSION, AID_ROOT, AID_ROOT, fail_fn);
1663 
1664   for (int i = 0; i < size; i += 3) {
1665     jstring package_str = (jstring) (env->GetObjectArrayElement(pkg_data_info_list, i));
1666     std::string packageName = extract_fn(package_str).value();
1667 
1668     std::string actualCurPackageProfile = StringPrintf("%s/%s", actualCurUserProfile.c_str(),
1669         packageName.c_str());
1670     std::string mirrorCurPackageProfile = StringPrintf("/data_mirror/cur_profiles/%d/%s",
1671         user_id, packageName.c_str());
1672     std::string actualRefPackageProfile = StringPrintf("%s/%s", kRefProfileDirPath,
1673         packageName.c_str());
1674     std::string mirrorRefPackageProfile = StringPrintf("/data_mirror/ref_profiles/%s",
1675         packageName.c_str());
1676 
1677     if (access(mirrorCurPackageProfile.c_str(), F_OK) != 0) {
1678       ALOGW("Can't access app profile directory: %s", mirrorCurPackageProfile.c_str());
1679       continue;
1680     }
1681     if (access(mirrorRefPackageProfile.c_str(), F_OK) != 0) {
1682       ALOGW("Can't access app profile directory: %s", mirrorRefPackageProfile.c_str());
1683       continue;
1684     }
1685 
1686     PrepareDir(actualCurPackageProfile, DEFAULT_DATA_DIR_PERMISSION, uid, uid, fail_fn);
1687     BindMount(mirrorCurPackageProfile, actualCurPackageProfile, fail_fn);
1688     PrepareDir(actualRefPackageProfile, DEFAULT_DATA_DIR_PERMISSION, uid, uid, fail_fn);
1689     BindMount(mirrorRefPackageProfile, actualRefPackageProfile, fail_fn);
1690   }
1691 }
1692 
WaitUntilDirReady(const std::string & target,fail_fn_t fail_fn)1693 static void WaitUntilDirReady(const std::string& target, fail_fn_t fail_fn) {
1694   unsigned int sleepIntervalUs = STORAGE_DIR_CHECK_INIT_INTERVAL_US;
1695 
1696   // This is just an approximate value as it doesn't need to be very accurate.
1697   unsigned int sleepTotalUs = 0;
1698 
1699   const char* dir_path = target.c_str();
1700   while (sleepTotalUs < STORAGE_DIR_CHECK_TIMEOUT_US) {
1701     if (access(dir_path, F_OK) == 0) {
1702       return;
1703     }
1704     // Failed, so we add exponential backoff and retry
1705     usleep(sleepIntervalUs);
1706     sleepTotalUs += sleepIntervalUs;
1707     sleepIntervalUs = std::min<unsigned int>(
1708         sleepIntervalUs * STORAGE_DIR_CHECK_RETRY_MULTIPLIER,
1709         STORAGE_DIR_CHECK_MAX_INTERVAL_US);
1710   }
1711   // Last chance and get the latest errno if it fails.
1712   if (access(dir_path, F_OK) == 0) {
1713     return;
1714   }
1715   fail_fn(CREATE_ERROR("Error dir is not ready %s: %s", dir_path, strerror(errno)));
1716 }
1717 
// Public String constants of android.os.Build, each paired with the system property it is
// pulled from: {field name, property name}.
std::pair<const char*, const char*> build_constants[] = {
        {"ID", "ro.build.id"},
        {"DISPLAY", "ro.build.display.id"},
        {"PRODUCT", "ro.product.name"},
        {"DEVICE", "ro.product.device"},
        {"BOARD", "ro.product.board"},
        {"MANUFACTURER", "ro.product.manufacturer"},
        {"BRAND", "ro.product.brand"},
        {"MODEL", "ro.product.model"},
        {"BOOTLOADER", "ro.bootloader"},
        {"HARDWARE", "ro.hardware"},
        {"SKU", "ro.boot.hardware.sku"},
        {"ODM_SKU", "ro.boot.product.hardware.sku"},
        {"TAGS", "ro.build.tags"},
        {"TYPE", "ro.build.type"},
        {"USER", "ro.build.user"},
        {"HOST", "ro.build.host"},
};
1737 
// Public String constants of Build.VERSION, each paired with the system property it is pulled
// from: {field name, property name}.
std::pair<const char*, const char*> build_version_constants[] = {
        {"INCREMENTAL", "ro.build.version.incremental"},
        {"RELEASE", "ro.build.version.release"},
        {"RELEASE_OR_CODENAME", "ro.build.version.release_or_codename"},
        {"RELEASE_OR_PREVIEW_DISPLAY", "ro.build.version.release_or_preview_display"},
        {"BASE_OS", "ro.build.version.base_os"},
        {"SECURITY_PATCH", "ro.build.version.security_patch"},
        {"SDK", "ro.build.version.sdk"},
        {"PREVIEW_SDK_FINGERPRINT", "ro.build.version.preview_sdk_fingerprint"},
        {"CODENAME", "ro.build.version.codename"},
};
1750 
ReloadBuildJavaConstant(JNIEnv * env,jclass build_class,const char * field_name,const char * field_signature,const char * sysprop_name)1751 static void ReloadBuildJavaConstant(JNIEnv* env, jclass build_class, const char* field_name,
1752                                     const char* field_signature, const char* sysprop_name) {
1753   const prop_info* prop_info = __system_property_find(sysprop_name);
1754   std::string new_value;
1755   __system_property_read_callback(
1756           prop_info,
1757           [](void* cookie, const char* name, const char* value, unsigned serial) {
1758               auto new_value = reinterpret_cast<std::string*>(cookie);
1759               *new_value = value;
1760           },
1761           &new_value);
1762   jfieldID fieldId = env->GetStaticFieldID(build_class, field_name, field_signature);
1763   if (strcmp(field_signature, "I") == 0) {
1764     env->SetStaticIntField(build_class, fieldId, jint(strtol(new_value.c_str(), nullptr, 0)));
1765   } else if (strcmp(field_signature, "Ljava/lang/String;") == 0) {
1766     jstring string_val = env->NewStringUTF(new_value.c_str());
1767     env->SetStaticObjectField(build_class, fieldId, string_val);
1768   } else if (strcmp(field_signature, "[Ljava/lang/String;") == 0) {
1769     auto stream = std::stringstream(new_value);
1770     std::vector<std::string> items;
1771     std::string segment;
1772     while (std::getline(stream, segment, ',')) {
1773       items.push_back(segment);
1774     }
1775     jclass string_class = env->FindClass("java/lang/String");
1776     jobjectArray string_arr = env->NewObjectArray(items.size(), string_class, nullptr);
1777     for (size_t i = 0; i < items.size(); i++) {
1778       jstring string_arr_val = env->NewStringUTF(items.at(i).c_str());
1779       env->SetObjectArrayElement(string_arr, i, string_arr_val);
1780     }
1781     env->SetStaticObjectField(build_class, fieldId, string_arr);
1782   } else if (strcmp(field_signature, "J") == 0) {
1783     env->SetStaticLongField(build_class, fieldId, jlong(strtoll(new_value.c_str(), nullptr, 0)));
1784   }
1785 }
1786 
ReloadBuildJavaConstants(JNIEnv * env)1787 static void ReloadBuildJavaConstants(JNIEnv* env) {
1788   jclass build_cls = env->FindClass("android/os/Build");
1789   size_t arr_size = sizeof(build_constants) / sizeof(build_constants[0]);
1790   for (size_t i = 0; i < arr_size; i++) {
1791     const char* field_name = build_constants[i].first;
1792     const char* sysprop_name = build_constants[i].second;
1793     ReloadBuildJavaConstant(env, build_cls, field_name, "Ljava/lang/String;", sysprop_name);
1794   }
1795   jclass build_version_cls = env->FindClass("android/os/Build$VERSION");
1796   arr_size = sizeof(build_version_constants) / sizeof(build_version_constants[0]);
1797   for (size_t i = 0; i < arr_size; i++) {
1798     const char* field_name = build_version_constants[i].first;
1799     const char* sysprop_name = build_version_constants[i].second;
1800     ReloadBuildJavaConstant(env, build_version_cls, field_name, "Ljava/lang/String;", sysprop_name);
1801   }
1802 
1803   // Reload the public String[] constants
1804   ReloadBuildJavaConstant(env, build_cls, "SUPPORTED_ABIS", "[Ljava/lang/String;",
1805                           "ro.product.cpu.abilist");
1806   ReloadBuildJavaConstant(env, build_cls, "SUPPORTED_32_BIT_ABIS", "[Ljava/lang/String;",
1807                           "ro.product.cpu.abilist32");
1808   ReloadBuildJavaConstant(env, build_cls, "SUPPORTED_64_BIT_ABIS", "[Ljava/lang/String;",
1809                           "ro.product.cpu.abilist64");
1810   ReloadBuildJavaConstant(env, build_version_cls, "ALL_CODENAMES", "[Ljava/lang/String;",
1811                           "ro.build.version.all_codenames");
1812 
1813   // Reload the public int/long constants
1814   ReloadBuildJavaConstant(env, build_cls, "TIME", "J", "ro.build.date.utc");
1815   ReloadBuildJavaConstant(env, build_version_cls, "SDK_INT", "I", "ro.build.version.sdk");
1816   ReloadBuildJavaConstant(env, build_version_cls, "PREVIEW_SDK_INT", "I",
1817                           "ro.build.version.preview_sdk");
1818 
1819   // Re-derive the fingerprint
1820   jmethodID derive_fingerprint =
1821           env->GetStaticMethodID(build_cls, "deriveFingerprint", "()Ljava/lang/String;");
1822   auto new_fingerprint = (jstring)(env->CallStaticObjectMethod(build_cls, derive_fingerprint));
1823   jfieldID fieldId = env->GetStaticFieldID(build_cls, "FINGERPRINT", "Ljava/lang/String;");
1824   env->SetStaticObjectField(build_cls, fieldId, new_fingerprint);
1825 }
1826 
BindMountSyspropOverride(fail_fn_t fail_fn,JNIEnv * env)1827 static void BindMountSyspropOverride(fail_fn_t fail_fn, JNIEnv* env) {
1828   std::string source = "/dev/__properties__/appcompat_override";
1829   std::string target = "/dev/__properties__";
1830   if (access(source.c_str(), F_OK) != 0) {
1831       return;
1832   }
1833   if (access(target.c_str(), F_OK) != 0) {
1834       return;
1835   }
1836   BindMount(source, target, fail_fn);
1837   // Reload the system properties file, to ensure new values are read into memory
1838   __system_properties_zygote_reload();
1839   // android.os.Build constants are pulled from system properties, so they must be reloaded, too
1840   ReloadBuildJavaConstants(env);
1841 }
1842 
BindMountStorageToLowerFs(const userid_t user_id,const uid_t uid,const char * dir_name,const char * package,fail_fn_t fail_fn)1843 static void BindMountStorageToLowerFs(const userid_t user_id, const uid_t uid,
1844     const char* dir_name, const char* package, fail_fn_t fail_fn) {
1845     bool hasSdcardFs = IsSdcardfsUsed();
1846     std::string source;
1847     if (hasSdcardFs) {
1848         source = StringPrintf("/mnt/runtime/default/emulated/%d/%s/%s", user_id, dir_name, package);
1849     } else {
1850         source = StringPrintf("/mnt/pass_through/%d/emulated/%d/%s/%s", user_id, user_id, dir_name,
1851                               package);
1852     }
1853 
1854   // Directory might be not ready, as prepareStorageDirs() is running asynchronously in ProcessList,
1855   // so wait until dir is created.
1856   WaitUntilDirReady(source, fail_fn);
1857   std::string target = StringPrintf("/storage/emulated/%d/%s/%s", user_id, dir_name, package);
1858 
1859   // As the parent is mounted as tmpfs, we need to create the target dir here.
1860   PrepareDirIfNotPresent(target, 0700, uid, uid, fail_fn);
1861 
1862   if (access(source.c_str(), F_OK) != 0) {
1863     fail_fn(CREATE_ERROR("Error accessing %s: %s", source.c_str(), strerror(errno)));
1864   }
1865   if (access(target.c_str(), F_OK) != 0) {
1866     fail_fn(CREATE_ERROR("Error accessing %s: %s", target.c_str(), strerror(errno)));
1867   }
1868   BindMount(source, target, fail_fn);
1869 }
1870 
1871 // Mount tmpfs on Android/data and Android/obb, then bind mount all app visible package
1872 // directories in data and obb directories.
BindMountStorageDirs(JNIEnv * env,jobjectArray pkg_data_info_list,uid_t uid,const char * process_name,jstring managed_nice_name,fail_fn_t fail_fn)1873 static void BindMountStorageDirs(JNIEnv* env, jobjectArray pkg_data_info_list,
1874     uid_t uid, const char* process_name, jstring managed_nice_name, fail_fn_t fail_fn) {
1875 
1876   auto extract_fn = std::bind(ExtractJString, env, process_name, managed_nice_name, _1);
1877   const userid_t user_id = multiuser_get_user_id(uid);
1878 
1879   // Fuse is ready, so we can start using fuse path.
1880   int size = (pkg_data_info_list != nullptr) ? env->GetArrayLength(pkg_data_info_list) : 0;
1881 
1882   // Create tmpfs on Android/obb and Android/data so these 2 dirs won't enter fuse anymore.
1883   std::string androidObbDir = StringPrintf("/storage/emulated/%d/Android/obb", user_id);
1884   MountAppDataTmpFs(androidObbDir, fail_fn);
1885   std::string androidDataDir = StringPrintf("/storage/emulated/%d/Android/data", user_id);
1886   MountAppDataTmpFs(androidDataDir, fail_fn);
1887 
1888   // Bind mount each package obb directory
1889   for (int i = 0; i < size; i += 3) {
1890     jstring package_str = (jstring) (env->GetObjectArrayElement(pkg_data_info_list, i));
1891     std::string packageName = extract_fn(package_str).value();
1892     BindMountStorageToLowerFs(user_id, uid, "Android/obb", packageName.c_str(), fail_fn);
1893     BindMountStorageToLowerFs(user_id, uid, "Android/data", packageName.c_str(), fail_fn);
1894   }
1895 }
1896 
1897 // Utility routine to specialize a zygote child process.
SpecializeCommon(JNIEnv * env,uid_t uid,gid_t gid,jintArray gids,jint runtime_flags,jobjectArray rlimits,jlong permitted_capabilities,jlong effective_capabilities,jlong bounding_capabilities,jint mount_external,jstring managed_se_info,jstring managed_nice_name,bool is_system_server,bool is_child_zygote,jstring managed_instruction_set,jstring managed_app_data_dir,bool is_top_app,jobjectArray pkg_data_info_list,jobjectArray allowlisted_data_info_list,bool mount_data_dirs,bool mount_storage_dirs,bool mount_sysprop_overrides)1898 static void SpecializeCommon(JNIEnv* env, uid_t uid, gid_t gid, jintArray gids, jint runtime_flags,
1899                              jobjectArray rlimits, jlong permitted_capabilities,
1900                              jlong effective_capabilities, jlong bounding_capabilities,
1901                              jint mount_external, jstring managed_se_info,
1902                              jstring managed_nice_name, bool is_system_server, bool is_child_zygote,
1903                              jstring managed_instruction_set, jstring managed_app_data_dir,
1904                              bool is_top_app, jobjectArray pkg_data_info_list,
1905                              jobjectArray allowlisted_data_info_list, bool mount_data_dirs,
1906                              bool mount_storage_dirs, bool mount_sysprop_overrides) {
1907     const char* process_name = is_system_server ? "system_server" : "zygote";
1908     auto fail_fn = std::bind(ZygoteFailure, env, process_name, managed_nice_name, _1);
1909     auto extract_fn = std::bind(ExtractJString, env, process_name, managed_nice_name, _1);
1910 
1911     auto se_info = extract_fn(managed_se_info);
1912     auto nice_name = extract_fn(managed_nice_name);
1913     auto instruction_set = extract_fn(managed_instruction_set);
1914     auto app_data_dir = extract_fn(managed_app_data_dir);
1915 
1916     // Permit bounding capabilities
1917     permitted_capabilities |= bounding_capabilities;
1918 
1919     // Keep capabilities across UID change, unless we're staying root.
1920     if (uid != 0) {
1921         EnableKeepCapabilities(fail_fn);
1922     }
1923 
1924     SetInheritable(permitted_capabilities, fail_fn);
1925 
1926     DropCapabilitiesBoundingSet(fail_fn, bounding_capabilities);
1927 
1928     bool need_pre_initialize_native_bridge = !is_system_server && instruction_set.has_value() &&
1929             android::NativeBridgeAvailable() &&
1930             // Native bridge may be already initialized if this
1931             // is an app forked from app-zygote.
1932             !android::NativeBridgeInitialized() &&
1933             android::NeedsNativeBridge(instruction_set.value().c_str());
1934 
1935     MountEmulatedStorage(uid, mount_external, need_pre_initialize_native_bridge, fail_fn);
1936 
1937     // Make sure app is running in its own mount namespace before isolating its data directories.
1938     ensureInAppMountNamespace(fail_fn);
1939 
1940     // Isolate app data, jit profile and sandbox data directories by overlaying a tmpfs on those
1941     // dirs and bind mount all related packages separately.
1942     if (mount_data_dirs) {
1943         // Sdk sandbox data isolation does not need to occur for app processes since sepolicy
1944         // prevents access to sandbox data anyway.
1945         appid_t appId = multiuser_get_app_id(uid);
1946         if (appId >= AID_SDK_SANDBOX_PROCESS_START && appId <= AID_SDK_SANDBOX_PROCESS_END) {
1947             isolateSdkSandboxData(env, pkg_data_info_list, uid, process_name, managed_nice_name,
1948                                   fail_fn);
1949         }
1950         isolateAppData(env, pkg_data_info_list, allowlisted_data_info_list, uid, process_name,
1951                        managed_nice_name, fail_fn);
1952         isolateJitProfile(env, pkg_data_info_list, uid, process_name, managed_nice_name, fail_fn);
1953     }
1954     // MOUNT_EXTERNAL_INSTALLER, MOUNT_EXTERNAL_PASS_THROUGH, MOUNT_EXTERNAL_ANDROID_WRITABLE apps
1955     // will have mount_storage_dirs == false here (set by ProcessList.needsStorageDataIsolation()),
1956     // and hence they won't bind mount storage dirs.
1957     if (mount_storage_dirs) {
1958         BindMountStorageDirs(env, pkg_data_info_list, uid, process_name, managed_nice_name,
1959                              fail_fn);
1960     }
1961 
1962     if (mount_sysprop_overrides) {
1963         BindMountSyspropOverride(fail_fn, env);
1964     }
1965 
1966     // If this zygote isn't root, it won't be able to create a process group,
1967     // since the directory is owned by root.
1968     if (getuid() == 0) {
1969         const int rc = createProcessGroup(uid, getpid());
1970         if (rc != 0) {
1971             fail_fn(rc == -EROFS ? CREATE_ERROR("createProcessGroup failed, kernel missing "
1972                                                 "CONFIG_CGROUP_CPUACCT?")
1973                                  : CREATE_ERROR("createProcessGroup(%d, %d) failed: %s", uid,
1974                                                 /* pid= */ 0, strerror(-rc)));
1975         }
1976     }
1977 
1978     SetGids(env, gids, is_child_zygote, fail_fn);
1979     SetRLimits(env, rlimits, fail_fn);
1980 
1981     if (need_pre_initialize_native_bridge) {
1982         // Due to the logic behind need_pre_initialize_native_bridge we know that
1983         // instruction_set contains a value.
1984         android::PreInitializeNativeBridge(app_data_dir.has_value() ? app_data_dir.value().c_str()
1985                                                                     : nullptr,
1986                                            instruction_set.value().c_str());
1987     }
1988 
1989     if (is_system_server && !(runtime_flags & RuntimeFlags::PROFILE_SYSTEM_SERVER)) {
1990         // Prefetch the classloader for the system server. This is done early to
1991         // allow a tie-down of the proper system server selinux domain.
1992         // We don't prefetch when the system server is being profiled to avoid
1993         // loading AOT code.
1994         env->CallStaticObjectMethod(gZygoteInitClass, gGetOrCreateSystemServerClassLoader);
1995         if (env->ExceptionCheck()) {
1996             // Be robust here. The Java code will attempt to create the classloader
1997             // at a later point (but may not have rights to use AoT artifacts).
1998             env->ExceptionClear();
1999         }
2000         // Also prefetch standalone system server jars. The reason for doing this here is the same
2001         // as above.
2002         env->CallStaticVoidMethod(gZygoteInitClass, gPrefetchStandaloneSystemServerJars);
2003         if (env->ExceptionCheck()) {
2004             env->ExceptionClear();
2005         }
2006     }
2007 
2008     if (setresgid(gid, gid, gid) == -1) {
2009         fail_fn(CREATE_ERROR("setresgid(%d) failed: %s", gid, strerror(errno)));
2010     }
2011 
2012     // Must be called when the new process still has CAP_SYS_ADMIN, in this case,
2013     // before changing uid from 0, which clears capabilities.  The other
2014     // alternative is to call prctl(PR_SET_NO_NEW_PRIVS, 1) afterward, but that
2015     // breaks SELinux domain transition (see b/71859146).  As the result,
2016     // privileged syscalls used below still need to be accessible in app process.
2017     SetUpSeccompFilter(uid, is_child_zygote);
2018 
2019     // Must be called before losing the permission to set scheduler policy.
2020     SetSchedulerPolicy(fail_fn, is_top_app);
2021 
2022     if (setresuid(uid, uid, uid) == -1) {
2023         fail_fn(CREATE_ERROR("setresuid(%d) failed: %s", uid, strerror(errno)));
2024     }
2025 
2026     // The "dumpable" flag of a process, which controls core dump generation, is
2027     // overwritten by the value in /proc/sys/fs/suid_dumpable when the effective
2028     // user or group ID changes. See proc(5) for possible values. In most cases,
2029     // the value is 0, so core dumps are disabled for zygote children. However,
2030     // when running in a Chrome OS container, the value is already set to 2,
2031     // which allows the external crash reporter to collect all core dumps. Since
2032     // only system crashes are interested, core dump is disabled for app
2033     // processes. This also ensures compliance with CTS.
2034     int dumpable = prctl(PR_GET_DUMPABLE);
2035     if (dumpable == -1) {
2036         ALOGE("prctl(PR_GET_DUMPABLE) failed: %s", strerror(errno));
2037         RuntimeAbort(env, __LINE__, "prctl(PR_GET_DUMPABLE) failed");
2038     }
2039 
2040     if (dumpable == 2 && uid >= AID_APP) {
2041         if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) == -1) {
2042             ALOGE("prctl(PR_SET_DUMPABLE, 0) failed: %s", strerror(errno));
2043             RuntimeAbort(env, __LINE__, "prctl(PR_SET_DUMPABLE, 0) failed");
2044         }
2045     }
2046 
2047     // Set process properties to enable debugging if required.
2048     if ((runtime_flags & RuntimeFlags::DEBUG_ENABLE_PTRACE) != 0) {
2049         EnableDebugger();
2050         // Don't pass unknown flag to the ART runtime.
2051         runtime_flags &= ~RuntimeFlags::DEBUG_ENABLE_PTRACE;
2052     }
2053     if ((runtime_flags & RuntimeFlags::PROFILE_FROM_SHELL) != 0) {
2054         // simpleperf needs the process to be dumpable to profile it.
2055         if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) == -1) {
2056             ALOGE("prctl(PR_SET_DUMPABLE) failed: %s", strerror(errno));
2057             RuntimeAbort(env, __LINE__, "prctl(PR_SET_DUMPABLE, 1) failed");
2058         }
2059     }
2060 
2061     HeapTaggingLevel heap_tagging_level;
2062     switch (runtime_flags & RuntimeFlags::MEMORY_TAG_LEVEL_MASK) {
2063         case RuntimeFlags::MEMORY_TAG_LEVEL_TBI:
2064             heap_tagging_level = M_HEAP_TAGGING_LEVEL_TBI;
2065             break;
2066         case RuntimeFlags::MEMORY_TAG_LEVEL_ASYNC:
2067             heap_tagging_level = M_HEAP_TAGGING_LEVEL_ASYNC;
2068             break;
2069         case RuntimeFlags::MEMORY_TAG_LEVEL_SYNC:
2070             heap_tagging_level = M_HEAP_TAGGING_LEVEL_SYNC;
2071             break;
2072         default:
2073             heap_tagging_level = M_HEAP_TAGGING_LEVEL_NONE;
2074             break;
2075     }
2076     mallopt(M_BIONIC_SET_HEAP_TAGGING_LEVEL, heap_tagging_level);
2077 
2078     // Now that we've used the flag, clear it so that we don't pass unknown flags to the ART
2079     // runtime.
2080     runtime_flags &= ~RuntimeFlags::MEMORY_TAG_LEVEL_MASK;
2081 
2082     // Avoid heap zero initialization for applications without MTE. Zero init may
2083     // cause app compat problems, use more memory, or reduce performance. While it
2084     // would be nice to have them for apps, we will have to wait until they are
2085     // proven out, have more efficient hardware, and/or apply them only to new
2086     // applications.
2087     if (!(runtime_flags & RuntimeFlags::NATIVE_HEAP_ZERO_INIT_ENABLED)) {
2088         mallopt(M_BIONIC_ZERO_INIT, 0);
2089     }
2090 
2091     // Now that we've used the flag, clear it so that we don't pass unknown flags to the ART
2092     // runtime.
2093     runtime_flags &= ~RuntimeFlags::NATIVE_HEAP_ZERO_INIT_ENABLED;
2094 
2095     const char* nice_name_ptr = nice_name.has_value() ? nice_name.value().c_str() : nullptr;
2096     android_mallopt_gwp_asan_options_t gwp_asan_options;
2097     const char* kGwpAsanAppRecoverableSysprop =
2098             "persist.device_config.memory_safety_native.gwp_asan_recoverable_apps";
2099     // The system server doesn't have its nice name set by the time SpecializeCommon is called.
2100     gwp_asan_options.program_name = nice_name_ptr ?: process_name;
2101     switch (runtime_flags & RuntimeFlags::GWP_ASAN_LEVEL_MASK) {
2102         default:
2103         case RuntimeFlags::GWP_ASAN_LEVEL_DEFAULT:
2104             gwp_asan_options.mode = GetBoolProperty(kGwpAsanAppRecoverableSysprop, true)
2105                     ? Mode::APP_MANIFEST_DEFAULT
2106                     : Mode::APP_MANIFEST_NEVER;
2107             android_mallopt(M_INITIALIZE_GWP_ASAN, &gwp_asan_options, sizeof(gwp_asan_options));
2108             break;
2109         case RuntimeFlags::GWP_ASAN_LEVEL_NEVER:
2110             gwp_asan_options.mode = Mode::APP_MANIFEST_NEVER;
2111             android_mallopt(M_INITIALIZE_GWP_ASAN, &gwp_asan_options, sizeof(gwp_asan_options));
2112             break;
2113         case RuntimeFlags::GWP_ASAN_LEVEL_ALWAYS:
2114             gwp_asan_options.mode = Mode::APP_MANIFEST_ALWAYS;
2115             android_mallopt(M_INITIALIZE_GWP_ASAN, &gwp_asan_options, sizeof(gwp_asan_options));
2116             break;
2117         case RuntimeFlags::GWP_ASAN_LEVEL_LOTTERY:
2118             gwp_asan_options.mode = Mode::APP_MANIFEST_DEFAULT;
2119             android_mallopt(M_INITIALIZE_GWP_ASAN, &gwp_asan_options, sizeof(gwp_asan_options));
2120             break;
2121     }
2122     // Now that we've used the flag, clear it so that we don't pass unknown flags to the ART
2123     // runtime.
2124     runtime_flags &= ~RuntimeFlags::GWP_ASAN_LEVEL_MASK;
2125 
2126     SetCapabilities(permitted_capabilities, effective_capabilities, permitted_capabilities,
2127                     fail_fn);
2128 
2129     __android_log_close();
2130     AStatsSocket_close();
2131 
2132     const char* se_info_ptr = se_info.has_value() ? se_info.value().c_str() : nullptr;
2133 
2134     if (selinux_android_setcontext(uid, is_system_server, se_info_ptr, nice_name_ptr) == -1) {
2135         fail_fn(CREATE_ERROR("selinux_android_setcontext(%d, %d, \"%s\", \"%s\") failed", uid,
2136                              is_system_server, se_info_ptr, nice_name_ptr));
2137     }
2138 
2139     // Make it easier to debug audit logs by setting the main thread's name to the
2140     // nice name rather than "app_process".
2141     if (nice_name.has_value()) {
2142         SetThreadName(nice_name.value());
2143     } else if (is_system_server) {
2144         SetThreadName("system_server");
2145     }
2146 
2147     // Unset the SIGCHLD handler, but keep ignoring SIGHUP (rationale in SetSignalHandlers).
2148     UnsetChldSignalHandler();
2149 
2150     if (is_system_server) {
2151         env->CallStaticVoidMethod(gZygoteClass, gCallPostForkSystemServerHooks, runtime_flags);
2152         if (env->ExceptionCheck()) {
2153             fail_fn("Error calling post fork system server hooks.");
2154         }
2155 
2156         // TODO(b/117874058): Remove hardcoded label here.
2157         static const char* kSystemServerLabel = "u:r:system_server:s0";
2158         if (selinux_android_setcon(kSystemServerLabel) != 0) {
2159             fail_fn(CREATE_ERROR("selinux_android_setcon(%s)", kSystemServerLabel));
2160         }
2161     }
2162 
2163     if (is_child_zygote) {
2164         initUnsolSocketToSystemServer();
2165     }
2166 
2167     env->CallStaticVoidMethod(gZygoteClass, gCallPostForkChildHooks, runtime_flags,
2168                               is_system_server, is_child_zygote, managed_instruction_set);
2169 
2170     // Reset the process priority to the default value.
2171     setpriority(PRIO_PROCESS, 0, PROCESS_PRIORITY_DEFAULT);
2172 
2173     if (env->ExceptionCheck()) {
2174         fail_fn("Error calling post fork hooks.");
2175     }
2176 }
2177 
GetEffectiveCapabilityMask(JNIEnv * env)2178 static uint64_t GetEffectiveCapabilityMask(JNIEnv* env) {
2179     __user_cap_header_struct capheader;
2180     memset(&capheader, 0, sizeof(capheader));
2181     capheader.version = _LINUX_CAPABILITY_VERSION_3;
2182     capheader.pid = 0;
2183 
2184     __user_cap_data_struct capdata[2];
2185     if (capget(&capheader, &capdata[0]) == -1) {
2186         ALOGE("capget failed: %s", strerror(errno));
2187         RuntimeAbort(env, __LINE__, "capget failed");
2188     }
2189 
2190     return capdata[0].effective | (static_cast<uint64_t>(capdata[1].effective) << 32);
2191 }
2192 
CalculateBoundingCapabilities(JNIEnv * env,jint uid,jint gid,jintArray gids)2193 static jlong CalculateBoundingCapabilities(JNIEnv* env, jint uid, jint gid, jintArray gids) {
2194     jlong capabilities = 0;
2195 
2196     /*
2197      * Grant CAP_SYS_NICE to CapInh/CapPrm/CapBnd for processes that can spawn
2198      * VMs.  This enables processes to execve on binaries with elevated
2199      * capabilities if its file capability bits are set. This does not grant
2200      * capability to the parent process(that spawns the VM) as the effective
2201      * bits are not set.
2202      */
2203     if (MatchGid(env, gids, gid, AID_VIRTUALMACHINE)) {
2204         capabilities |= (1LL << CAP_SYS_NICE);
2205     }
2206 
2207     return capabilities;
2208 }
2209 
CalculateCapabilities(JNIEnv * env,jint uid,jint gid,jintArray gids,bool is_child_zygote)2210 static jlong CalculateCapabilities(JNIEnv* env, jint uid, jint gid, jintArray gids,
2211                                    bool is_child_zygote) {
2212   jlong capabilities = 0;
2213 
2214   /*
2215    *  Grant the following capabilities to the Bluetooth user:
2216    *    - CAP_WAKE_ALARM
2217    *    - CAP_NET_ADMIN
2218    *    - CAP_NET_RAW
2219    *    - CAP_NET_BIND_SERVICE (for DHCP client functionality)
2220    *    - CAP_SYS_NICE (for setting RT priority for audio-related threads)
2221    */
2222 
2223   if (multiuser_get_app_id(uid) == AID_BLUETOOTH) {
2224     capabilities |= (1LL << CAP_WAKE_ALARM);
2225     capabilities |= (1LL << CAP_NET_ADMIN);
2226     capabilities |= (1LL << CAP_NET_RAW);
2227     capabilities |= (1LL << CAP_NET_BIND_SERVICE);
2228     capabilities |= (1LL << CAP_SYS_NICE);
2229   }
2230 
2231   if (multiuser_get_app_id(uid) == AID_NETWORK_STACK) {
2232     capabilities |= (1LL << CAP_WAKE_ALARM);
2233     capabilities |= (1LL << CAP_NET_ADMIN);
2234     capabilities |= (1LL << CAP_NET_BROADCAST);
2235     capabilities |= (1LL << CAP_NET_BIND_SERVICE);
2236     capabilities |= (1LL << CAP_NET_RAW);
2237   }
2238 
2239   /*
2240    * Grant CAP_BLOCK_SUSPEND to processes that belong to GID "wakelock"
2241    */
2242 
2243   if (MatchGid(env, gids, gid, AID_WAKELOCK)) {
2244     capabilities |= (1LL << CAP_BLOCK_SUSPEND);
2245   }
2246 
2247   /*
2248    * Grant child Zygote processes the following capabilities:
2249    *   - CAP_SETUID (change UID of child processes)
2250    *   - CAP_SETGID (change GID of child processes)
2251    *   - CAP_SETPCAP (change capabilities of child processes)
2252    */
2253 
2254   if (is_child_zygote) {
2255     capabilities |= (1LL << CAP_SETUID);
2256     capabilities |= (1LL << CAP_SETGID);
2257     capabilities |= (1LL << CAP_SETPCAP);
2258   }
2259 
2260   /*
2261    * Containers run without some capabilities, so drop any caps that are not
2262    * available.
2263    */
2264 
2265   return capabilities & GetEffectiveCapabilityMask(env);
2266 }
2267 
2268 /**
2269  * Adds the given information about a newly created unspecialized app
2270  * processes to the Zygote's USAP table.
2271  *
2272  * @param usap_pid  Process ID of the newly created USAP
2273  * @param read_pipe_fd  File descriptor for the read end of the USAP
2274  * reporting pipe.  Used in the ZygoteServer poll loop to track USAP
2275  * specialization.
2276  */
AddUsapTableEntry(pid_t usap_pid,int read_pipe_fd)2277 static void AddUsapTableEntry(pid_t usap_pid, int read_pipe_fd) {
2278   static int sUsapTableInsertIndex = 0;
2279 
2280   int search_index = sUsapTableInsertIndex;
2281   do {
2282     if (gUsapTable[search_index].SetIfInvalid(usap_pid, read_pipe_fd)) {
2283       ++gUsapPoolCount;
2284 
2285       // Start our next search right after where we finished this one.
2286       sUsapTableInsertIndex = (search_index + 1) % gUsapTable.size();
2287 
2288       return;
2289     }
2290 
2291     search_index = (search_index + 1) % gUsapTable.size();
2292   } while (search_index != sUsapTableInsertIndex);
2293 
2294   // Much like money in the banana stand, there should always be an entry
2295   // in the USAP table.
2296   __builtin_unreachable();
2297 }
2298 
2299 /**
2300  * Invalidates the entry in the USAPTable corresponding to the provided
2301  * process ID if it is present.  If an entry was removed the USAP pool
2302  * count is decremented. May be called from signal handler.
2303  *
2304  * @param usap_pid  Process ID of the USAP entry to invalidate
2305  * @return True if an entry was invalidated; false otherwise
2306  */
RemoveUsapTableEntry(pid_t usap_pid)2307 static bool RemoveUsapTableEntry(pid_t usap_pid) {
2308   for (UsapTableEntry& entry : gUsapTable) {
2309     if (entry.ClearForPID(usap_pid)) {
2310       --gUsapPoolCount;
2311       return true;
2312     }
2313   }
2314 
2315   return false;
2316 }
2317 
2318 /**
2319  * @return A vector of the read pipe FDs for each of the active USAPs.
2320  */
MakeUsapPipeReadFDVector()2321 std::vector<int> MakeUsapPipeReadFDVector() {
2322   std::vector<int> fd_vec;
2323   fd_vec.reserve(gUsapTable.size());
2324 
2325   for (UsapTableEntry& entry : gUsapTable) {
2326     auto entry_values = entry.GetValues();
2327 
2328     if (entry_values.has_value()) {
2329       fd_vec.push_back(entry_values.value().read_pipe_fd);
2330     }
2331   }
2332 
2333   return fd_vec;
2334 }
2335 
UnmountStorageOnInit(JNIEnv * env)2336 static void UnmountStorageOnInit(JNIEnv* env) {
2337   // Zygote process unmount root storage space initially before every child processes are forked.
2338   // Every forked child processes (include SystemServer) only mount their own root storage space
2339   // and no need unmount storage operation in MountEmulatedStorage method.
2340   // Zygote process does not utilize root storage spaces and unshares its mount namespace below.
2341 
2342   // See storage config details at http://source.android.com/tech/storage/
2343   // Create private mount namespace shared by all children
2344   if (unshare(CLONE_NEWNS) == -1) {
2345     RuntimeAbort(env, __LINE__, "Failed to unshare()");
2346     return;
2347   }
2348 
2349   // Mark rootfs as being MS_SLAVE so that changes from default
2350   // namespace only flow into our children.
2351   if (mount("rootfs", "/", nullptr, (MS_SLAVE | MS_REC), nullptr) == -1) {
2352     RuntimeAbort(env, __LINE__, "Failed to mount() rootfs as MS_SLAVE");
2353     return;
2354   }
2355 
2356   // Create a staging tmpfs that is shared by our children; they will
2357   // bind mount storage into their respective private namespaces, which
2358   // are isolated from each other.
2359   const char* target_base = getenv("EMULATED_STORAGE_TARGET");
2360   if (target_base != nullptr) {
2361 #define STRINGIFY_UID(x) __STRING(x)
2362     if (mount("tmpfs", target_base, "tmpfs", MS_NOSUID | MS_NODEV,
2363               "uid=0,gid=" STRINGIFY_UID(AID_SDCARD_R) ",mode=0751") == -1) {
2364       ALOGE("Failed to mount tmpfs to %s", target_base);
2365       RuntimeAbort(env, __LINE__, "Failed to mount tmpfs");
2366       return;
2367     }
2368 #undef STRINGIFY_UID
2369   }
2370 
2371   UnmountTree("/storage");
2372 }
2373 
2374 }  // anonymous namespace
2375 
2376 namespace android {
2377 
2378 /**
2379  * A failure function used to report fatal errors to the managed runtime.  This
2380  * function is often curried with the process name information and then passed
2381  * to called functions.
2382  *
2383  * @param env  Managed runtime environment
2384  * @param process_name  A native representation of the process name
2385  * @param managed_process_name  A managed representation of the process name
2386  * @param msg  The error message to be reported
2387  */
2388 [[noreturn]]
ZygoteFailure(JNIEnv * env,const char * process_name,jstring managed_process_name,const std::string & msg)2389 void zygote::ZygoteFailure(JNIEnv* env,
2390                            const char* process_name,
2391                            jstring managed_process_name,
2392                            const std::string& msg) {
2393   std::unique_ptr<ScopedUtfChars> scoped_managed_process_name_ptr = nullptr;
2394   if (managed_process_name != nullptr) {
2395     scoped_managed_process_name_ptr.reset(new ScopedUtfChars(env, managed_process_name));
2396     if (scoped_managed_process_name_ptr->c_str() != nullptr) {
2397       process_name = scoped_managed_process_name_ptr->c_str();
2398     }
2399   }
2400 
2401   const std::string& error_msg =
2402       (process_name == nullptr || process_name[0] == '\0') ?
2403       msg : StringPrintf("(%s) %s", process_name, msg.c_str());
2404 
2405   env->FatalError(error_msg.c_str());
2406   __builtin_unreachable();
2407 }
2408 
// Set of file descriptors that the fork entry points in this file append to
// fds_to_ignore, but only while gPreloadFdsExtracted is true.  Initially null.
static std::set<int>* gPreloadFds = nullptr;
// Gate checked together with gPreloadFds before its contents are used.
static bool gPreloadFdsExtracted = false;
2411 
/**
 * Utility routine to fork a process from the zygote.
 *
 * Before forking it installs the signal handlers, blocks SIGCHLD, closes the
 * logging FDs and creates (first fork) or re-checks (later forks) the open FD
 * table.  In the child it detaches fds_to_close, clears the USAP table and
 * reopens or detaches the remaining descriptors.  SIGCHLD is unblocked again
 * before returning, in both the parent and the child.
 *
 * @param env  Managed runtime environment
 * @param is_system_server  Selects the process name ("system_server" or
 * "zygote") used when reporting failures
 * @param fds_to_close  Descriptors handed to DetachDescriptors() in the child
 * @param fds_to_ignore  Descriptors handed to the FD table's Create()/Restat()
 * @param is_priority_fork  If true the fork runs at PROCESS_PRIORITY_MAX and
 * the parent is reset to PROCESS_PRIORITY_DEFAULT afterwards; otherwise the
 * child is set to PROCESS_PRIORITY_MIN
 * @param purge  Whether to purge unused native memory before forking
 * @return The result of fork(): 0 in the child, the child's pid in the
 * parent, or -1 if the fork failed
 */
NO_STACK_PROTECTOR
pid_t zygote::ForkCommon(JNIEnv* env, bool is_system_server,
                         const std::vector<int>& fds_to_close,
                         const std::vector<int>& fds_to_ignore,
                         bool is_priority_fork,
                         bool purge) {
  ATRACE_CALL();
  // Raise the zygote's own priority for the duration of a priority fork; it is
  // restored at the end of this function.
  if (is_priority_fork) {
    setpriority(PRIO_PROCESS, 0, PROCESS_PRIORITY_MAX);
  }

  SetSignalHandlers();

  // Curry a failure function.
  auto fail_fn = std::bind(zygote::ZygoteFailure, env,
                           is_system_server ? "system_server" : "zygote",
                           nullptr, _1);

  // Temporarily block SIGCHLD during forks. The SIGCHLD handler might
  // log, which would result in the logging FDs we close being reopened.
  // This would cause failures because the FDs are not allowlisted.
  //
  // Note that the zygote process is single threaded at this point.
  BlockSignal(SIGCHLD, fail_fn);

  // Close any logging related FDs before we start evaluating the list of
  // file descriptors.
  __android_log_close();
  AStatsSocket_close();

  // If this is the first fork for this zygote, create the open FD table,
  // verifying that files are of supported type and allowlisted.  Otherwise (not
  // the first fork), check that the open files have not changed.  Newly open
  // files are not expected, and will be disallowed in the future.  Currently
  // they are allowed if they pass the same checks as in the
  // FileDescriptorTable::Create() above.
  if (gOpenFdTable == nullptr) {
    gOpenFdTable = FileDescriptorTable::Create(fds_to_ignore, fail_fn);
  } else {
    gOpenFdTable->Restat(fds_to_ignore, fail_fn);
  }

  // Remember the current fdsan error level so the child can re-apply it below.
  android_fdsan_error_level fdsan_error_level = android_fdsan_get_error_level();

  if (purge) {
    // Purge unused native memory in an attempt to reduce the amount of false
    // sharing with the child process.  By reducing the size of the libc_malloc
    // region shared with the child process we reduce the number of pages that
    // transition to the private-dirty state when malloc adjusts the meta-data
    // on each of the pages it is managing after the fork.
    // Fall back to M_PURGE when M_PURGE_ALL does not report success.
    if (mallopt(M_PURGE_ALL, 0) != 1) {
      mallopt(M_PURGE, 0);
    }
  }

  pid_t pid = fork();

  if (pid == 0) {
    // Priority forks keep the child at maximum priority; all others start the
    // child at minimum priority.
    if (is_priority_fork) {
      setpriority(PRIO_PROCESS, 0, PROCESS_PRIORITY_MAX);
    } else {
      setpriority(PRIO_PROCESS, 0, PROCESS_PRIORITY_MIN);
    }

#if defined(__BIONIC__) && !defined(NO_RESET_STACK_PROTECTOR)
    // Reset the stack guard for the new process.
    android_reset_stack_guards();
#endif

    // The child process.
    PreApplicationInit();

    // Clean up any descriptors which must be closed immediately
    DetachDescriptors(env, fds_to_close, fail_fn);

    // Invalidate the entries in the USAP table.
    ClearUsapTable();

    // Re-open all remaining open file descriptors so that they aren't shared
    // with the zygote across a fork.
    gOpenFdTable->ReopenOrDetach(fail_fn);

    // Turn fdsan back on.
    // NOTE(review): the saved level is only read above; nothing in this
    // function lowers it directly -- confirm this restore is still needed.
    android_fdsan_set_error_level(fdsan_error_level);

    // Reset the fd to the unsolicited zygote socket
    gSystemServerSocketFd = -1;
  } else if (pid == -1) {
    ALOGE("Failed to fork child process: %s (%d)", strerror(errno), errno);
  } else {
    ALOGD("Forked child process %d", pid);
  }

  // We blocked SIGCHLD prior to a fork, we unblock it here.
  UnblockSignal(SIGCHLD, fail_fn);

  // Only the parent (or a failed fork) drops back to the default priority; the
  // child of a priority fork keeps the priority set above.
  if (is_priority_fork && pid != 0) {
    setpriority(PRIO_PROCESS, 0, PROCESS_PRIORITY_DEFAULT);
  }

  return pid;
}
2515 
/**
 * JNI entry point that simply forwards to PreApplicationInit().  The JNIEnv
 * and jclass arguments are not used.
 */
static void com_android_internal_os_Zygote_nativePreApplicationInit(JNIEnv*, jclass) {
  PreApplicationInit();
}
2519 
2520 NO_STACK_PROTECTOR
com_android_internal_os_Zygote_nativeForkAndSpecialize(JNIEnv * env,jclass,jint uid,jint gid,jintArray gids,jint runtime_flags,jobjectArray rlimits,jint mount_external,jstring se_info,jstring nice_name,jintArray managed_fds_to_close,jintArray managed_fds_to_ignore,jboolean is_child_zygote,jstring instruction_set,jstring app_data_dir,jboolean is_top_app,jobjectArray pkg_data_info_list,jobjectArray allowlisted_data_info_list,jboolean mount_data_dirs,jboolean mount_storage_dirs,jboolean mount_sysprop_overrides)2521 static jint com_android_internal_os_Zygote_nativeForkAndSpecialize(
2522         JNIEnv* env, jclass, jint uid, jint gid, jintArray gids, jint runtime_flags,
2523         jobjectArray rlimits, jint mount_external, jstring se_info, jstring nice_name,
2524         jintArray managed_fds_to_close, jintArray managed_fds_to_ignore, jboolean is_child_zygote,
2525         jstring instruction_set, jstring app_data_dir, jboolean is_top_app,
2526         jobjectArray pkg_data_info_list, jobjectArray allowlisted_data_info_list,
2527         jboolean mount_data_dirs, jboolean mount_storage_dirs, jboolean mount_sysprop_overrides) {
2528     jlong capabilities = CalculateCapabilities(env, uid, gid, gids, is_child_zygote);
2529     jlong bounding_capabilities = CalculateBoundingCapabilities(env, uid, gid, gids);
2530 
2531     if (UNLIKELY(managed_fds_to_close == nullptr)) {
2532       zygote::ZygoteFailure(env, "zygote", nice_name,
2533                             "Zygote received a null fds_to_close vector.");
2534     }
2535 
2536     std::vector<int> fds_to_close =
2537         ExtractJIntArray(env, "zygote", nice_name, managed_fds_to_close).value();
2538     std::vector<int> fds_to_ignore =
2539         ExtractJIntArray(env, "zygote", nice_name, managed_fds_to_ignore)
2540             .value_or(std::vector<int>());
2541 
2542     std::vector<int> usap_pipes = MakeUsapPipeReadFDVector();
2543 
2544     fds_to_close.insert(fds_to_close.end(), usap_pipes.begin(), usap_pipes.end());
2545     fds_to_ignore.insert(fds_to_ignore.end(), usap_pipes.begin(), usap_pipes.end());
2546 
2547     fds_to_close.push_back(gUsapPoolSocketFD);
2548 
2549     if (gUsapPoolEventFD != -1) {
2550       fds_to_close.push_back(gUsapPoolEventFD);
2551       fds_to_ignore.push_back(gUsapPoolEventFD);
2552     }
2553 
2554     if (gSystemServerSocketFd != -1) {
2555         fds_to_close.push_back(gSystemServerSocketFd);
2556         fds_to_ignore.push_back(gSystemServerSocketFd);
2557     }
2558 
2559     if (gPreloadFds && gPreloadFdsExtracted) {
2560         fds_to_ignore.insert(fds_to_ignore.end(), gPreloadFds->begin(), gPreloadFds->end());
2561     }
2562 
2563     pid_t pid = zygote::ForkCommon(env, /* is_system_server= */ false, fds_to_close, fds_to_ignore,
2564                                    true);
2565 
2566     if (pid == 0) {
2567         SpecializeCommon(env, uid, gid, gids, runtime_flags, rlimits, capabilities, capabilities,
2568                          bounding_capabilities, mount_external, se_info, nice_name, false,
2569                          is_child_zygote == JNI_TRUE, instruction_set, app_data_dir,
2570                          is_top_app == JNI_TRUE, pkg_data_info_list, allowlisted_data_info_list,
2571                          mount_data_dirs == JNI_TRUE, mount_storage_dirs == JNI_TRUE,
2572                          mount_sysprop_overrides == JNI_TRUE);
2573     }
2574     return pid;
2575 }
2576 
2577 NO_STACK_PROTECTOR
com_android_internal_os_Zygote_nativeForkSystemServer(JNIEnv * env,jclass,uid_t uid,gid_t gid,jintArray gids,jint runtime_flags,jobjectArray rlimits,jlong permitted_capabilities,jlong effective_capabilities)2578 static jint com_android_internal_os_Zygote_nativeForkSystemServer(
2579         JNIEnv* env, jclass, uid_t uid, gid_t gid, jintArray gids,
2580         jint runtime_flags, jobjectArray rlimits, jlong permitted_capabilities,
2581         jlong effective_capabilities) {
2582   ATRACE_CALL();
2583   std::vector<int> fds_to_close(MakeUsapPipeReadFDVector()),
2584                    fds_to_ignore(fds_to_close);
2585 
2586   fds_to_close.push_back(gUsapPoolSocketFD);
2587 
2588   if (gUsapPoolEventFD != -1) {
2589     fds_to_close.push_back(gUsapPoolEventFD);
2590     fds_to_ignore.push_back(gUsapPoolEventFD);
2591   }
2592 
2593   if (gSystemServerSocketFd != -1) {
2594       fds_to_close.push_back(gSystemServerSocketFd);
2595       fds_to_ignore.push_back(gSystemServerSocketFd);
2596   }
2597 
2598   pid_t pid = zygote::ForkCommon(env, true,
2599                                  fds_to_close,
2600                                  fds_to_ignore,
2601                                  true);
2602   if (pid == 0) {
2603       // System server prcoess does not need data isolation so no need to
2604       // know pkg_data_info_list.
2605       SpecializeCommon(env, uid, gid, gids, runtime_flags, rlimits, permitted_capabilities,
2606                        effective_capabilities, 0, MOUNT_EXTERNAL_DEFAULT, nullptr, nullptr, true,
2607                        false, nullptr, nullptr, /* is_top_app= */ false,
2608                        /* pkg_data_info_list */ nullptr,
2609                        /* allowlisted_data_info_list */ nullptr, false, false, false);
2610   } else if (pid > 0) {
2611       // The zygote process checks whether the child process has died or not.
2612       ALOGI("System server process %d has been created", pid);
2613       gSystemServerPid = pid;
2614       // There is a slight window that the system server process has crashed
2615       // but it went unnoticed because we haven't published its pid yet. So
2616       // we recheck here just to make sure that all is well.
2617       int status;
2618       if (waitpid(pid, &status, WNOHANG) == pid) {
2619           ALOGE("System server process %d has died. Restarting Zygote!", pid);
2620           RuntimeAbort(env, __LINE__, "System server process has died. Restarting Zygote!");
2621       }
2622 
2623       if (UsePerAppMemcg()) {
2624           // Assign system_server to the correct memory cgroup.
2625           // Not all devices mount memcg so check if it is mounted first
2626           // to avoid unnecessarily printing errors and denials in the logs.
2627           if (!SetTaskProfiles(pid, std::vector<std::string>{"SystemMemoryProcess"})) {
2628               ALOGE("couldn't add process %d into system memcg group", pid);
2629           }
2630       }
2631   }
2632   return pid;
2633 }
2634 
2635 /**
2636  * A JNI function that forks an unspecialized app process from the Zygote while
2637  * ensuring proper file descriptor hygiene.
2638  *
2639  * @param env  Managed runtime environment
2640  * @param read_pipe_fd  The read FD for the USAP reporting pipe.  Manually closed by the child
2641  * in managed code. -1 indicates none.
2642  * @param write_pipe_fd  The write FD for the USAP reporting pipe.  Manually closed by the
2643  * zygote in managed code. -1 indicates none.
2644  * @param managed_session_socket_fds  A list of anonymous session sockets that must be ignored by
2645  * the FD hygiene code and automatically "closed" in the new USAP.
2646  * @param args_known Arguments for specialization are available; no need to read from a socket
2647  * @param is_priority_fork  Controls the nice level assigned to the newly created process
2648  * @return child pid in the parent, 0 in the child
2649  */
2650 NO_STACK_PROTECTOR
com_android_internal_os_Zygote_nativeForkApp(JNIEnv * env,jclass,jint read_pipe_fd,jint write_pipe_fd,jintArray managed_session_socket_fds,jboolean args_known,jboolean is_priority_fork)2651 static jint com_android_internal_os_Zygote_nativeForkApp(JNIEnv* env,
2652                                                          jclass,
2653                                                          jint read_pipe_fd,
2654                                                          jint write_pipe_fd,
2655                                                          jintArray managed_session_socket_fds,
2656                                                          jboolean args_known,
2657                                                          jboolean is_priority_fork) {
2658   ATRACE_CALL();
2659   std::vector<int> session_socket_fds =
2660       ExtractJIntArray(env, "USAP", nullptr, managed_session_socket_fds)
2661           .value_or(std::vector<int>());
2662   return zygote::forkApp(env, read_pipe_fd, write_pipe_fd, session_socket_fds,
2663                             args_known == JNI_TRUE, is_priority_fork == JNI_TRUE, true);
2664 }
2665 
2666 NO_STACK_PROTECTOR
forkApp(JNIEnv * env,int read_pipe_fd,int write_pipe_fd,const std::vector<int> & session_socket_fds,bool args_known,bool is_priority_fork,bool purge)2667 int zygote::forkApp(JNIEnv* env,
2668                     int read_pipe_fd,
2669                     int write_pipe_fd,
2670                     const std::vector<int>& session_socket_fds,
2671                     bool args_known,
2672                     bool is_priority_fork,
2673                     bool purge) {
2674   ATRACE_CALL();
2675 
2676   std::vector<int> fds_to_close(MakeUsapPipeReadFDVector()),
2677                    fds_to_ignore(fds_to_close);
2678 
2679   fds_to_close.push_back(gZygoteSocketFD);
2680   if (gSystemServerSocketFd != -1) {
2681       fds_to_close.push_back(gSystemServerSocketFd);
2682   }
2683   if (args_known) {
2684       fds_to_close.push_back(gUsapPoolSocketFD);
2685   }
2686   fds_to_close.insert(fds_to_close.end(), session_socket_fds.begin(), session_socket_fds.end());
2687 
2688   fds_to_ignore.push_back(gUsapPoolSocketFD);
2689   fds_to_ignore.push_back(gZygoteSocketFD);
2690   if (read_pipe_fd != -1) {
2691       fds_to_ignore.push_back(read_pipe_fd);
2692   }
2693   if (write_pipe_fd != -1) {
2694       fds_to_ignore.push_back(write_pipe_fd);
2695   }
2696   fds_to_ignore.insert(fds_to_ignore.end(), session_socket_fds.begin(), session_socket_fds.end());
2697 
2698   if (gUsapPoolEventFD != -1) {
2699       fds_to_close.push_back(gUsapPoolEventFD);
2700       fds_to_ignore.push_back(gUsapPoolEventFD);
2701   }
2702   if (gSystemServerSocketFd != -1) {
2703       if (args_known) {
2704           fds_to_close.push_back(gSystemServerSocketFd);
2705       }
2706       fds_to_ignore.push_back(gSystemServerSocketFd);
2707   }
2708   if (gPreloadFds && gPreloadFdsExtracted) {
2709       fds_to_ignore.insert(fds_to_ignore.end(), gPreloadFds->begin(), gPreloadFds->end());
2710   }
2711 
2712   return zygote::ForkCommon(env, /* is_system_server= */ false, fds_to_close,
2713                             fds_to_ignore, is_priority_fork == JNI_TRUE, purge);
2714 }
2715 
com_android_internal_os_Zygote_nativeAllowFileAcrossFork(JNIEnv * env,jclass,jstring path)2716 static void com_android_internal_os_Zygote_nativeAllowFileAcrossFork(
2717         JNIEnv* env, jclass, jstring path) {
2718     ScopedUtfChars path_native(env, path);
2719     const char* path_cstr = path_native.c_str();
2720     if (!path_cstr) {
2721         RuntimeAbort(env, __LINE__, "path_cstr == nullptr");
2722     }
2723     FileDescriptorAllowlist::Get()->Allow(path_cstr);
2724 }
2725 
com_android_internal_os_Zygote_nativeInstallSeccompUidGidFilter(JNIEnv * env,jclass,jint uidGidMin,jint uidGidMax)2726 static void com_android_internal_os_Zygote_nativeInstallSeccompUidGidFilter(
2727         JNIEnv* env, jclass, jint uidGidMin, jint uidGidMax) {
2728   if (!gIsSecurityEnforced) {
2729     ALOGI("seccomp disabled by setenforce 0");
2730     return;
2731   }
2732 
2733   bool installed = install_setuidgid_seccomp_filter(uidGidMin, uidGidMax);
2734   if (!installed) {
2735       RuntimeAbort(env, __LINE__, "Could not install setuid/setgid seccomp filter.");
2736   }
2737 }
2738 
2739 /**
2740  * Called from an unspecialized app process to specialize the process for a
2741  * given application.
2742  *
2743  * @param env  Managed runtime environment
2744  * @param uid  User ID of the new application
2745  * @param gid  Group ID of the new application
2746  * @param gids  Extra groups that the process belongs to
2747  * @param runtime_flags  Flags for changing the behavior of the managed runtime
2748  * @param rlimits  Resource limits
2749  * @param mount_external  The mode (read/write/normal) that external storage will be mounted with
2750  * @param se_info  SELinux policy information
2751  * @param nice_name  New name for this process
2752  * @param is_child_zygote  If the process is to become a WebViewZygote
2753  * @param instruction_set  The instruction set expected/requested by the new application
2754  * @param app_data_dir  Path to the application's data directory
2755  * @param is_top_app  If the process is for top (high priority) application
2756  */
com_android_internal_os_Zygote_nativeSpecializeAppProcess(JNIEnv * env,jclass,jint uid,jint gid,jintArray gids,jint runtime_flags,jobjectArray rlimits,jint mount_external,jstring se_info,jstring nice_name,jboolean is_child_zygote,jstring instruction_set,jstring app_data_dir,jboolean is_top_app,jobjectArray pkg_data_info_list,jobjectArray allowlisted_data_info_list,jboolean mount_data_dirs,jboolean mount_storage_dirs,jboolean mount_sysprop_overrides)2757 static void com_android_internal_os_Zygote_nativeSpecializeAppProcess(
2758         JNIEnv* env, jclass, jint uid, jint gid, jintArray gids, jint runtime_flags,
2759         jobjectArray rlimits, jint mount_external, jstring se_info, jstring nice_name,
2760         jboolean is_child_zygote, jstring instruction_set, jstring app_data_dir,
2761         jboolean is_top_app, jobjectArray pkg_data_info_list,
2762         jobjectArray allowlisted_data_info_list, jboolean mount_data_dirs,
2763         jboolean mount_storage_dirs, jboolean mount_sysprop_overrides) {
2764     jlong capabilities = CalculateCapabilities(env, uid, gid, gids, is_child_zygote);
2765     jlong bounding_capabilities = CalculateBoundingCapabilities(env, uid, gid, gids);
2766 
2767     SpecializeCommon(env, uid, gid, gids, runtime_flags, rlimits, capabilities, capabilities,
2768                      bounding_capabilities, mount_external, se_info, nice_name, false,
2769                      is_child_zygote == JNI_TRUE, instruction_set, app_data_dir,
2770                      is_top_app == JNI_TRUE, pkg_data_info_list, allowlisted_data_info_list,
2771                      mount_data_dirs == JNI_TRUE, mount_storage_dirs == JNI_TRUE,
2772                      mount_sysprop_overrides == JNI_TRUE);
2773 }
2774 
2775 /**
2776  * A helper method for fetching socket file descriptors that were opened by init from the
2777  * environment.
2778  *
2779  * @param env  Managed runtime environment
2780  * @param is_primary  If this process is the primary or secondary Zygote; used to compute the name
2781  * of the environment variable storing the file descriptors.
2782  */
com_android_internal_os_Zygote_nativeInitNativeState(JNIEnv * env,jclass,jboolean is_primary)2783 static void com_android_internal_os_Zygote_nativeInitNativeState(JNIEnv* env, jclass,
2784                                                                  jboolean is_primary) {
2785   /*
2786    * Obtain file descriptors created by init from the environment.
2787    */
2788 
2789   gZygoteSocketFD =
2790       android_get_control_socket(is_primary ? "zygote" : "zygote_secondary");
2791   if (gZygoteSocketFD >= 0) {
2792     ALOGV("Zygote:zygoteSocketFD = %d", gZygoteSocketFD);
2793   } else {
2794     ALOGE("Unable to fetch Zygote socket file descriptor");
2795   }
2796 
2797   gUsapPoolSocketFD =
2798       android_get_control_socket(is_primary ? "usap_pool_primary" : "usap_pool_secondary");
2799   if (gUsapPoolSocketFD >= 0) {
2800     ALOGV("Zygote:usapPoolSocketFD = %d", gUsapPoolSocketFD);
2801   } else {
2802     ALOGE("Unable to fetch USAP pool socket file descriptor");
2803   }
2804 
2805   initUnsolSocketToSystemServer();
2806 
2807   /*
2808    * Security Initialization
2809    */
2810 
2811   // security_getenforce is not allowed on app process. Initialize and cache
2812   // the value before zygote forks.
2813   gIsSecurityEnforced = security_getenforce();
2814 
2815   selinux_android_seapp_context_init();
2816 
2817   /*
2818    * Storage Initialization
2819    */
2820 
2821   UnmountStorageOnInit(env);
2822 
2823   /*
2824    * Performance Initialization
2825    */
2826 
2827   if (!SetTaskProfiles(0, {})) {
2828     zygote::ZygoteFailure(env, "zygote", nullptr, "Zygote SetTaskProfiles failed");
2829   }
2830 }
2831 
2832 /**
2833  * @param env  Managed runtime environment
2834  * @return  A managed array of raw file descriptors for the read ends of the USAP reporting
2835  * pipes.
2836  */
com_android_internal_os_Zygote_nativeGetUsapPipeFDs(JNIEnv * env,jclass)2837 static jintArray com_android_internal_os_Zygote_nativeGetUsapPipeFDs(JNIEnv* env, jclass) {
2838   std::vector<int> usap_fds = MakeUsapPipeReadFDVector();
2839 
2840   jintArray managed_usap_fds = env->NewIntArray(usap_fds.size());
2841   env->SetIntArrayRegion(managed_usap_fds, 0, usap_fds.size(), usap_fds.data());
2842 
2843   return managed_usap_fds;
2844 }
2845 
2846 /*
2847  * Add the given pid and file descriptor to the Usap table. CriticalNative method.
2848  */
com_android_internal_os_Zygote_nativeAddUsapTableEntry(jint pid,jint read_pipe_fd)2849 static void com_android_internal_os_Zygote_nativeAddUsapTableEntry(jint pid, jint read_pipe_fd) {
2850   AddUsapTableEntry(pid, read_pipe_fd);
2851 }
2852 
2853 /**
2854  * A JNI wrapper around RemoveUsapTableEntry. CriticalNative method.
2855  *
2856  * @param env  Managed runtime environment
2857  * @param usap_pid  Process ID of the USAP entry to invalidate
2858  * @return  True if an entry was invalidated; false otherwise.
2859  */
com_android_internal_os_Zygote_nativeRemoveUsapTableEntry(jint usap_pid)2860 static jboolean com_android_internal_os_Zygote_nativeRemoveUsapTableEntry(jint usap_pid) {
2861   return RemoveUsapTableEntry(usap_pid);
2862 }
2863 
2864 /**
2865  * Creates the USAP pool event FD if it doesn't exist and returns it.  This is used by the
2866  * ZygoteServer poll loop to know when to re-fill the USAP pool.
2867  *
2868  * @param env  Managed runtime environment
2869  * @return A raw event file descriptor used to communicate (from the signal handler) when the
2870  * Zygote receives a SIGCHLD for a USAP
2871  */
com_android_internal_os_Zygote_nativeGetUsapPoolEventFD(JNIEnv * env,jclass)2872 static jint com_android_internal_os_Zygote_nativeGetUsapPoolEventFD(JNIEnv* env, jclass) {
2873   if (gUsapPoolEventFD == -1) {
2874     if ((gUsapPoolEventFD = eventfd(0, 0)) == -1) {
2875       zygote::ZygoteFailure(env, "zygote", nullptr,
2876                             StringPrintf("Unable to create eventfd: %s", strerror(errno)));
2877     }
2878   }
2879 
2880   return gUsapPoolEventFD;
2881 }
2882 
2883 /**
2884  * @param env  Managed runtime environment
2885  * @return The number of USAPs currently in the USAP pool
2886  */
com_android_internal_os_Zygote_nativeGetUsapPoolCount(JNIEnv * env,jclass)2887 static jint com_android_internal_os_Zygote_nativeGetUsapPoolCount(JNIEnv* env, jclass) {
2888   return gUsapPoolCount;
2889 }
2890 
2891 /**
2892  * Kills all processes currently in the USAP pool and closes their read pipe
2893  * FDs.
2894  *
2895  * @param env  Managed runtime environment
2896  */
com_android_internal_os_Zygote_nativeEmptyUsapPool(JNIEnv * env,jclass)2897 static void com_android_internal_os_Zygote_nativeEmptyUsapPool(JNIEnv* env, jclass) {
2898   for (auto& entry : gUsapTable) {
2899     auto entry_storage = entry.GetValues();
2900 
2901     if (entry_storage.has_value()) {
2902       kill(entry_storage.value().pid, SIGTERM);
2903 
2904       // Clean up the USAP table entry here.  This avoids a potential race
2905       // where a newly created USAP might not be able to find a valid table
2906       // entry if signal handler (which would normally do the cleanup) doesn't
2907       // run between now and when the new process is created.
2908 
2909       close(entry_storage.value().read_pipe_fd);
2910 
2911       // Avoid a second atomic load by invalidating instead of clearing.
2912       entry.Invalidate();
2913       --gUsapPoolCount;
2914     }
2915   }
2916 }
2917 
com_android_internal_os_Zygote_nativeBlockSigTerm(JNIEnv * env,jclass)2918 static void com_android_internal_os_Zygote_nativeBlockSigTerm(JNIEnv* env, jclass) {
2919   auto fail_fn = std::bind(zygote::ZygoteFailure, env, "usap", nullptr, _1);
2920   BlockSignal(SIGTERM, fail_fn);
2921 }
2922 
com_android_internal_os_Zygote_nativeUnblockSigTerm(JNIEnv * env,jclass)2923 static void com_android_internal_os_Zygote_nativeUnblockSigTerm(JNIEnv* env, jclass) {
2924   auto fail_fn = std::bind(zygote::ZygoteFailure, env, "usap", nullptr, _1);
2925   UnblockSignal(SIGTERM, fail_fn);
2926 }
2927 
com_android_internal_os_Zygote_nativeBoostUsapPriority(JNIEnv * env,jclass)2928 static void com_android_internal_os_Zygote_nativeBoostUsapPriority(JNIEnv* env, jclass) {
2929   setpriority(PRIO_PROCESS, 0, PROCESS_PRIORITY_MAX);
2930 }
2931 
com_android_internal_os_Zygote_nativeParseSigChld(JNIEnv * env,jclass,jbyteArray in,jint length,jintArray out)2932 static jint com_android_internal_os_Zygote_nativeParseSigChld(JNIEnv* env, jclass, jbyteArray in,
2933                                                               jint length, jintArray out) {
2934     if (length != sizeof(struct UnsolicitedZygoteMessageSigChld)) {
2935         // Apparently it's not the message we are expecting.
2936         return -1;
2937     }
2938     if (in == nullptr || out == nullptr) {
2939         // Invalid parameter
2940         jniThrowException(env, "java/lang/IllegalArgumentException", nullptr);
2941         return -1;
2942     }
2943     ScopedByteArrayRO source(env, in);
2944     if (source.size() < static_cast<size_t>(length)) {
2945         // Invalid parameter
2946         jniThrowException(env, "java/lang/IllegalArgumentException", nullptr);
2947         return -1;
2948     }
2949     const struct UnsolicitedZygoteMessageSigChld* msg =
2950             reinterpret_cast<const struct UnsolicitedZygoteMessageSigChld*>(source.get());
2951 
2952     switch (msg->header.type) {
2953         case UNSOLICITED_ZYGOTE_MESSAGE_TYPE_SIGCHLD: {
2954             ScopedIntArrayRW buf(env, out);
2955             if (buf.size() != 3) {
2956                 jniThrowException(env, "java/lang/IllegalArgumentException", nullptr);
2957                 return UNSOLICITED_ZYGOTE_MESSAGE_TYPE_RESERVED;
2958             }
2959             buf[0] = msg->payload.pid;
2960             buf[1] = msg->payload.uid;
2961             buf[2] = msg->payload.status;
2962             return 3;
2963         }
2964         default:
2965             break;
2966     }
2967     return -1;
2968 }
2969 
com_android_internal_os_Zygote_nativeSupportsMemoryTagging(JNIEnv * env,jclass)2970 static jboolean com_android_internal_os_Zygote_nativeSupportsMemoryTagging(JNIEnv* env, jclass) {
2971 #if defined(__aarch64__)
2972   return mte_supported();
2973 #else
2974   return false;
2975 #endif
2976 }
2977 
com_android_internal_os_Zygote_nativeSupportsTaggedPointers(JNIEnv * env,jclass)2978 static jboolean com_android_internal_os_Zygote_nativeSupportsTaggedPointers(JNIEnv* env, jclass) {
2979 #ifdef __aarch64__
2980   int res = prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0);
2981   return res >= 0 && res & PR_TAGGED_ADDR_ENABLE;
2982 #else
2983   return false;
2984 #endif
2985 }
2986 
com_android_internal_os_Zygote_nativeCurrentTaggingLevel(JNIEnv * env,jclass)2987 static jint com_android_internal_os_Zygote_nativeCurrentTaggingLevel(JNIEnv* env, jclass) {
2988 #if defined(__aarch64__)
2989   int level = prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0);
2990   if (level < 0) {
2991     ALOGE("Failed to get memory tag level: %s", strerror(errno));
2992     return 0;
2993   } else if (!(level & PR_TAGGED_ADDR_ENABLE)) {
2994     return 0;
2995   }
2996   // TBI is only possible on non-MTE hardware.
2997   if (!mte_supported()) {
2998     return MEMORY_TAG_LEVEL_TBI;
2999   }
3000 
3001   switch (level & PR_MTE_TCF_MASK) {
3002     case PR_MTE_TCF_NONE:
3003       return 0;
3004     case PR_MTE_TCF_SYNC:
3005       return MEMORY_TAG_LEVEL_SYNC;
3006     case PR_MTE_TCF_ASYNC:
3007     case PR_MTE_TCF_ASYNC | PR_MTE_TCF_SYNC:
3008       return MEMORY_TAG_LEVEL_ASYNC;
3009     default:
3010       ALOGE("Unknown memory tagging level: %i", level);
3011       return 0;
3012   }
3013 #else // defined(__aarch64__)
3014   return 0;
3015 #endif // defined(__aarch64__)
3016 }
3017 
com_android_internal_os_Zygote_nativeMarkOpenedFilesBeforePreload(JNIEnv * env,jclass)3018 static void com_android_internal_os_Zygote_nativeMarkOpenedFilesBeforePreload(JNIEnv* env, jclass) {
3019     // Ignore invocations when too early or too late.
3020     if (gPreloadFds) {
3021         return;
3022     }
3023 
3024     // App Zygote Preload starts soon. Save FDs remaining open.  After the
3025     // preload finishes newly open files will be determined.
3026     auto fail_fn = std::bind(zygote::ZygoteFailure, env, "zygote", nullptr, _1);
3027     gPreloadFds = GetOpenFds(fail_fn).release();
3028 }
3029 
com_android_internal_os_Zygote_nativeAllowFilesOpenedByPreload(JNIEnv * env,jclass)3030 static void com_android_internal_os_Zygote_nativeAllowFilesOpenedByPreload(JNIEnv* env, jclass) {
3031     // Ignore invocations when too early or too late.
3032     if (!gPreloadFds || gPreloadFdsExtracted) {
3033         return;
3034     }
3035 
3036     // Find the newly open FDs, if any.
3037     auto fail_fn = std::bind(zygote::ZygoteFailure, env, "zygote", nullptr, _1);
3038     std::unique_ptr<std::set<int>> current_fds = GetOpenFds(fail_fn);
3039     auto difference = std::make_unique<std::set<int>>();
3040     std::set_difference(current_fds->begin(), current_fds->end(), gPreloadFds->begin(),
3041                         gPreloadFds->end(), std::inserter(*difference, difference->end()));
3042     delete gPreloadFds;
3043     gPreloadFds = difference.release();
3044     gPreloadFdsExtracted = true;
3045 }
3046 
3047 static const JNINativeMethod gMethods[] = {
3048         {"nativeForkAndSpecialize",
3049          "(II[II[[IILjava/lang/String;Ljava/lang/String;[I[IZLjava/lang/String;Ljava/lang/"
3050          "String;Z[Ljava/lang/String;[Ljava/lang/String;ZZZ)I",
3051          (void*)com_android_internal_os_Zygote_nativeForkAndSpecialize},
3052         {"nativeForkSystemServer", "(II[II[[IJJ)I",
3053          (void*)com_android_internal_os_Zygote_nativeForkSystemServer},
3054         {"nativeAllowFileAcrossFork", "(Ljava/lang/String;)V",
3055          (void*)com_android_internal_os_Zygote_nativeAllowFileAcrossFork},
3056         {"nativePreApplicationInit", "()V",
3057          (void*)com_android_internal_os_Zygote_nativePreApplicationInit},
3058         {"nativeInstallSeccompUidGidFilter", "(II)V",
3059          (void*)com_android_internal_os_Zygote_nativeInstallSeccompUidGidFilter},
3060         {"nativeForkApp", "(II[IZZ)I", (void*)com_android_internal_os_Zygote_nativeForkApp},
3061         // @CriticalNative
3062         {"nativeAddUsapTableEntry", "(II)V",
3063          (void*)com_android_internal_os_Zygote_nativeAddUsapTableEntry},
3064         {"nativeSpecializeAppProcess",
3065          "(II[II[[IILjava/lang/String;Ljava/lang/String;ZLjava/lang/String;Ljava/lang/"
3066          "String;Z[Ljava/lang/String;[Ljava/lang/String;ZZZ)V",
3067          (void*)com_android_internal_os_Zygote_nativeSpecializeAppProcess},
3068         {"nativeInitNativeState", "(Z)V",
3069          (void*)com_android_internal_os_Zygote_nativeInitNativeState},
3070         {"nativeGetUsapPipeFDs", "()[I",
3071          (void*)com_android_internal_os_Zygote_nativeGetUsapPipeFDs},
3072         // @CriticalNative
3073         {"nativeAddUsapTableEntry", "(II)V",
3074          (void*)com_android_internal_os_Zygote_nativeAddUsapTableEntry},
3075         // @CriticalNative
3076         {"nativeRemoveUsapTableEntry", "(I)Z",
3077          (void*)com_android_internal_os_Zygote_nativeRemoveUsapTableEntry},
3078         {"nativeGetUsapPoolEventFD", "()I",
3079          (void*)com_android_internal_os_Zygote_nativeGetUsapPoolEventFD},
3080         {"nativeGetUsapPoolCount", "()I",
3081          (void*)com_android_internal_os_Zygote_nativeGetUsapPoolCount},
3082         {"nativeEmptyUsapPool", "()V", (void*)com_android_internal_os_Zygote_nativeEmptyUsapPool},
3083         {"nativeBlockSigTerm", "()V", (void*)com_android_internal_os_Zygote_nativeBlockSigTerm},
3084         {"nativeUnblockSigTerm", "()V", (void*)com_android_internal_os_Zygote_nativeUnblockSigTerm},
3085         {"nativeBoostUsapPriority", "()V",
3086          (void*)com_android_internal_os_Zygote_nativeBoostUsapPriority},
3087         {"nativeParseSigChld", "([BI[I)I",
3088          (void*)com_android_internal_os_Zygote_nativeParseSigChld},
3089         {"nativeSupportsMemoryTagging", "()Z",
3090          (void*)com_android_internal_os_Zygote_nativeSupportsMemoryTagging},
3091         {"nativeSupportsTaggedPointers", "()Z",
3092          (void*)com_android_internal_os_Zygote_nativeSupportsTaggedPointers},
3093         {"nativeCurrentTaggingLevel", "()I",
3094          (void*)com_android_internal_os_Zygote_nativeCurrentTaggingLevel},
3095         {"nativeMarkOpenedFilesBeforePreload", "()V",
3096          (void*)com_android_internal_os_Zygote_nativeMarkOpenedFilesBeforePreload},
3097         {"nativeAllowFilesOpenedByPreload", "()V",
3098          (void*)com_android_internal_os_Zygote_nativeAllowFilesOpenedByPreload},
3099 };
3100 
register_com_android_internal_os_Zygote(JNIEnv * env)3101 int register_com_android_internal_os_Zygote(JNIEnv* env) {
3102   gZygoteClass = MakeGlobalRefOrDie(env, FindClassOrDie(env, kZygoteClassName));
3103   gCallPostForkSystemServerHooks = GetStaticMethodIDOrDie(env, gZygoteClass,
3104                                                           "callPostForkSystemServerHooks",
3105                                                           "(I)V");
3106   gCallPostForkChildHooks = GetStaticMethodIDOrDie(env, gZygoteClass, "callPostForkChildHooks",
3107                                                    "(IZZLjava/lang/String;)V");
3108 
3109   gZygoteInitClass = MakeGlobalRefOrDie(env, FindClassOrDie(env, kZygoteInitClassName));
3110   gGetOrCreateSystemServerClassLoader =
3111           GetStaticMethodIDOrDie(env, gZygoteInitClass, "getOrCreateSystemServerClassLoader",
3112                                  "()Ljava/lang/ClassLoader;");
3113   gPrefetchStandaloneSystemServerJars =
3114           GetStaticMethodIDOrDie(env, gZygoteInitClass, "prefetchStandaloneSystemServerJars",
3115                                  "()V");
3116 
3117   RegisterMethodsOrDie(env, "com/android/internal/os/Zygote", gMethods, NELEM(gMethods));
3118 
3119   return JNI_OK;
3120 }
3121 }  // namespace android
3122