1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "service.h"
18 
19 #include <errno.h>
20 #include <fcntl.h>
21 #include <inttypes.h>
22 #include <linux/securebits.h>
23 #include <sched.h>
24 #include <sys/prctl.h>
25 #include <sys/stat.h>
26 #include <sys/time.h>
27 #include <termios.h>
28 #include <unistd.h>
29 #include <thread>
30 
31 #include <android-base/file.h>
32 #include <android-base/logging.h>
33 #include <android-base/properties.h>
34 #include <android-base/scopeguard.h>
35 #include <android-base/stringprintf.h>
36 #include <android-base/strings.h>
37 #include <cutils/sockets.h>
38 #include <processgroup/processgroup.h>
39 #include <selinux/selinux.h>
40 #include <sys/signalfd.h>
41 
42 #include <string>
43 
44 #include "interprocess_fifo.h"
45 #include "lmkd_service.h"
46 #include "service_list.h"
47 #include "util.h"
48 
49 #if defined(__BIONIC__)
50 #include <bionic/reserved_signals.h>
51 #endif
52 
53 #ifdef INIT_FULL_SOURCES
54 #include <android/api-level.h>
55 
56 #include "mount_namespace.h"
57 #include "reboot_utils.h"
58 #include "selinux.h"
59 #else
60 #include "host_init_stubs.h"
61 #endif
62 
63 using android::base::boot_clock;
64 using android::base::GetBoolProperty;
65 using android::base::GetIntProperty;
66 using android::base::GetProperty;
67 using android::base::Join;
68 using android::base::make_scope_guard;
69 using android::base::SetProperty;
70 using android::base::StartsWith;
71 using android::base::StringPrintf;
72 using android::base::unique_fd;
73 using android::base::WriteStringToFile;
74 
75 namespace android {
76 namespace init {
77 
ComputeContextFromExecutable(const std::string & service_path)78 static Result<std::string> ComputeContextFromExecutable(const std::string& service_path) {
79     std::string computed_context;
80 
81     char* raw_con = nullptr;
82     char* raw_filecon = nullptr;
83 
84     if (getcon(&raw_con) == -1) {
85         return Error() << "Could not get security context";
86     }
87     std::unique_ptr<char, decltype(&freecon)> mycon(raw_con, freecon);
88 
89     if (getfilecon(service_path.c_str(), &raw_filecon) == -1) {
90         return Error() << "Could not get file context";
91     }
92     std::unique_ptr<char, decltype(&freecon)> filecon(raw_filecon, freecon);
93 
94     char* new_con = nullptr;
95     int rc = security_compute_create(mycon.get(), filecon.get(),
96                                      string_to_security_class("process"), &new_con);
97     if (rc == 0) {
98         computed_context = new_con;
99         free(new_con);
100     }
101     if (rc == 0 && computed_context == mycon.get()) {
102         return Error() << "File " << service_path << "(labeled \"" << filecon.get()
103                        << "\") has incorrect label or no domain transition from " << mycon.get()
104                        << " to another SELinux domain defined. Have you configured your "
105                           "service correctly? https://source.android.com/security/selinux/"
106                           "device-policy#label_new_services_and_address_denials. Note: this "
107                           "error shows up even in permissive mode in order to make auditing "
108                           "denials possible.";
109     }
110     if (rc < 0) {
111         return Error() << "Could not get process context";
112     }
113     return computed_context;
114 }
115 
ExpandArgsAndExecv(const std::vector<std::string> & args,bool sigstop)116 static bool ExpandArgsAndExecv(const std::vector<std::string>& args, bool sigstop) {
117     std::vector<std::string> expanded_args;
118     std::vector<char*> c_strings;
119 
120     expanded_args.resize(args.size());
121     c_strings.push_back(const_cast<char*>(args[0].data()));
122     for (std::size_t i = 1; i < args.size(); ++i) {
123         auto expanded_arg = ExpandProps(args[i]);
124         if (!expanded_arg.ok()) {
125             LOG(FATAL) << args[0] << ": cannot expand arguments': " << expanded_arg.error();
126         }
127         expanded_args[i] = *expanded_arg;
128         c_strings.push_back(expanded_args[i].data());
129     }
130     c_strings.push_back(nullptr);
131 
132     if (sigstop) {
133         kill(getpid(), SIGSTOP);
134     }
135 
136     return execv(c_strings[0], c_strings.data()) == 0;
137 }
138 
139 unsigned long Service::next_start_order_ = 1;
140 bool Service::is_exec_service_running_ = false;
141 
Service(const std::string & name,Subcontext * subcontext_for_restart_commands,const std::string & filename,const std::vector<std::string> & args)142 Service::Service(const std::string& name, Subcontext* subcontext_for_restart_commands,
143                  const std::string& filename, const std::vector<std::string>& args)
144     : Service(name, 0, std::nullopt, 0, {}, 0, "", subcontext_for_restart_commands, filename,
145               args) {}
146 
Service(const std::string & name,unsigned flags,std::optional<uid_t> uid,gid_t gid,const std::vector<gid_t> & supp_gids,int namespace_flags,const std::string & seclabel,Subcontext * subcontext_for_restart_commands,const std::string & filename,const std::vector<std::string> & args)147 Service::Service(const std::string& name, unsigned flags, std::optional<uid_t> uid, gid_t gid,
148                  const std::vector<gid_t>& supp_gids, int namespace_flags,
149                  const std::string& seclabel, Subcontext* subcontext_for_restart_commands,
150                  const std::string& filename, const std::vector<std::string>& args)
151     : name_(name),
152       classnames_({"default"}),
153       flags_(flags),
154       pid_(0),
155       crash_count_(0),
156       proc_attr_{.ioprio_class = IoSchedClass_NONE,
157                  .ioprio_pri = 0,
158                  .parsed_uid = uid,
159                  .gid = gid,
160                  .supp_gids = supp_gids,
161                  .priority = 0},
162       namespaces_{.flags = namespace_flags},
163       seclabel_(seclabel),
164       subcontext_(subcontext_for_restart_commands),
165       onrestart_(false, subcontext_for_restart_commands, "<Service '" + name + "' onrestart>", 0,
166                  "onrestart", {}),
167       oom_score_adjust_(DEFAULT_OOM_SCORE_ADJUST),
168       start_order_(0),
169       args_(args),
170       filename_(filename) {}
171 
NotifyStateChange(const std::string & new_state) const172 void Service::NotifyStateChange(const std::string& new_state) const {
173     if ((flags_ & SVC_TEMPORARY) != 0) {
174         // Services created by 'exec' are temporary and don't have properties tracking their state.
175         return;
176     }
177 
178     std::string prop_name = "init.svc." + name_;
179     SetProperty(prop_name, new_state);
180 
181     if (new_state == "running") {
182         uint64_t start_ns = time_started_.time_since_epoch().count();
183         std::string boottime_property = "ro.boottime." + name_;
184         if (GetProperty(boottime_property, "").empty()) {
185             SetProperty(boottime_property, std::to_string(start_ns));
186         }
187     }
188 
189     // init.svc_debug_pid.* properties are only for tests, and should not be used
190     // on device for security checks.
191     std::string pid_property = "init.svc_debug_pid." + name_;
192     if (new_state == "running") {
193         SetProperty(pid_property, std::to_string(pid_));
194     } else if (new_state == "stopped") {
195         SetProperty(pid_property, "");
196     }
197 }
198 
KillProcessGroup(int signal)199 void Service::KillProcessGroup(int signal) {
200     // Always attempt the process kill if process is still running.
201     // Cgroup clean up routines are idempotent. It's safe to call
202     // killProcessGroup repeatedly. During shutdown, `init` will
203     // call this function to send SIGTERM/SIGKILL to all processes.
204     // These signals must be sent for a successful shutdown.
205     if (!process_cgroup_empty_ || IsRunning()) {
206         LOG(INFO) << "Sending signal " << signal << " to service '" << name_ << "' (pid " << pid_
207                   << ") process group...";
208         int r;
209         if (signal == SIGTERM) {
210             r = killProcessGroupOnce(uid(), pid_, signal);
211         } else {
212             r = killProcessGroup(uid(), pid_, signal);
213         }
214 
215         if (r == 0) process_cgroup_empty_ = true;
216     }
217 
218     if (oom_score_adjust_ != DEFAULT_OOM_SCORE_ADJUST) {
219         LmkdUnregister(name_, pid_);
220     }
221 }
222 
SetProcessAttributesAndCaps(InterprocessFifo setsid_finished)223 void Service::SetProcessAttributesAndCaps(InterprocessFifo setsid_finished) {
224     // Keep capabilites on uid change.
225     if (capabilities_ && uid()) {
226         // If Android is running in a container, some securebits might already
227         // be locked, so don't change those.
228         unsigned long securebits = prctl(PR_GET_SECUREBITS);
229         if (securebits == -1UL) {
230             PLOG(FATAL) << "prctl(PR_GET_SECUREBITS) failed for " << name_;
231         }
232         securebits |= SECBIT_KEEP_CAPS | SECBIT_KEEP_CAPS_LOCKED;
233         if (prctl(PR_SET_SECUREBITS, securebits) != 0) {
234             PLOG(FATAL) << "prctl(PR_SET_SECUREBITS) failed for " << name_;
235         }
236     }
237 
238     if (auto result = SetProcessAttributes(proc_attr_, std::move(setsid_finished)); !result.ok()) {
239         LOG(FATAL) << "cannot set attribute for " << name_ << ": " << result.error();
240     }
241 
242     if (!seclabel_.empty()) {
243         if (setexeccon(seclabel_.c_str()) < 0) {
244             PLOG(FATAL) << "cannot setexeccon('" << seclabel_ << "') for " << name_;
245         }
246     }
247 
248     if (capabilities_) {
249         if (!SetCapsForExec(*capabilities_)) {
250             LOG(FATAL) << "cannot set capabilities for " << name_;
251         }
252     } else if (uid()) {
253         // Inheritable caps can be non-zero when running in a container.
254         if (!DropInheritableCaps()) {
255             LOG(FATAL) << "cannot drop inheritable caps for " << name_;
256         }
257     }
258 }
259 
Reap(const siginfo_t & siginfo)260 void Service::Reap(const siginfo_t& siginfo) {
261     if (!(flags_ & SVC_ONESHOT) || (flags_ & SVC_RESTART)) {
262         KillProcessGroup(SIGKILL);
263     } else {
264         // Legacy behavior from ~2007 until Android R: this else branch did not exist and we did not
265         // kill the process group in this case.
266         if (SelinuxGetVendorAndroidVersion() >= __ANDROID_API_R__) {
267             // The new behavior in Android R is to kill these process groups in all cases.  The
268             // 'true' parameter instructions KillProcessGroup() to report a warning message where it
269             // detects a difference in behavior has occurred.
270             KillProcessGroup(SIGKILL);
271         }
272     }
273 
274     // Remove any socket resources we may have created.
275     for (const auto& socket : sockets_) {
276         if (socket.persist) {
277             continue;
278         }
279         auto path = ANDROID_SOCKET_DIR "/" + socket.name;
280         unlink(path.c_str());
281     }
282 
283     for (const auto& f : reap_callbacks_) {
284         f(siginfo);
285     }
286 
287     if ((siginfo.si_code != CLD_EXITED || siginfo.si_status != 0) && on_failure_reboot_target_) {
288         LOG(ERROR) << "Service " << name_
289                    << " has 'reboot_on_failure' option and failed, shutting down system.";
290         trigger_shutdown(*on_failure_reboot_target_);
291     }
292 
293     if (flags_ & SVC_EXEC) UnSetExec();
294 
295     if (name_ == "zygote" || name_ == "zygote64") {
296         removeAllEmptyProcessGroups();
297     }
298 
299     if (flags_ & SVC_TEMPORARY) return;
300 
301     pid_ = 0;
302     flags_ &= (~SVC_RUNNING);
303     start_order_ = 0;
304     was_last_exit_ok_ = siginfo.si_code == CLD_EXITED && siginfo.si_status == 0;
305 
306     // Oneshot processes go into the disabled state on exit,
307     // except when manually restarted.
308     if ((flags_ & SVC_ONESHOT) && !(flags_ & SVC_RESTART) && !(flags_ & SVC_RESET)) {
309         flags_ |= SVC_DISABLED;
310     }
311 
312     // Disabled and reset processes do not get restarted automatically.
313     if (flags_ & (SVC_DISABLED | SVC_RESET))  {
314         NotifyStateChange("stopped");
315         return;
316     }
317 
318 #if INIT_FULL_SOURCES
319     static bool is_apex_updatable = true;
320 #else
321     static bool is_apex_updatable = false;
322 #endif
323     const bool use_default_mount_ns =
324             mount_namespace_.has_value() && *mount_namespace_ == NS_DEFAULT;
325     const bool is_process_updatable = use_default_mount_ns && is_apex_updatable;
326 
327 #if defined(__BIONIC__) && defined(SEGV_MTEAERR)
328     // As a precaution, we only upgrade a service once per reboot, to limit
329     // the potential impact.
330     //
331     // BIONIC_SIGNAL_ART_PROFILER is a magic value used by deuggerd to signal
332     // that the process crashed with SIGSEGV and SEGV_MTEAERR. This signal will
333     // never be seen otherwise in a crash, because it always gets handled by the
334     // profiling signal handlers in bionic. See also
335     // debuggerd/handler/debuggerd_handler.cpp.
336     bool should_upgrade_mte = siginfo.si_code != CLD_EXITED &&
337                               siginfo.si_status == BIONIC_SIGNAL_ART_PROFILER && !upgraded_mte_;
338 
339     if (should_upgrade_mte) {
340         constexpr int kDefaultUpgradeSecs = 60;
341         int secs = GetIntProperty("persist.device_config.memory_safety_native.upgrade_secs.default",
342                                   kDefaultUpgradeSecs);
343         secs = GetIntProperty(
344                 "persist.device_config.memory_safety_native.upgrade_secs.service." + name_, secs);
345         if (secs > 0) {
346             LOG(INFO) << "Upgrading service " << name_ << " to sync MTE for " << secs << " seconds";
347             once_environment_vars_.emplace_back("BIONIC_MEMTAG_UPGRADE_SECS", std::to_string(secs));
348             upgraded_mte_ = true;
349         } else {
350             LOG(INFO) << "Not upgrading service " << name_ << " to sync MTE due to device config";
351         }
352     }
353 #endif
354 
355     // If we crash > 4 times in 'fatal_crash_window_' minutes or before boot_completed,
356     // reboot into bootloader or set crashing property
357     boot_clock::time_point now = boot_clock::now();
358     constexpr const char native_watchdog_reboot_time[] = "persist.init.svc.last_fatal_reboot_epoch";
359     uint64_t throttle_window =
360             std::chrono::duration_cast<std::chrono::seconds>(std::chrono::hours(24)).count();
361     if (((flags_ & SVC_CRITICAL) || is_process_updatable) && !(flags_ & SVC_RESTART) &&
362         !was_last_exit_ok_) {
363         bool boot_completed = GetBoolProperty("sys.boot_completed", false);
364         if (now < time_crashed_ + fatal_crash_window_ || !boot_completed) {
365             if (++crash_count_ > 4) {
366                 auto exit_reason =
367                         boot_completed
368                                 ? "in " + std::to_string(fatal_crash_window_.count()) + " minutes"
369                                 : "before boot completed";
370                 if (flags_ & SVC_CRITICAL) {
371                     if (!GetBoolProperty("init.svc_debug.no_fatal." + name_, false)) {
372                         uint64_t epoch_time =
373                                 std::chrono::duration_cast<std::chrono::seconds>(
374                                         std::chrono::system_clock::now().time_since_epoch())
375                                         .count();
376                         // Do not reboot again If it was already initiated in the last 24hrs
377                         if (epoch_time - GetIntProperty(native_watchdog_reboot_time, 0) >
378                             throttle_window) {
379                             SetProperty(native_watchdog_reboot_time, std::to_string(epoch_time));
380                             // Aborts into `fatal_reboot_target_'.
381                             SetFatalRebootTarget(fatal_reboot_target_);
382                             LOG(FATAL) << "critical process '" << name_ << "' exited 4 times "
383                                        << exit_reason;
384                         } else {
385                             LOG(INFO) << "Reboot already performed in last 24hrs because of crash.";
386                         }
387                     }
388                 } else {
389                     LOG(ERROR) << "process with updatable components '" << name_
390                                << "' exited 4 times " << exit_reason;
391                     // Notifies update_verifier and apexd
392                     SetProperty("sys.init.updatable_crashing_process_name", name_);
393                     SetProperty("sys.init.updatable_crashing", "1");
394                 }
395             }
396         } else {
397             time_crashed_ = now;
398             crash_count_ = 1;
399         }
400     }
401 
402     flags_ &= (~SVC_RESTART);
403     flags_ |= SVC_RESTARTING;
404 
405     // Execute all onrestart commands for this service.
406     onrestart_.ExecuteAllCommands();
407 
408     NotifyStateChange("restarting");
409     return;
410 }
411 
DumpState() const412 void Service::DumpState() const {
413     LOG(INFO) << "service " << name_;
414     LOG(INFO) << "  class '" << Join(classnames_, " ") << "'";
415     LOG(INFO) << "  exec " << Join(args_, " ");
416     for (const auto& socket : sockets_) {
417         LOG(INFO) << "  socket " << socket.name;
418     }
419     for (const auto& file : files_) {
420         LOG(INFO) << "  file " << file.name;
421     }
422 }
423 
424 
ExecStart()425 Result<void> Service::ExecStart() {
426     auto reboot_on_failure = make_scope_guard([this] {
427         if (on_failure_reboot_target_) {
428             trigger_shutdown(*on_failure_reboot_target_);
429         }
430     });
431 
432     if (is_updatable() && !IsDefaultMountNamespaceReady()) {
433         // Don't delay the service for ExecStart() as the semantic is that
434         // the caller might depend on the side effect of the execution.
435         return Error() << "Cannot start an updatable service '" << name_
436                        << "' before configs from APEXes are all loaded";
437     }
438 
439     flags_ |= SVC_ONESHOT;
440 
441     if (auto result = Start(); !result.ok()) {
442         return result;
443     }
444 
445     flags_ |= SVC_EXEC;
446     is_exec_service_running_ = true;
447 
448     LOG(INFO) << "SVC_EXEC service '" << name_ << "' pid " << pid_ << " (uid " << uid() << " gid "
449               << proc_attr_.gid << "+" << proc_attr_.supp_gids.size() << " context "
450               << (!seclabel_.empty() ? seclabel_ : "default") << ") started; waiting...";
451 
452     reboot_on_failure.Disable();
453     return {};
454 }
455 
CheckConsole()456 Result<void> Service::CheckConsole() {
457     if (!(flags_ & SVC_CONSOLE)) {
458         return {};
459     }
460 
461     // On newer kernels, /dev/console will always exist because
462     // "console=ttynull" is hard-coded in CONFIG_CMDLINE. This new boot
463     // property should be set via "androidboot.serialconsole=0" to explicitly
464     // disable services requiring the console. For older kernels and boot
465     // images, not setting this at all will fall back to the old behavior
466     if (GetProperty("ro.boot.serialconsole", "") == "0") {
467         flags_ |= SVC_DISABLED;
468         return {};
469     }
470 
471     if (proc_attr_.console.empty()) {
472         proc_attr_.console = "/dev/" + GetProperty("ro.boot.console", "console");
473     }
474 
475     // Make sure that open call succeeds to ensure a console driver is
476     // properly registered for the device node
477     int console_fd = open(proc_attr_.console.c_str(), O_RDWR | O_CLOEXEC);
478     if (console_fd < 0) {
479         flags_ |= SVC_DISABLED;
480         return ErrnoError() << "Couldn't open console '" << proc_attr_.console << "'";
481     }
482     close(console_fd);
483     return {};
484 }
485 
486 // Configures the memory cgroup properties for the service.
ConfigureMemcg()487 void Service::ConfigureMemcg() {
488     if (swappiness_ != -1) {
489         if (!setProcessGroupSwappiness(uid(), pid_, swappiness_)) {
490             PLOG(ERROR) << "setProcessGroupSwappiness failed";
491         }
492     }
493 
494     if (soft_limit_in_bytes_ != -1) {
495         if (!setProcessGroupSoftLimit(uid(), pid_, soft_limit_in_bytes_)) {
496             PLOG(ERROR) << "setProcessGroupSoftLimit failed";
497         }
498     }
499 
500     size_t computed_limit_in_bytes = limit_in_bytes_;
501     if (limit_percent_ != -1) {
502         long page_size = sysconf(_SC_PAGESIZE);
503         long num_pages = sysconf(_SC_PHYS_PAGES);
504         if (page_size > 0 && num_pages > 0) {
505             size_t max_mem = SIZE_MAX;
506             if (size_t(num_pages) < SIZE_MAX / size_t(page_size)) {
507                 max_mem = size_t(num_pages) * size_t(page_size);
508             }
509             computed_limit_in_bytes =
510                     std::min(computed_limit_in_bytes, max_mem / 100 * limit_percent_);
511         }
512     }
513 
514     if (!limit_property_.empty()) {
515         // This ends up overwriting computed_limit_in_bytes but only if the
516         // property is defined.
517         computed_limit_in_bytes =
518                 android::base::GetUintProperty(limit_property_, computed_limit_in_bytes, SIZE_MAX);
519     }
520 
521     if (computed_limit_in_bytes != size_t(-1)) {
522         if (!setProcessGroupLimit(uid(), pid_, computed_limit_in_bytes)) {
523             PLOG(ERROR) << "setProcessGroupLimit failed";
524         }
525     }
526 }
527 
528 // Enters namespaces, sets environment variables, writes PID files and runs the service executable.
RunService(const std::vector<Descriptor> & descriptors,InterprocessFifo cgroups_activated,InterprocessFifo setsid_finished)529 void Service::RunService(const std::vector<Descriptor>& descriptors,
530                          InterprocessFifo cgroups_activated, InterprocessFifo setsid_finished) {
531     if (auto result = EnterNamespaces(namespaces_, name_, mount_namespace_); !result.ok()) {
532         LOG(FATAL) << "Service '" << name_ << "' failed to set up namespaces: " << result.error();
533     }
534 
535     for (const auto& [key, value] : once_environment_vars_) {
536         setenv(key.c_str(), value.c_str(), 1);
537     }
538     for (const auto& [key, value] : environment_vars_) {
539         setenv(key.c_str(), value.c_str(), 1);
540     }
541 
542     for (const auto& descriptor : descriptors) {
543         descriptor.Publish();
544     }
545 
546     if (auto result = WritePidToFiles(&writepid_files_); !result.ok()) {
547         LOG(ERROR) << "failed to write pid to files: " << result.error();
548     }
549 
550     // Wait until the cgroups have been created and until the cgroup controllers have been
551     // activated.
552     Result<uint8_t> byte = cgroups_activated.Read();
553     if (!byte.ok()) {
554         LOG(ERROR) << name_ << ": failed to read from notification channel: " << byte.error();
555     }
556     cgroups_activated.Close();
557     if (*byte != kCgroupsActivated) {
558         LOG(FATAL) << "Service '" << name_  << "' failed to start due to a fatal error";
559         _exit(EXIT_FAILURE);
560     }
561 
562     if (task_profiles_.size() > 0) {
563         bool succeeded = SelinuxGetVendorAndroidVersion() < __ANDROID_API_U__
564                                  ?
565                                  // Compatibility mode: apply the task profiles to the current
566                                  // thread.
567                                  SetTaskProfiles(getpid(), task_profiles_)
568                                  :
569                                  // Apply the task profiles to the current process.
570                                  SetProcessProfiles(getuid(), getpid(), task_profiles_);
571         if (!succeeded) {
572             LOG(ERROR) << "failed to set task profiles";
573         }
574     }
575 
576     // As requested, set our gid, supplemental gids, uid, context, and
577     // priority. Aborts on failure.
578     SetProcessAttributesAndCaps(std::move(setsid_finished));
579 
580     if (!ExpandArgsAndExecv(args_, sigstop_)) {
581         PLOG(ERROR) << "cannot execv('" << args_[0]
582                     << "'). See the 'Debugging init' section of init's README.md for tips";
583     }
584 }
585 
Start()586 Result<void> Service::Start() {
587     auto reboot_on_failure = make_scope_guard([this] {
588         if (on_failure_reboot_target_) {
589             trigger_shutdown(*on_failure_reboot_target_);
590         }
591     });
592 
593     if (is_updatable() && !IsDefaultMountNamespaceReady()) {
594         ServiceList::GetInstance().DelayService(*this);
595         return Error() << "Cannot start an updatable service '" << name_
596                        << "' before configs from APEXes are all loaded. "
597                        << "Queued for execution.";
598     }
599 
600     bool disabled = (flags_ & (SVC_DISABLED | SVC_RESET));
601     ResetFlagsForStart();
602 
603     // Running processes require no additional work --- if they're in the
604     // process of exiting, we've ensured that they will immediately restart
605     // on exit, unless they are ONESHOT. For ONESHOT service, if it's in
606     // stopping status, we just set SVC_RESTART flag so it will get restarted
607     // in Reap().
608     if (flags_ & SVC_RUNNING) {
609         if ((flags_ & SVC_ONESHOT) && disabled) {
610             flags_ |= SVC_RESTART;
611         }
612 
613         LOG(INFO) << "service '" << name_
614                   << "' requested start, but it is already running (flags: " << flags_ << ")";
615 
616         // It is not an error to try to start a service that is already running.
617         reboot_on_failure.Disable();
618         return {};
619     }
620 
621     // cgroups_activated is used for communication from the parent to the child
622     // while setsid_finished is used for communication from the child process to
623     // the parent process. These two communication channels are separate because
624     // combining these into a single communication channel would introduce a
625     // race between the Write() calls by the parent and by the child.
626     InterprocessFifo cgroups_activated, setsid_finished;
627     OR_RETURN(cgroups_activated.Initialize());
628     OR_RETURN(setsid_finished.Initialize());
629 
630     if (Result<void> result = CheckConsole(); !result.ok()) {
631         return result;
632     }
633 
634     struct stat sb;
635     if (stat(args_[0].c_str(), &sb) == -1) {
636         flags_ |= SVC_DISABLED;
637         return ErrnoError() << "Cannot find '" << args_[0] << "'";
638     }
639 
640     std::string scon;
641     if (!seclabel_.empty()) {
642         scon = seclabel_;
643     } else {
644         auto result = ComputeContextFromExecutable(args_[0]);
645         if (!result.ok()) {
646             return result.error();
647         }
648         scon = *result;
649     }
650 
651     if (!mount_namespace_.has_value()) {
652         // remember from which mount namespace the service should start
653         SetMountNamespace();
654     }
655 
656     post_data_ = ServiceList::GetInstance().IsPostData();
657 
658     LOG(INFO) << "starting service '" << name_ << "'...";
659 
660     std::vector<Descriptor> descriptors;
661     for (const auto& socket : sockets_) {
662         if (auto result = socket.Create(scon); result.ok()) {
663             descriptors.emplace_back(std::move(*result));
664         } else {
665             LOG(INFO) << "Could not create socket '" << socket.name << "': " << result.error();
666         }
667     }
668 
669     for (const auto& file : files_) {
670         if (auto result = file.Create(); result.ok()) {
671             descriptors.emplace_back(std::move(*result));
672         } else {
673             LOG(INFO) << "Could not open file '" << file.name << "': " << result.error();
674         }
675     }
676 
677     pid_t pid = -1;
678     if (namespaces_.flags) {
679         pid = clone(nullptr, nullptr, namespaces_.flags | SIGCHLD, nullptr);
680     } else {
681         pid = fork();
682     }
683 
684     if (pid == 0) {
685         umask(077);
686         cgroups_activated.CloseWriteFd();
687         setsid_finished.CloseReadFd();
688         RunService(descriptors, std::move(cgroups_activated), std::move(setsid_finished));
689         _exit(127);
690     } else {
691         cgroups_activated.CloseReadFd();
692         setsid_finished.CloseWriteFd();
693     }
694 
695     if (pid < 0) {
696         pid_ = 0;
697         return ErrnoError() << "Failed to fork";
698     }
699 
700     once_environment_vars_.clear();
701 
702     if (oom_score_adjust_ != DEFAULT_OOM_SCORE_ADJUST) {
703         std::string oom_str = std::to_string(oom_score_adjust_);
704         std::string oom_file = StringPrintf("/proc/%d/oom_score_adj", pid);
705         if (!WriteStringToFile(oom_str, oom_file)) {
706             PLOG(ERROR) << "couldn't write oom_score_adj";
707         }
708     }
709 
710     time_started_ = boot_clock::now();
711     pid_ = pid;
712     flags_ |= SVC_RUNNING;
713     start_order_ = next_start_order_++;
714     process_cgroup_empty_ = false;
715 
716     if (CgroupsAvailable()) {
717         bool use_memcg = swappiness_ != -1 || soft_limit_in_bytes_ != -1 || limit_in_bytes_ != -1 ||
718                          limit_percent_ != -1 || !limit_property_.empty();
719         errno = -createProcessGroup(uid(), pid_, use_memcg);
720         if (errno != 0) {
721             Result<void> result = cgroups_activated.Write(kActivatingCgroupsFailed);
722             if (!result.ok()) {
723                 return Error() << "Sending notification failed: " << result.error();
724             }
725             return Error() << "createProcessGroup(" << uid() << ", " << pid_ << ", " << use_memcg
726                            << ") failed for service '" << name_ << "': " << strerror(errno);
727         }
728 
729         // When the blkio controller is mounted in the v1 hierarchy, NormalIoPriority is
730         // the default (/dev/blkio). When the blkio controller is mounted in the v2 hierarchy, the
731         // NormalIoPriority profile has to be applied explicitly.
732         SetProcessProfiles(uid(), pid_, {"NormalIoPriority"});
733 
734         if (use_memcg) {
735             ConfigureMemcg();
736         }
737     }
738 
739     if (oom_score_adjust_ != DEFAULT_OOM_SCORE_ADJUST) {
740         LmkdRegister(name_, uid(), pid_, oom_score_adjust_);
741     }
742 
743     if (Result<void> result = cgroups_activated.Write(kCgroupsActivated); !result.ok()) {
744         return Error() << "Sending cgroups activated notification failed: " << result.error();
745     }
746 
747     cgroups_activated.Close();
748 
749     // Call setpgid() from the parent process to make sure that this call has
750     // finished before the parent process calls kill(-pgid, ...).
751     if (!RequiresConsole(proc_attr_)) {
752         if (setpgid(pid, pid) < 0) {
753             switch (errno) {
754                 case EACCES:  // Child has already performed setpgid() followed by execve().
755                 case ESRCH:   // Child process no longer exists.
756                     break;
757                 default:
758                     PLOG(ERROR) << "setpgid() from parent failed";
759             }
760         }
761     } else {
762         // The Read() call below will return an error if the child is killed.
763         if (Result<uint8_t> result = setsid_finished.Read();
764             !result.ok() || *result != kSetSidFinished) {
765             if (!result.ok()) {
766                 return Error() << "Waiting for setsid() failed: " << result.error();
767             } else {
768                 return Error() << "Waiting for setsid() failed: " << static_cast<uint32_t>(*result)
769                                << " <> " << static_cast<uint32_t>(kSetSidFinished);
770             }
771         }
772     }
773 
774     setsid_finished.Close();
775 
776     NotifyStateChange("running");
777     reboot_on_failure.Disable();
778 
779     LOG(INFO) << "... started service '" << name_ << "' has pid " << pid_;
780 
781     return {};
782 }
783 
784 // Set mount namespace for the service.
785 // The reason why remember the mount namespace:
786 //   If this service is started before APEXes and corresponding linker configuration
787 //   get available, mark it as pre-apexd one. Note that this marking is
788 //   permanent. So for example, if the service is re-launched (e.g., due
789 //   to crash), it is still recognized as pre-apexd... for consistency.
SetMountNamespace()790 void Service::SetMountNamespace() {
791     // APEXd is always started in the "current" namespace because it is the process to set up
792     // the current namespace. So, leave mount_namespace_ as empty.
793     if (args_[0] == "/system/bin/apexd") {
794         return;
795     }
796     // Services in the following list start in the "default" mount namespace.
797     // Note that they should use bootstrap bionic if they start before APEXes are ready.
798     static const std::set<std::string> kUseDefaultMountNamespace = {
799             "ueventd",           // load firmwares from APEXes
800             "hwservicemanager",  // load VINTF fragments from APEXes
801             "servicemanager",    // load VINTF fragments from APEXes
802     };
803     if (kUseDefaultMountNamespace.find(name_) != kUseDefaultMountNamespace.end()) {
804         mount_namespace_ = NS_DEFAULT;
805         return;
806     }
807     // Use the "default" mount namespace only if it's ready
808     mount_namespace_ = IsDefaultMountNamespaceReady() ? NS_DEFAULT : NS_BOOTSTRAP;
809 }
810 
// Returns the number of entries in /proc/self/task whose name does not start with '.',
// or -1 if that directory cannot be opened.
static int ThreadCount() {
    DIR* raw_dir = opendir("/proc/self/task");
    if (raw_dir == nullptr) {
        return -1;
    }
    // From here on the directory stream is closed automatically on every return path.
    std::unique_ptr<DIR, decltype(&closedir)> task_dir(raw_dir, closedir);

    int num_tasks = 0;
    for (dirent* ent = readdir(task_dir.get()); ent != nullptr; ent = readdir(task_dir.get())) {
        // Skip "." and ".." (and anything else that starts with a dot).
        if (ent->d_name[0] == '.') {
            continue;
        }
        ++num_tasks;
    }
    return num_tasks;
}
826 
827 // Must be called BEFORE any threads are created. See also the sigprocmask() man page.
CreateSigchldFd()828 unique_fd Service::CreateSigchldFd() {
829     CHECK_EQ(ThreadCount(), 1);
830     sigset_t mask;
831     sigemptyset(&mask);
832     sigaddset(&mask, SIGCHLD);
833     if (sigprocmask(SIG_BLOCK, &mask, nullptr) < 0) {
834         PLOG(FATAL) << "Failed to block SIGCHLD";
835     }
836 
837     return unique_fd(signalfd(-1, &mask, SFD_CLOEXEC));
838 }
839 
SetStartedInFirstStage(pid_t pid)840 void Service::SetStartedInFirstStage(pid_t pid) {
841     LOG(INFO) << "adding first-stage service '" << name_ << "'...";
842 
843     time_started_ = boot_clock::now();  // not accurate, but doesn't matter here
844     pid_ = pid;
845     flags_ |= SVC_RUNNING;
846     start_order_ = next_start_order_++;
847 
848     NotifyStateChange("running");
849 }
850 
ResetFlagsForStart()851 void Service::ResetFlagsForStart() {
852     // Starting a service removes it from the disabled or reset state and
853     // immediately takes it out of the restarting state if it was in there.
854     flags_ &= ~(SVC_DISABLED | SVC_RESTARTING | SVC_RESET | SVC_RESTART | SVC_DISABLED_START);
855 }
856 
StartIfNotDisabled()857 Result<void> Service::StartIfNotDisabled() {
858     if (!(flags_ & SVC_DISABLED)) {
859         return Start();
860     } else {
861         flags_ |= SVC_DISABLED_START;
862     }
863     return {};
864 }
865 
Enable()866 Result<void> Service::Enable() {
867     flags_ &= ~(SVC_DISABLED | SVC_RC_DISABLED);
868     if (flags_ & SVC_DISABLED_START) {
869         return Start();
870     }
871     return {};
872 }
873 
Reset()874 void Service::Reset() {
875     StopOrReset(SVC_RESET);
876 }
877 
Stop()878 void Service::Stop() {
879     StopOrReset(SVC_DISABLED);
880 }
881 
Terminate()882 void Service::Terminate() {
883     flags_ &= ~(SVC_RESTARTING | SVC_DISABLED_START);
884     flags_ |= SVC_DISABLED;
885     if (pid_) {
886         KillProcessGroup(SIGTERM);
887         NotifyStateChange("stopping");
888     }
889 }
890 
Timeout()891 void Service::Timeout() {
892     // All process state flags will be taken care of in Reap(), we really just want to kill the
893     // process here when it times out.  Oneshot processes will transition to be disabled, and
894     // all other processes will transition to be restarting.
895     LOG(INFO) << "Service '" << name_ << "' expired its timeout of " << timeout_period_->count()
896               << " seconds and will now be killed";
897     if (pid_) {
898         KillProcessGroup(SIGKILL);
899         NotifyStateChange("stopping");
900     }
901 }
902 
Restart()903 void Service::Restart() {
904     if (flags_ & SVC_RUNNING) {
905         /* Stop, wait, then start the service. */
906         StopOrReset(SVC_RESTART);
907     } else if (!(flags_ & SVC_RESTARTING)) {
908         /* Just start the service since it's not running. */
909         if (auto result = Start(); !result.ok()) {
910             LOG(ERROR) << "Could not restart '" << name_ << "': " << result.error();
911         }
912     } /* else: Service is restarting anyways. */
913 }
914 
915 // The how field should be either SVC_DISABLED, SVC_RESET, or SVC_RESTART.
StopOrReset(int how)916 void Service::StopOrReset(int how) {
917     // The service is still SVC_RUNNING until its process exits, but if it has
918     // already exited it shoudn't attempt a restart yet.
919     flags_ &= ~(SVC_RESTARTING | SVC_DISABLED_START);
920 
921     if ((how != SVC_DISABLED) && (how != SVC_RESET) && (how != SVC_RESTART)) {
922         // An illegal flag: default to SVC_DISABLED.
923         LOG(ERROR) << "service '" << name_ << "' requested unknown flag " << how
924                    << ", defaulting to disabling it.";
925         how = SVC_DISABLED;
926     }
927 
928     // If the service has not yet started, prevent it from auto-starting with its class.
929     if (how == SVC_RESET) {
930         flags_ |= (flags_ & SVC_RC_DISABLED) ? SVC_DISABLED : SVC_RESET;
931     } else {
932         flags_ |= how;
933     }
934     // Make sure it's in right status when a restart immediately follow a
935     // stop/reset or vice versa.
936     if (how == SVC_RESTART) {
937         flags_ &= (~(SVC_DISABLED | SVC_RESET));
938     } else {
939         flags_ &= (~SVC_RESTART);
940     }
941 
942     if (pid_) {
943         if (flags_ & SVC_GENTLE_KILL) {
944             KillProcessGroup(SIGTERM);
945             if (!process_cgroup_empty()) std::this_thread::sleep_for(200ms);
946         }
947         KillProcessGroup(SIGKILL);
948         NotifyStateChange("stopping");
949     } else {
950         NotifyStateChange("stopped");
951     }
952 }
953 
MakeTemporaryOneshotService(const std::vector<std::string> & args)954 Result<std::unique_ptr<Service>> Service::MakeTemporaryOneshotService(
955         const std::vector<std::string>& args) {
956     // Parse the arguments: exec [SECLABEL [UID [GID]*] --] COMMAND ARGS...
957     // SECLABEL can be a - to denote default
958     std::size_t command_arg = 1;
959     for (std::size_t i = 1; i < args.size(); ++i) {
960         if (args[i] == "--") {
961             command_arg = i + 1;
962             break;
963         }
964     }
965     if (command_arg > 4 + NR_SVC_SUPP_GIDS) {
966         return Error() << "exec called with too many supplementary group ids";
967     }
968 
969     if (command_arg >= args.size()) {
970         return Error() << "exec called without command";
971     }
972     std::vector<std::string> str_args(args.begin() + command_arg, args.end());
973 
974     static size_t exec_count = 0;
975     exec_count++;
976     std::string name = "exec " + std::to_string(exec_count) + " (" + Join(str_args, " ") + ")";
977 
978     unsigned flags = SVC_ONESHOT | SVC_TEMPORARY;
979     unsigned namespace_flags = 0;
980 
981     std::string seclabel = "";
982     if (command_arg > 2 && args[1] != "-") {
983         seclabel = args[1];
984     }
985     Result<uid_t> uid = 0;
986     if (command_arg > 3) {
987         uid = DecodeUid(args[2]);
988         if (!uid.ok()) {
989             return Error() << "Unable to decode UID for '" << args[2] << "': " << uid.error();
990         }
991     }
992     Result<gid_t> gid = 0;
993     std::vector<gid_t> supp_gids;
994     if (command_arg > 4) {
995         gid = DecodeUid(args[3]);
996         if (!gid.ok()) {
997             return Error() << "Unable to decode GID for '" << args[3] << "': " << gid.error();
998         }
999         std::size_t nr_supp_gids = command_arg - 1 /* -- */ - 4 /* exec SECLABEL UID GID */;
1000         for (size_t i = 0; i < nr_supp_gids; ++i) {
1001             auto supp_gid = DecodeUid(args[4 + i]);
1002             if (!supp_gid.ok()) {
1003                 return Error() << "Unable to decode GID for '" << args[4 + i]
1004                                << "': " << supp_gid.error();
1005             }
1006             supp_gids.push_back(*supp_gid);
1007         }
1008     }
1009 
1010     return std::make_unique<Service>(name, flags, *uid, *gid, supp_gids, namespace_flags, seclabel,
1011                                      nullptr, /*filename=*/"", str_args);
1012 }
1013 
1014 // This is used for snapuserd_proxy, which hands off a socket to snapuserd. It's
1015 // a special case to support the daemon launched in first-stage init. The persist
1016 // feature is not part of the init language and is only used here.
MarkSocketPersistent(const std::string & socket_name)1017 bool Service::MarkSocketPersistent(const std::string& socket_name) {
1018     for (auto& socket : sockets_) {
1019         if (socket.name == socket_name) {
1020             socket.persist = true;
1021             return true;
1022         }
1023     }
1024     return false;
1025 }
1026 
1027 }  // namespace init
1028 }  // namespace android
1029