1 /*
2  *  Copyright 2014 Google, Inc
3  *
4  *  Licensed under the Apache License, Version 2.0 (the "License");
5  *  you may not use this file except in compliance with the License.
6  *  You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  *  Unless required by applicable law or agreed to in writing, software
11  *  distributed under the License is distributed on an "AS IS" BASIS,
12  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  *  See the License for the specific language governing permissions and
14  *  limitations under the License.
15  */
16 
17 //#define LOG_NDEBUG 0
18 #define LOG_TAG "libprocessgroup"
19 
20 #include <assert.h>
21 #include <dirent.h>
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <inttypes.h>
25 #include <poll.h>
26 #include <signal.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <sys/stat.h>
30 #include <sys/types.h>
31 #include <unistd.h>
32 
33 #include <chrono>
34 #include <cstring>
35 #include <map>
36 #include <memory>
37 #include <mutex>
38 #include <set>
39 #include <string>
40 #include <thread>
41 
42 #include <android-base/file.h>
43 #include <android-base/logging.h>
44 #include <android-base/properties.h>
45 #include <android-base/stringprintf.h>
46 #include <android-base/strings.h>
47 #include <cutils/android_filesystem_config.h>
48 #include <processgroup/processgroup.h>
49 #include <task_profiles.h>
50 
51 using android::base::GetBoolProperty;
52 using android::base::StartsWith;
53 using android::base::StringPrintf;
54 using android::base::WriteStringToFile;
55 
56 using namespace std::chrono_literals;
57 
58 #define PROCESSGROUP_CGROUP_PROCS_FILE "cgroup.procs"
59 #define PROCESSGROUP_CGROUP_KILL_FILE "cgroup.kill"
60 #define PROCESSGROUP_CGROUP_EVENTS_FILE "cgroup.events"
61 
// Reports whether /proc/cgroups exists. The filesystem is probed only on the first call; later
// calls return the remembered answer.
bool CgroupsAvailable() {
    static const bool kProcCgroupsPresent = (access("/proc/cgroups", F_OK) == 0);
    return kProcCgroupsPresent;
}
66 
CgroupGetControllerPath(const std::string & cgroup_name,std::string * path)67 bool CgroupGetControllerPath(const std::string& cgroup_name, std::string* path) {
68     auto controller = CgroupMap::GetInstance().FindController(cgroup_name);
69 
70     if (!controller.HasValue()) {
71         return false;
72     }
73 
74     if (path) {
75         *path = controller.path();
76     }
77 
78     return true;
79 }
80 
CgroupKillAvailable()81 static bool CgroupKillAvailable() {
82     static std::once_flag f;
83     static bool cgroup_kill_available = false;
84     std::call_once(f, []() {
85         std::string cg_kill;
86         CgroupGetControllerPath(CGROUPV2_HIERARCHY_NAME, &cg_kill);
87         // cgroup.kill is not on the root cgroup, so check a non-root cgroup that should always
88         // exist
89         cg_kill = ConvertUidToPath(cg_kill.c_str(), AID_ROOT) + '/' + PROCESSGROUP_CGROUP_KILL_FILE;
90         cgroup_kill_available = access(cg_kill.c_str(), F_OK) == 0;
91     });
92 
93     return cgroup_kill_available;
94 }
95 
CgroupGetMemcgAppsPath(std::string * path)96 static bool CgroupGetMemcgAppsPath(std::string* path) {
97     CgroupController controller = CgroupMap::GetInstance().FindController("memory");
98 
99     if (!controller.HasValue()) {
100         return false;
101     }
102 
103     if (path) {
104         *path = controller.path();
105         if (controller.version() == 1) {
106             *path += "/apps";
107         }
108     }
109 
110     return true;
111 }
112 
CgroupGetControllerFromPath(const std::string & path,std::string * cgroup_name)113 bool CgroupGetControllerFromPath(const std::string& path, std::string* cgroup_name) {
114     auto controller = CgroupMap::GetInstance().FindControllerByPath(path);
115 
116     if (!controller.HasValue()) {
117         return false;
118     }
119 
120     if (cgroup_name) {
121         *cgroup_name = controller.name();
122     }
123 
124     return true;
125 }
126 
CgroupGetAttributePath(const std::string & attr_name,std::string * path)127 bool CgroupGetAttributePath(const std::string& attr_name, std::string* path) {
128     const TaskProfiles& tp = TaskProfiles::GetInstance();
129     const IProfileAttribute* attr = tp.GetAttribute(attr_name);
130 
131     if (attr == nullptr) {
132         return false;
133     }
134 
135     if (path) {
136         *path = StringPrintf("%s/%s", attr->controller()->path(), attr->file_name().c_str());
137     }
138 
139     return true;
140 }
141 
CgroupGetAttributePathForTask(const std::string & attr_name,pid_t tid,std::string * path)142 bool CgroupGetAttributePathForTask(const std::string& attr_name, pid_t tid, std::string* path) {
143     const TaskProfiles& tp = TaskProfiles::GetInstance();
144     const IProfileAttribute* attr = tp.GetAttribute(attr_name);
145 
146     if (attr == nullptr) {
147         return false;
148     }
149 
150     if (!attr->GetPathForTask(tid, path)) {
151         LOG(ERROR) << "Failed to find cgroup for tid " << tid;
152         return false;
153     }
154 
155     return true;
156 }
157 
UsePerAppMemcg()158 bool UsePerAppMemcg() {
159     bool low_ram_device = GetBoolProperty("ro.config.low_ram", false);
160     return GetBoolProperty("ro.config.per_app_memcg", low_ram_device);
161 }
162 
isMemoryCgroupSupported()163 static bool isMemoryCgroupSupported() {
164     static bool memcg_supported = CgroupMap::GetInstance().FindController("memory").IsUsable();
165 
166     return memcg_supported;
167 }
168 
DropTaskProfilesResourceCaching()169 void DropTaskProfilesResourceCaching() {
170     TaskProfiles::GetInstance().DropResourceCaching(ProfileAction::RCT_TASK);
171     TaskProfiles::GetInstance().DropResourceCaching(ProfileAction::RCT_PROCESS);
172 }
173 
SetProcessProfiles(uid_t uid,pid_t pid,const std::vector<std::string> & profiles)174 bool SetProcessProfiles(uid_t uid, pid_t pid, const std::vector<std::string>& profiles) {
175     return TaskProfiles::GetInstance().SetProcessProfiles(
176             uid, pid, std::span<const std::string>(profiles), false);
177 }
178 
SetProcessProfiles(uid_t uid,pid_t pid,std::initializer_list<std::string_view> profiles)179 bool SetProcessProfiles(uid_t uid, pid_t pid, std::initializer_list<std::string_view> profiles) {
180     return TaskProfiles::GetInstance().SetProcessProfiles(
181             uid, pid, std::span<const std::string_view>(profiles), false);
182 }
183 
SetProcessProfiles(uid_t uid,pid_t pid,std::span<const std::string_view> profiles)184 bool SetProcessProfiles(uid_t uid, pid_t pid, std::span<const std::string_view> profiles) {
185     return TaskProfiles::GetInstance().SetProcessProfiles(uid, pid, profiles, false);
186 }
187 
SetProcessProfilesCached(uid_t uid,pid_t pid,const std::vector<std::string> & profiles)188 bool SetProcessProfilesCached(uid_t uid, pid_t pid, const std::vector<std::string>& profiles) {
189     return TaskProfiles::GetInstance().SetProcessProfiles(
190             uid, pid, std::span<const std::string>(profiles), true);
191 }
192 
SetTaskProfiles(pid_t tid,const std::vector<std::string> & profiles,bool use_fd_cache)193 bool SetTaskProfiles(pid_t tid, const std::vector<std::string>& profiles, bool use_fd_cache) {
194     return TaskProfiles::GetInstance().SetTaskProfiles(tid, std::span<const std::string>(profiles),
195                                                        use_fd_cache);
196 }
197 
SetTaskProfiles(pid_t tid,std::initializer_list<std::string_view> profiles,bool use_fd_cache)198 bool SetTaskProfiles(pid_t tid, std::initializer_list<std::string_view> profiles,
199                      bool use_fd_cache) {
200     return TaskProfiles::GetInstance().SetTaskProfiles(
201             tid, std::span<const std::string_view>(profiles), use_fd_cache);
202 }
203 
SetTaskProfiles(pid_t tid,std::span<const std::string_view> profiles,bool use_fd_cache)204 bool SetTaskProfiles(pid_t tid, std::span<const std::string_view> profiles, bool use_fd_cache) {
205     return TaskProfiles::GetInstance().SetTaskProfiles(tid, profiles, use_fd_cache);
206 }
207 
208 // C wrapper for SetProcessProfiles.
209 // No need to have this in the header file because this function is specifically for crosvm. Crosvm
210 // which is written in Rust has its own declaration of this foreign function and doesn't rely on the
211 // header. See
212 // https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/3574427/5/src/linux/android.rs#12
android_set_process_profiles(uid_t uid,pid_t pid,size_t num_profiles,const char * profiles[])213 extern "C" bool android_set_process_profiles(uid_t uid, pid_t pid, size_t num_profiles,
214                                              const char* profiles[]) {
215     std::vector<std::string_view> profiles_;
216     profiles_.reserve(num_profiles);
217     for (size_t i = 0; i < num_profiles; i++) {
218         profiles_.emplace_back(profiles[i]);
219     }
220     return SetProcessProfiles(uid, pid, std::span<const std::string_view>(profiles_));
221 }
222 
SetUserProfiles(uid_t uid,const std::vector<std::string> & profiles)223 bool SetUserProfiles(uid_t uid, const std::vector<std::string>& profiles) {
224     return TaskProfiles::GetInstance().SetUserProfiles(uid, std::span<const std::string>(profiles),
225                                                        false);
226 }
227 
RemoveCgroup(const char * cgroup,uid_t uid,pid_t pid)228 static int RemoveCgroup(const char* cgroup, uid_t uid, pid_t pid) {
229     auto path = ConvertUidPidToPath(cgroup, uid, pid);
230     int ret = TEMP_FAILURE_RETRY(rmdir(path.c_str()));
231 
232     if (!ret && uid >= AID_ISOLATED_START && uid <= AID_ISOLATED_END) {
233         // Isolated UIDs are unlikely to be reused soon after removal,
234         // so free up the kernel resources for the UID level cgroup.
235         path = ConvertUidToPath(cgroup, uid);
236         ret = TEMP_FAILURE_RETRY(rmdir(path.c_str()));
237     }
238 
239     if (ret < 0 && errno == ENOENT) {
240         // This function is idempoetent, but still warn here.
241         LOG(WARNING) << "RemoveCgroup: " << path << " does not exist.";
242         ret = 0;
243     }
244 
245     return ret;
246 }
247 
RemoveEmptyUidCgroups(const std::string & uid_path)248 static bool RemoveEmptyUidCgroups(const std::string& uid_path) {
249     std::unique_ptr<DIR, decltype(&closedir)> uid(opendir(uid_path.c_str()), closedir);
250     bool empty = true;
251     if (uid != NULL) {
252         dirent* dir;
253         while ((dir = readdir(uid.get())) != nullptr) {
254             if (dir->d_type != DT_DIR) {
255                 continue;
256             }
257 
258             if (!StartsWith(dir->d_name, "pid_")) {
259                 continue;
260             }
261 
262             auto path = StringPrintf("%s/%s", uid_path.c_str(), dir->d_name);
263             LOG(VERBOSE) << "Removing " << path;
264             if (rmdir(path.c_str()) == -1) {
265                 if (errno != EBUSY) {
266                     PLOG(WARNING) << "Failed to remove " << path;
267                 }
268                 empty = false;
269             }
270         }
271     }
272     return empty;
273 }
274 
removeAllEmptyProcessGroups()275 void removeAllEmptyProcessGroups() {
276     LOG(VERBOSE) << "removeAllEmptyProcessGroups()";
277 
278     std::vector<std::string> cgroups;
279     std::string path, memcg_apps_path;
280 
281     if (CgroupGetControllerPath(CGROUPV2_HIERARCHY_NAME, &path)) {
282         cgroups.push_back(path);
283     }
284     if (CgroupGetMemcgAppsPath(&memcg_apps_path) && memcg_apps_path != path) {
285         cgroups.push_back(memcg_apps_path);
286     }
287 
288     for (std::string cgroup_root_path : cgroups) {
289         std::unique_ptr<DIR, decltype(&closedir)> root(opendir(cgroup_root_path.c_str()), closedir);
290         if (root == NULL) {
291             PLOG(ERROR) << __func__ << " failed to open " << cgroup_root_path;
292         } else {
293             dirent* dir;
294             while ((dir = readdir(root.get())) != nullptr) {
295                 if (dir->d_type != DT_DIR) {
296                     continue;
297                 }
298 
299                 if (!StartsWith(dir->d_name, "uid_")) {
300                     continue;
301                 }
302 
303                 auto path = StringPrintf("%s/%s", cgroup_root_path.c_str(), dir->d_name);
304                 if (!RemoveEmptyUidCgroups(path)) {
305                     LOG(VERBOSE) << "Skip removing " << path;
306                     continue;
307                 }
308                 LOG(VERBOSE) << "Removing " << path;
309                 if (rmdir(path.c_str()) == -1 && errno != EBUSY) {
310                     PLOG(WARNING) << "Failed to remove " << path;
311                 }
312             }
313         }
314     }
315 }
316 
317 /**
318  * Process groups are primarily created by the Zygote, meaning that uid/pid groups are created by
319  * the user root. Ownership for the newly created cgroup and all of its files must thus be
320  * transferred for the user/group passed as uid/gid before system_server can properly access them.
321  */
MkdirAndChown(const std::string & path,mode_t mode,uid_t uid,gid_t gid)322 static bool MkdirAndChown(const std::string& path, mode_t mode, uid_t uid, gid_t gid) {
323     if (mkdir(path.c_str(), mode) == -1) {
324         if (errno == EEXIST) {
325             // Directory already exists and permissions have been set at the time it was created
326             return true;
327         }
328         return false;
329     }
330 
331     auto dir = std::unique_ptr<DIR, decltype(&closedir)>(opendir(path.c_str()), closedir);
332 
333     if (dir == NULL) {
334         PLOG(ERROR) << "opendir failed for " << path;
335         goto err;
336     }
337 
338     struct dirent* dir_entry;
339     while ((dir_entry = readdir(dir.get()))) {
340         if (!strcmp("..", dir_entry->d_name)) {
341             continue;
342         }
343 
344         std::string file_path = path + "/" + dir_entry->d_name;
345 
346         if (lchown(file_path.c_str(), uid, gid) < 0) {
347             PLOG(ERROR) << "lchown failed for " << file_path;
348             goto err;
349         }
350 
351         if (fchmodat(AT_FDCWD, file_path.c_str(), mode, AT_SYMLINK_NOFOLLOW) != 0) {
352             PLOG(ERROR) << "fchmodat failed for " << file_path;
353             goto err;
354         }
355     }
356 
357     return true;
358 err:
359     int saved_errno = errno;
360     rmdir(path.c_str());
361     errno = saved_errno;
362 
363     return false;
364 }
365 
// Sends |signal| to the processes belonging to the (uid, initialPid) process group.
//
// When cgroups are available the members are taken from the group's cgroup v2 directory:
//   - for SIGKILL, if cgroup.kill is available, the process group of initialPid is signalled and
//     "1" is written to cgroup.kill; a successful write returns true immediately;
//   - otherwise (or if that write fails) cgroup.procs is read and every listed process is
//     signalled, either through its process group or individually.
// The process group led by initialPid is always signalled, even when cgroups are unavailable.
//
// Returns false only when cgroup.procs cannot be opened (the process group of initialPid is still
// signalled in that case); returns true otherwise. Failures of individual kill() calls are logged
// but do not affect the return value.
bool sendSignalToProcessGroup(uid_t uid, pid_t initialPid, int signal) {
    std::set<pid_t> pgids, pids;

    if (CgroupsAvailable()) {
        std::string hierarchy_root_path, cgroup_v2_path;
        CgroupGetControllerPath(CGROUPV2_HIERARCHY_NAME, &hierarchy_root_path);
        cgroup_v2_path = ConvertUidPidToPath(hierarchy_root_path.c_str(), uid, initialPid);

        if (signal == SIGKILL && CgroupKillAvailable()) {
            LOG(VERBOSE) << "Using " << PROCESSGROUP_CGROUP_KILL_FILE << " to SIGKILL "
                         << cgroup_v2_path;

            // We need to kill the process group in addition to the cgroup. For normal apps they
            // should completely overlap, but system_server kills depend on process group kills to
            // take down apps which are in their own cgroups and not individually targeted.
            if (kill(-initialPid, signal) == -1 && errno != ESRCH) {
                PLOG(WARNING) << "kill(" << -initialPid << ", " << signal << ") failed";
            }

            const std::string killfilepath = cgroup_v2_path + '/' + PROCESSGROUP_CGROUP_KILL_FILE;
            if (WriteStringToFile("1", killfilepath)) {
                return true;
            } else {
                PLOG(ERROR) << "Failed to write 1 to " << killfilepath;
                // Fallback to cgroup.procs below
            }
        }

        // Since cgroup.kill only sends SIGKILLs, we read cgroup.procs to find each process to
        // signal individually. This is more costly than using cgroup.kill for SIGKILLs.
        LOG(VERBOSE) << "Using " << PROCESSGROUP_CGROUP_PROCS_FILE << " to signal (" << signal
                     << ") " << cgroup_v2_path;

        // We separate all of the pids in the cgroup into those pids that are also the leaders of
        // process groups (stored in the pgids set) and those that are not (stored in the pids set).
        const auto procsfilepath = cgroup_v2_path + '/' + PROCESSGROUP_CGROUP_PROCS_FILE;
        std::unique_ptr<FILE, decltype(&fclose)> fp(fopen(procsfilepath.c_str(), "re"), fclose);
        if (!fp) {
            // This should only happen if the cgroup has already been removed with a successful call
            // to killProcessGroup. Callers should only retry sendSignalToProcessGroup or
            // killProcessGroup calls if they fail without ENOENT.
            PLOG(ERROR) << "Failed to open " << procsfilepath;
            // Best effort: still signal the process group of initialPid; the result is ignored.
            kill(-initialPid, signal);
            return false;
        }

        pid_t pid;
        bool file_is_empty = true;
        // Reading stops at the first line that does not parse as an integer or is negative.
        while (fscanf(fp.get(), "%d\n", &pid) == 1 && pid >= 0) {
            file_is_empty = false;
            if (pid == 0) {
                // Should never happen...  but if it does, trying to kill this
                // will boomerang right back and kill us!  Let's not let that happen.
                LOG(WARNING)
                        << "Yikes, we've been told to kill pid 0!  How about we don't do that?";
                continue;
            }
            pid_t pgid = getpgid(pid);
            // A failed lookup (-1) never equals pid, so such a pid is signalled individually.
            if (pgid == -1) PLOG(ERROR) << "getpgid(" << pid << ") failed";
            if (pgid == pid) {
                pgids.emplace(pid);
            } else {
                pids.emplace(pid);
            }
        }
        if (!file_is_empty) {
            // Erase all pids that will be killed when we kill the process groups.
            for (auto it = pids.begin(); it != pids.end();) {
                pid_t pgid = getpgid(*it);
                if (pgids.count(pgid) == 1) {
                    it = pids.erase(it);
                } else {
                    ++it;
                }
            }
        }
    }

    // Always include the process group of initialPid, whether or not it was listed above.
    pgids.emplace(initialPid);

    // Kill all process groups.
    for (const auto pgid : pgids) {
        LOG(VERBOSE) << "Killing process group " << -pgid << " in uid " << uid
                     << " as part of process cgroup " << initialPid;

        // ESRCH (no such process) is expected for targets that already exited; stay quiet.
        if (kill(-pgid, signal) == -1 && errno != ESRCH) {
            PLOG(WARNING) << "kill(" << -pgid << ", " << signal << ") failed";
        }
    }

    // Kill remaining pids.
    for (const auto pid : pids) {
        LOG(VERBOSE) << "Killing pid " << pid << " in uid " << uid << " as part of process cgroup "
                     << initialPid;

        if (kill(pid, signal) == -1 && errno != ESRCH) {
            PLOG(WARNING) << "kill(" << pid << ", " << signal << ") failed";
        }
    }

    return true;
}
468 
// Converts any std::chrono duration to whole milliseconds using duration_cast, i.e. the
// fractional part is truncated toward zero.
template <typename T>
static std::chrono::milliseconds toMillisec(T&& duration) {
    using std::chrono::duration_cast;
    using std::chrono::milliseconds;

    const milliseconds as_ms = duration_cast<milliseconds>(duration);
    return as_ms;
}
473 
// Result of inspecting the "populated" entry of a cgroup.events file.
enum class populated_status {
    populated,      // the entry's value is '1'
    not_populated,  // the entry is present with any other value
    error,          // the file could not be read or the entry could not be found
};
480 
cgroupIsPopulated(int events_fd)481 static populated_status cgroupIsPopulated(int events_fd) {
482     const std::string POPULATED_KEY("populated ");
483     const std::string::size_type MAX_EVENTS_FILE_SIZE = 32;
484 
485     std::string buf;
486     buf.resize(MAX_EVENTS_FILE_SIZE);
487     ssize_t len = TEMP_FAILURE_RETRY(pread(events_fd, buf.data(), buf.size(), 0));
488     if (len == -1) {
489         PLOG(ERROR) << "Could not read cgroup.events: ";
490         // Potentially ENODEV if the cgroup has been removed since we opened this file, but that
491         // shouldn't have happened yet.
492         return populated_status::error;
493     }
494 
495     if (len == 0) {
496         LOG(ERROR) << "cgroup.events EOF";
497         return populated_status::error;
498     }
499 
500     buf.resize(len);
501 
502     const std::string::size_type pos = buf.find(POPULATED_KEY);
503     if (pos == std::string::npos) {
504         LOG(ERROR) << "Could not find populated key in cgroup.events";
505         return populated_status::error;
506     }
507 
508     if (pos + POPULATED_KEY.size() + 1 > len) {
509         LOG(ERROR) << "Partial read of cgroup.events";
510         return populated_status::error;
511     }
512 
513     return buf[pos + POPULATED_KEY.size()] == '1' ?
514         populated_status::populated : populated_status::not_populated;
515 }
516 
517 // The default timeout of 2200ms comes from the default number of retries in a previous
518 // implementation of this function. The default retry value was 40 for killing and 400 for cgroup
519 // removal with 5ms sleeps between each retry.
KillProcessGroup(uid_t uid,pid_t initialPid,int signal,bool once=false,std::chrono::steady_clock::time_point until=std::chrono::steady_clock::now ()+2200ms)520 static int KillProcessGroup(
521         uid_t uid, pid_t initialPid, int signal, bool once = false,
522         std::chrono::steady_clock::time_point until = std::chrono::steady_clock::now() + 2200ms) {
523     if (uid < 0) {
524         LOG(ERROR) << __func__ << ": invalid UID " << uid;
525         return -1;
526     }
527     if (initialPid <= 0) {
528         LOG(ERROR) << __func__ << ": invalid PID " << initialPid;
529         return -1;
530     }
531 
532     // Always attempt to send a kill signal to at least the initialPid, at least once, regardless of
533     // whether its cgroup exists or not. This should only be necessary if a bug results in the
534     // migration of the targeted process out of its cgroup, which we will also attempt to kill.
535     const bool signal_ret = sendSignalToProcessGroup(uid, initialPid, signal);
536 
537     if (!CgroupsAvailable() || !signal_ret) return signal_ret ? 0 : -1;
538 
539     std::string hierarchy_root_path;
540     CgroupGetControllerPath(CGROUPV2_HIERARCHY_NAME, &hierarchy_root_path);
541 
542     const std::string cgroup_v2_path =
543             ConvertUidPidToPath(hierarchy_root_path.c_str(), uid, initialPid);
544 
545     const std::string eventsfile = cgroup_v2_path + '/' + PROCESSGROUP_CGROUP_EVENTS_FILE;
546     android::base::unique_fd events_fd(open(eventsfile.c_str(), O_RDONLY));
547     if (events_fd.get() == -1) {
548         PLOG(WARNING) << "Error opening " << eventsfile << " for KillProcessGroup";
549         return -1;
550     }
551 
552     struct pollfd fds = {
553         .fd = events_fd,
554         .events = POLLPRI,
555     };
556 
557     const std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
558 
559     // The primary reason to loop here is to capture any new forks or migrations that could occur
560     // after we send signals to the original set of processes, but before all of those processes
561     // exit and the cgroup becomes unpopulated, or before we remove the cgroup. We try hard to
562     // ensure this completes successfully to avoid permanent memory leaks, but we still place a
563     // large default upper bound on the amount of time we spend in this loop. The amount of CPU
564     // contention, and the amount of work that needs to be done in do_exit for each process
565     // determines how long this will take.
566     int ret;
567     do {
568         populated_status populated;
569         while ((populated = cgroupIsPopulated(events_fd.get())) == populated_status::populated &&
570                std::chrono::steady_clock::now() < until) {
571 
572             sendSignalToProcessGroup(uid, initialPid, signal);
573             if (once) {
574                 populated = cgroupIsPopulated(events_fd.get());
575                 break;
576             }
577 
578             const std::chrono::steady_clock::time_point poll_start =
579                     std::chrono::steady_clock::now();
580 
581             if (poll_start < until)
582                 ret = TEMP_FAILURE_RETRY(poll(&fds, 1, toMillisec(until - poll_start).count()));
583 
584             if (ret == -1) {
585                 // Fallback to 5ms sleeps if poll fails
586                 PLOG(ERROR) << "Poll on " << eventsfile << "failed";
587                 const std::chrono::steady_clock::time_point now = std::chrono::steady_clock::now();
588                 if (now < until)
589                     std::this_thread::sleep_for(std::min(5ms, toMillisec(until - now)));
590             }
591 
592             LOG(VERBOSE) << "Waited "
593                          << toMillisec(std::chrono::steady_clock::now() - poll_start).count()
594                          << " ms for " << eventsfile << " poll";
595         }
596 
597         const std::chrono::milliseconds kill_duration =
598                 toMillisec(std::chrono::steady_clock::now() - start);
599 
600         if (populated == populated_status::populated) {
601             LOG(WARNING) << "Still waiting on process(es) to exit for cgroup " << cgroup_v2_path
602                          << " after " << kill_duration.count() << " ms";
603             // We'll still try the cgroup removal below which we expect to log an error.
604         } else if (populated == populated_status::not_populated) {
605             LOG(VERBOSE) << "Killed all processes under cgroup " << cgroup_v2_path
606                          << " after " << kill_duration.count() << " ms";
607         }
608 
609         ret = RemoveCgroup(hierarchy_root_path.c_str(), uid, initialPid);
610         if (ret)
611             PLOG(ERROR) << "Unable to remove cgroup " << cgroup_v2_path;
612         else
613             LOG(INFO) << "Removed cgroup " << cgroup_v2_path;
614 
615         if (isMemoryCgroupSupported() && UsePerAppMemcg()) {
616             // This per-application memcg v1 case should eventually be removed after migration to
617             // memcg v2.
618             std::string memcg_apps_path;
619             if (CgroupGetMemcgAppsPath(&memcg_apps_path) &&
620                 (ret = RemoveCgroup(memcg_apps_path.c_str(), uid, initialPid)) < 0) {
621                 const auto memcg_v1_cgroup_path =
622                         ConvertUidPidToPath(memcg_apps_path.c_str(), uid, initialPid);
623                 PLOG(ERROR) << "Unable to remove memcg v1 cgroup " << memcg_v1_cgroup_path;
624             }
625         }
626 
627         if (once) break;
628         if (std::chrono::steady_clock::now() >= until) break;
629     } while (ret && errno == EBUSY);
630 
631     return ret;
632 }
633 
killProcessGroup(uid_t uid,pid_t initialPid,int signal)634 int killProcessGroup(uid_t uid, pid_t initialPid, int signal) {
635     return KillProcessGroup(uid, initialPid, signal);
636 }
637 
killProcessGroupOnce(uid_t uid,pid_t initialPid,int signal)638 int killProcessGroupOnce(uid_t uid, pid_t initialPid, int signal) {
639     return KillProcessGroup(uid, initialPid, signal, true);
640 }
641 
createProcessGroupInternal(uid_t uid,pid_t initialPid,std::string cgroup,bool activate_controllers)642 static int createProcessGroupInternal(uid_t uid, pid_t initialPid, std::string cgroup,
643                                       bool activate_controllers) {
644     auto uid_path = ConvertUidToPath(cgroup.c_str(), uid);
645 
646     struct stat cgroup_stat;
647     mode_t cgroup_mode = 0750;
648     uid_t cgroup_uid = AID_SYSTEM;
649     gid_t cgroup_gid = AID_SYSTEM;
650     int ret = 0;
651 
652     if (stat(cgroup.c_str(), &cgroup_stat) < 0) {
653         PLOG(ERROR) << "Failed to get stats for " << cgroup;
654     } else {
655         cgroup_mode = cgroup_stat.st_mode;
656         cgroup_uid = cgroup_stat.st_uid;
657         cgroup_gid = cgroup_stat.st_gid;
658     }
659 
660     if (!MkdirAndChown(uid_path, cgroup_mode, cgroup_uid, cgroup_gid)) {
661         PLOG(ERROR) << "Failed to make and chown " << uid_path;
662         return -errno;
663     }
664     if (activate_controllers) {
665         ret = CgroupMap::GetInstance().ActivateControllers(uid_path);
666         if (ret) {
667             LOG(ERROR) << "Failed to activate controllers in " << uid_path;
668             return ret;
669         }
670     }
671 
672     auto uid_pid_path = ConvertUidPidToPath(cgroup.c_str(), uid, initialPid);
673 
674     if (!MkdirAndChown(uid_pid_path, cgroup_mode, cgroup_uid, cgroup_gid)) {
675         PLOG(ERROR) << "Failed to make and chown " << uid_pid_path;
676         return -errno;
677     }
678 
679     auto uid_pid_procs_file = uid_pid_path + '/' + PROCESSGROUP_CGROUP_PROCS_FILE;
680 
681     if (!WriteStringToFile(std::to_string(initialPid), uid_pid_procs_file)) {
682         ret = -errno;
683         PLOG(ERROR) << "Failed to write '" << initialPid << "' to " << uid_pid_procs_file;
684     }
685 
686     return ret;
687 }
688 
createProcessGroup(uid_t uid,pid_t initialPid,bool memControl)689 int createProcessGroup(uid_t uid, pid_t initialPid, bool memControl) {
690     if (uid < 0) {
691         LOG(ERROR) << __func__ << ": invalid UID " << uid;
692         return -1;
693     }
694     if (initialPid <= 0) {
695         LOG(ERROR) << __func__ << ": invalid PID " << initialPid;
696         return -1;
697     }
698 
699     if (memControl && !UsePerAppMemcg()) {
700         LOG(ERROR) << "service memory controls are used without per-process memory cgroup support";
701         return -EINVAL;
702     }
703 
704     if (std::string memcg_apps_path;
705         isMemoryCgroupSupported() && UsePerAppMemcg() && CgroupGetMemcgAppsPath(&memcg_apps_path)) {
706         // Note by bvanassche: passing 'false' as fourth argument below implies that the v1
707         // hierarchy is used. It is not clear to me whether the above conditions guarantee that the
708         // v1 hierarchy is used.
709         int ret = createProcessGroupInternal(uid, initialPid, memcg_apps_path, false);
710         if (ret != 0) {
711             return ret;
712         }
713     }
714 
715     std::string cgroup;
716     CgroupGetControllerPath(CGROUPV2_HIERARCHY_NAME, &cgroup);
717     return createProcessGroupInternal(uid, initialPid, cgroup, true);
718 }
719 
SetProcessGroupValue(pid_t tid,const std::string & attr_name,int64_t value)720 static bool SetProcessGroupValue(pid_t tid, const std::string& attr_name, int64_t value) {
721     if (!isMemoryCgroupSupported()) {
722         LOG(ERROR) << "Memcg is not mounted.";
723         return false;
724     }
725 
726     std::string path;
727     if (!CgroupGetAttributePathForTask(attr_name, tid, &path)) {
728         LOG(ERROR) << "Failed to find attribute '" << attr_name << "'";
729         return false;
730     }
731 
732     if (!WriteStringToFile(std::to_string(value), path)) {
733         PLOG(ERROR) << "Failed to write '" << value << "' to " << path;
734         return false;
735     }
736     return true;
737 }
738 
setProcessGroupSwappiness(uid_t,pid_t pid,int swappiness)739 bool setProcessGroupSwappiness(uid_t, pid_t pid, int swappiness) {
740     return SetProcessGroupValue(pid, "MemSwappiness", swappiness);
741 }
742 
setProcessGroupSoftLimit(uid_t,pid_t pid,int64_t soft_limit_in_bytes)743 bool setProcessGroupSoftLimit(uid_t, pid_t pid, int64_t soft_limit_in_bytes) {
744     return SetProcessGroupValue(pid, "MemSoftLimit", soft_limit_in_bytes);
745 }
746 
setProcessGroupLimit(uid_t,pid_t pid,int64_t limit_in_bytes)747 bool setProcessGroupLimit(uid_t, pid_t pid, int64_t limit_in_bytes) {
748     return SetProcessGroupValue(pid, "MemLimit", limit_in_bytes);
749 }
750 
getAttributePathForTask(const std::string & attr_name,pid_t tid,std::string * path)751 bool getAttributePathForTask(const std::string& attr_name, pid_t tid, std::string* path) {
752     return CgroupGetAttributePathForTask(attr_name, tid, path);
753 }
754 
isProfileValidForProcess(const std::string & profile_name,uid_t uid,pid_t pid)755 bool isProfileValidForProcess(const std::string& profile_name, uid_t uid, pid_t pid) {
756     const TaskProfile* tp = TaskProfiles::GetInstance().GetProfile(profile_name);
757 
758     if (tp == nullptr) {
759         return false;
760     }
761 
762     return tp->IsValidForProcess(uid, pid);
763 }
764