1 /*
2  * Copyright (C) 2019 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "libtimeinstate"
18 
#include "cputimeinstate.h"
#include <bpf_timeinstate.h>

#include <dirent.h>
#include <errno.h>
#include <inttypes.h>
#include <sys/sysinfo.h>

#include <algorithm>
#include <atomic>
#include <mutex>
#include <numeric>
#include <optional>
#include <set>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include <android-base/file.h>
#include <android-base/parseint.h>
#include <android-base/stringprintf.h>
#include <android-base/strings.h>
#include <android-base/unique_fd.h>
#include <bpf/BpfMap.h>
#include <libbpf.h>
#include <log/log.h>
43 
44 using android::base::StringPrintf;
45 using android::base::unique_fd;
46 
47 namespace android {
48 namespace bpf {
49 
50 static std::mutex gInitializedMutex;
51 static bool gInitialized = false;
52 static std::mutex gTrackingMutex;
53 static bool gTracking = false;
54 static uint32_t gNPolicies = 0;
55 static uint32_t gNCpus = 0;
56 static std::vector<std::vector<uint32_t>> gPolicyFreqs;
57 static std::vector<std::vector<uint32_t>> gPolicyCpus;
58 static std::vector<uint32_t> gCpuIndexMap;
59 static std::set<uint32_t> gAllFreqs;
60 static unique_fd gTisTotalMapFd;
61 static unique_fd gTisMapFd;
62 static unique_fd gConcurrentMapFd;
63 static unique_fd gUidLastUpdateMapFd;
64 static unique_fd gPidTisMapFd;
65 
readNumbersFromFile(const std::string & path)66 static std::optional<std::vector<uint32_t>> readNumbersFromFile(const std::string &path) {
67     std::string data;
68 
69     if (!android::base::ReadFileToString(path, &data)) return {};
70 
71     auto strings = android::base::Split(data, " \n");
72     std::vector<uint32_t> ret;
73     for (const auto &s : strings) {
74         if (s.empty()) continue;
75         uint32_t n;
76         if (!android::base::ParseUint(s, &n)) return {};
77         ret.emplace_back(n);
78     }
79     return ret;
80 }
81 
isPolicyFile(const struct dirent * d)82 static int isPolicyFile(const struct dirent *d) {
83     return android::base::StartsWith(d->d_name, "policy");
84 }
85 
// scandir() comparator that orders "policy<N>" directory entries by their numeric suffix N.
// Returns a negative value, zero, or a positive value when *d1 sorts before, equal to, or after
// *d2. If either name does not parse as "policy<N>" the entries are treated as equal.
static int comparePolicyFiles(const struct dirent **d1, const struct dirent **d2) {
    uint32_t policyN1, policyN2;
    if (sscanf((*d1)->d_name, "policy%" SCNu32, &policyN1) != 1 ||
        sscanf((*d2)->d_name, "policy%" SCNu32, &policyN2) != 1)
        return 0;
    // Compare explicitly rather than returning policyN1 - policyN2: the unsigned difference
    // converted to int has the wrong sign whenever the two numbers are more than INT_MAX apart.
    if (policyN1 < policyN2) return -1;
    if (policyN1 > policyN2) return 1;
    return 0;
}
93 
initGlobals()94 static bool initGlobals() {
95     std::lock_guard<std::mutex> guard(gInitializedMutex);
96     if (gInitialized) return true;
97 
98     gNCpus = get_nprocs_conf();
99 
100     struct dirent **dirlist;
101     const char basepath[] = "/sys/devices/system/cpu/cpufreq";
102     int ret = scandir(basepath, &dirlist, isPolicyFile, comparePolicyFiles);
103     if (ret == -1 || ret == 0) return false;
104     gNPolicies = ret;
105 
106     std::vector<std::string> policyFileNames;
107     for (uint32_t i = 0; i < gNPolicies; ++i) {
108         policyFileNames.emplace_back(dirlist[i]->d_name);
109         free(dirlist[i]);
110     }
111     free(dirlist);
112     uint32_t max_cpu_number = 0;
113     for (const auto &policy : policyFileNames) {
114         std::vector<uint32_t> freqs;
115         for (const auto &name : {"available", "boost"}) {
116             std::string path =
117                     StringPrintf("%s/%s/scaling_%s_frequencies", basepath, policy.c_str(), name);
118             auto nums = readNumbersFromFile(path);
119             if (!nums) continue;
120             freqs.insert(freqs.end(), nums->begin(), nums->end());
121         }
122         if (freqs.empty()) return false;
123         std::sort(freqs.begin(), freqs.end());
124         gPolicyFreqs.emplace_back(freqs);
125 
126         for (auto freq : freqs) gAllFreqs.insert(freq);
127 
128         std::string path = StringPrintf("%s/%s/%s", basepath, policy.c_str(), "related_cpus");
129         auto cpus = readNumbersFromFile(path);
130         if (!cpus) return false;
131         for (auto cpu : *cpus) {
132             if(cpu > max_cpu_number)
133                 max_cpu_number = cpu;
134         }
135         gPolicyCpus.emplace_back(*cpus);
136     }
137     gCpuIndexMap = std::vector<uint32_t>(max_cpu_number+1, -1);
138     uint32_t cpuorder = 0;
139     for (const auto &cpuList : gPolicyCpus) {
140         for (auto cpu : cpuList) {
141             gCpuIndexMap[cpu] = cpuorder++;
142         }
143     }
144 
145     gTisTotalMapFd =
146             unique_fd{bpf_obj_get(BPF_FS_PATH "map_timeInState_total_time_in_state_map")};
147     if (gTisTotalMapFd < 0) return false;
148 
149     gTisMapFd = unique_fd{bpf_obj_get(BPF_FS_PATH "map_timeInState_uid_time_in_state_map")};
150     if (gTisMapFd < 0) return false;
151 
152     gConcurrentMapFd =
153             unique_fd{bpf_obj_get(BPF_FS_PATH "map_timeInState_uid_concurrent_times_map")};
154     if (gConcurrentMapFd < 0) return false;
155 
156     gUidLastUpdateMapFd =
157             unique_fd{bpf_obj_get(BPF_FS_PATH "map_timeInState_uid_last_update_map")};
158     if (gUidLastUpdateMapFd < 0) return false;
159 
160     gPidTisMapFd = unique_fd{mapRetrieveRO(BPF_FS_PATH "map_timeInState_pid_time_in_state_map")};
161     if (gPidTisMapFd < 0) return false;
162 
163     unique_fd trackedPidMapFd(mapRetrieveWO(BPF_FS_PATH "map_timeInState_pid_tracked_map"));
164     if (trackedPidMapFd < 0) return false;
165 
166     gInitialized = true;
167     return true;
168 }
169 
retrieveProgramFd(const std::string & eventType,const std::string & eventName)170 static int retrieveProgramFd(const std::string &eventType, const std::string &eventName) {
171     std::string path = StringPrintf(BPF_FS_PATH "prog_timeInState_tracepoint_%s_%s",
172                                     eventType.c_str(), eventName.c_str());
173     return retrieveProgram(path.c_str());
174 }
175 
attachTracepointProgram(const std::string & eventType,const std::string & eventName)176 static bool attachTracepointProgram(const std::string &eventType, const std::string &eventName) {
177     int prog_fd = retrieveProgramFd(eventType, eventName);
178     if (prog_fd < 0) return false;
179     return bpf_attach_tracepoint(prog_fd, eventType.c_str(), eventName.c_str()) >= 0;
180 }
181 
getPolicyFreqIdx(uint32_t policy)182 static std::optional<uint32_t> getPolicyFreqIdx(uint32_t policy) {
183     auto path = StringPrintf("/sys/devices/system/cpu/cpufreq/policy%u/scaling_cur_freq",
184                              gPolicyCpus[policy][0]);
185     auto freqVec = readNumbersFromFile(path);
186     if (!freqVec.has_value() || freqVec->size() != 1) return {};
187     for (uint32_t idx = 0; idx < gPolicyFreqs[policy].size(); ++idx) {
188         if ((*freqVec)[0] == gPolicyFreqs[policy][idx]) return idx + 1;
189     }
190     return {};
191 }
192 
193 // Check if tracking is expected to work without activating it.
isTrackingUidTimesSupported()194 bool isTrackingUidTimesSupported() {
195     auto freqs = getCpuFreqs();
196     if (!freqs || freqs->empty()) return false;
197     if (gTracking) return true;
198     if (retrieveProgramFd("sched", "sched_switch") < 0) return false;
199     if (retrieveProgramFd("power", "cpu_frequency") < 0) return false;
200     if (retrieveProgramFd("sched", "sched_process_free") < 0) return false;
201     return true;
202 }
203 
204 // Start tracking and aggregating data to be reported by getUidCpuFreqTimes and getUidsCpuFreqTimes.
205 // Returns true on success, false otherwise.
206 // Tracking is active only once a live process has successfully called this function; if the calling
207 // process dies then it must be called again to resume tracking.
208 // This function should *not* be called while tracking is already active; doing so is unnecessary
209 // and can lead to accounting errors.
startTrackingUidTimes()210 bool startTrackingUidTimes() {
211     std::lock_guard<std::mutex> guard(gTrackingMutex);
212     if (!initGlobals()) return false;
213     if (gTracking) return true;
214 
215     unique_fd cpuPolicyFd(mapRetrieveWO(BPF_FS_PATH "map_timeInState_cpu_policy_map"));
216     if (cpuPolicyFd < 0) return false;
217 
218     for (uint32_t i = 0; i < gPolicyCpus.size(); ++i) {
219         for (auto &cpu : gPolicyCpus[i]) {
220             if (writeToMapEntry(cpuPolicyFd, &cpu, &i, BPF_ANY)) return false;
221         }
222     }
223 
224     unique_fd freqToIdxFd(mapRetrieveWO(BPF_FS_PATH "map_timeInState_freq_to_idx_map"));
225     if (freqToIdxFd < 0) return false;
226     freq_idx_key_t key;
227     for (uint32_t i = 0; i < gNPolicies; ++i) {
228         key.policy = i;
229         for (uint32_t j = 0; j < gPolicyFreqs[i].size(); ++j) {
230             key.freq = gPolicyFreqs[i][j];
231             // Start indexes at 1 so that uninitialized state is distinguishable from lowest freq.
232             // The uid_times map still uses 0-based indexes, and the sched_switch program handles
233             // conversion between them, so this does not affect our map reading code.
234             uint32_t idx = j + 1;
235             if (writeToMapEntry(freqToIdxFd, &key, &idx, BPF_ANY)) return false;
236         }
237     }
238 
239     unique_fd cpuLastUpdateFd(mapRetrieveWO(BPF_FS_PATH "map_timeInState_cpu_last_update_map"));
240     if (cpuLastUpdateFd < 0) return false;
241     std::vector<uint64_t> zeros(get_nprocs_conf(), 0);
242     uint32_t zero = 0;
243     if (writeToMapEntry(cpuLastUpdateFd, &zero, zeros.data(), BPF_ANY)) return false;
244 
245     unique_fd nrActiveFd(mapRetrieveWO(BPF_FS_PATH "map_timeInState_nr_active_map"));
246     if (nrActiveFd < 0) return false;
247     if (writeToMapEntry(nrActiveFd, &zero, &zero, BPF_ANY)) return false;
248 
249     unique_fd policyNrActiveFd(mapRetrieveWO(BPF_FS_PATH "map_timeInState_policy_nr_active_map"));
250     if (policyNrActiveFd < 0) return false;
251     for (uint32_t i = 0; i < gNPolicies; ++i) {
252         if (writeToMapEntry(policyNrActiveFd, &i, &zero, BPF_ANY)) return false;
253     }
254 
255     unique_fd policyFreqIdxFd(mapRetrieveWO(BPF_FS_PATH "map_timeInState_policy_freq_idx_map"));
256     if (policyFreqIdxFd < 0) return false;
257     for (uint32_t i = 0; i < gNPolicies; ++i) {
258         auto freqIdx = getPolicyFreqIdx(i);
259         if (!freqIdx.has_value()) return false;
260         if (writeToMapEntry(policyFreqIdxFd, &i, &(*freqIdx), BPF_ANY)) return false;
261     }
262 
263     gTracking = attachTracepointProgram("sched", "sched_switch") &&
264             attachTracepointProgram("power", "cpu_frequency") &&
265             attachTracepointProgram("sched", "sched_process_free");
266     return gTracking;
267 }
268 
getCpuFreqs()269 std::optional<std::vector<std::vector<uint32_t>>> getCpuFreqs() {
270     if (!gInitialized && !initGlobals()) return {};
271     return gPolicyFreqs;
272 }
273 
getTotalCpuFreqTimes()274 std::optional<std::vector<std::vector<uint64_t>>> getTotalCpuFreqTimes() {
275     if (!gInitialized && !initGlobals()) return {};
276 
277     std::vector<std::vector<uint64_t>> out;
278     uint32_t maxFreqCount = 0;
279     for (const auto &freqList : gPolicyFreqs) {
280         if (freqList.size() > maxFreqCount) maxFreqCount = freqList.size();
281         out.emplace_back(freqList.size(), 0);
282     }
283 
284     std::vector<uint64_t> vals(gNCpus);
285     const uint32_t freqCount = maxFreqCount <= MAX_FREQS_FOR_TOTAL ? maxFreqCount :
286             MAX_FREQS_FOR_TOTAL;
287     for (uint32_t freqIdx = 0; freqIdx < freqCount; ++freqIdx) {
288         if (findMapEntry(gTisTotalMapFd, &freqIdx, vals.data())) return {};
289         for (uint32_t policyIdx = 0; policyIdx < gNPolicies; ++policyIdx) {
290             if (freqIdx >= gPolicyFreqs[policyIdx].size()) continue;
291             for (const auto &cpu : gPolicyCpus[policyIdx]) {
292                 out[policyIdx][freqIdx] += vals[gCpuIndexMap[cpu]];
293             }
294         }
295     }
296 
297     return out;
298 }
299 // Retrieve the times in ns that uid spent running at each CPU frequency.
300 // Return contains no value on error, otherwise it contains a vector of vectors using the format:
301 // [[t0_0, t0_1, ...],
302 //  [t1_0, t1_1, ...], ...]
303 // where ti_j is the ns that uid spent running on the ith cluster at that cluster's jth lowest freq.
getUidCpuFreqTimes(uint32_t uid)304 std::optional<std::vector<std::vector<uint64_t>>> getUidCpuFreqTimes(uint32_t uid) {
305     if (!gInitialized && !initGlobals()) return {};
306 
307     std::vector<std::vector<uint64_t>> out;
308     uint32_t maxFreqCount = 0;
309     for (const auto &freqList : gPolicyFreqs) {
310         if (freqList.size() > maxFreqCount) maxFreqCount = freqList.size();
311         out.emplace_back(freqList.size(), 0);
312     }
313 
314     std::vector<tis_val_t> vals(gNCpus);
315     for (uint32_t i = 0; i <= (maxFreqCount - 1) / FREQS_PER_ENTRY; ++i) {
316         const time_key_t key = {.uid = uid, .bucket = i};
317         if (findMapEntry(gTisMapFd, &key, vals.data())) {
318             time_key_t tmpKey;
319             if (errno != ENOENT || getFirstMapKey(gTisMapFd, &tmpKey)) return {};
320             continue;
321         }
322 
323         auto offset = i * FREQS_PER_ENTRY;
324         auto nextOffset = (i + 1) * FREQS_PER_ENTRY;
325         for (uint32_t j = 0; j < gNPolicies; ++j) {
326             if (offset >= gPolicyFreqs[j].size()) continue;
327             auto begin = out[j].begin() + offset;
328             auto end = nextOffset < gPolicyFreqs[j].size() ? begin + FREQS_PER_ENTRY : out[j].end();
329 
330             for (const auto &cpu : gPolicyCpus[j]) {
331                 std::transform(begin, end, std::begin(vals[gCpuIndexMap[cpu]].ar), begin,
332                                std::plus<uint64_t>());
333             }
334         }
335     }
336 
337     return out;
338 }
339 
uidUpdatedSince(uint32_t uid,uint64_t lastUpdate,uint64_t * newLastUpdate)340 static std::optional<bool> uidUpdatedSince(uint32_t uid, uint64_t lastUpdate,
341                                            uint64_t *newLastUpdate) {
342     uint64_t uidLastUpdate;
343     if (findMapEntry(gUidLastUpdateMapFd, &uid, &uidLastUpdate)) return {};
344     // Updates that occurred during the previous read may have been missed. To mitigate
345     // this, don't ignore entries updated up to 1s before *lastUpdate
346     constexpr uint64_t NSEC_PER_SEC = 1000000000;
347     if (uidLastUpdate + NSEC_PER_SEC < lastUpdate) return false;
348     if (uidLastUpdate > *newLastUpdate) *newLastUpdate = uidLastUpdate;
349     return true;
350 }
351 
352 // Retrieve the times in ns that each uid spent running at each CPU freq.
353 // Return contains no value on error, otherwise it contains a map from uids to vectors of vectors
354 // using the format:
355 // { uid0 -> [[t0_0_0, t0_0_1, ...], [t0_1_0, t0_1_1, ...], ...],
356 //   uid1 -> [[t1_0_0, t1_0_1, ...], [t1_1_0, t1_1_1, ...], ...], ... }
357 // where ti_j_k is the ns uid i spent running on the jth cluster at the cluster's kth lowest freq.
358 std::optional<std::unordered_map<uint32_t, std::vector<std::vector<uint64_t>>>>
getUidsCpuFreqTimes()359 getUidsCpuFreqTimes() {
360     return getUidsUpdatedCpuFreqTimes(nullptr);
361 }
362 
363 // Retrieve the times in ns that each uid spent running at each CPU freq, excluding UIDs that have
364 // not run since before lastUpdate.
365 // Return format is the same as getUidsCpuFreqTimes()
366 std::optional<std::unordered_map<uint32_t, std::vector<std::vector<uint64_t>>>>
getUidsUpdatedCpuFreqTimes(uint64_t * lastUpdate)367 getUidsUpdatedCpuFreqTimes(uint64_t *lastUpdate) {
368     if (!gInitialized && !initGlobals()) return {};
369     time_key_t key, prevKey;
370     std::unordered_map<uint32_t, std::vector<std::vector<uint64_t>>> map;
371     if (getFirstMapKey(gTisMapFd, &key)) {
372         if (errno == ENOENT) return map;
373         return std::nullopt;
374     }
375 
376     std::vector<std::vector<uint64_t>> mapFormat;
377     for (const auto &freqList : gPolicyFreqs) mapFormat.emplace_back(freqList.size(), 0);
378 
379     uint64_t newLastUpdate = lastUpdate ? *lastUpdate : 0;
380     std::vector<tis_val_t> vals(gNCpus);
381     do {
382         if (lastUpdate) {
383             auto uidUpdated = uidUpdatedSince(key.uid, *lastUpdate, &newLastUpdate);
384             if (!uidUpdated.has_value()) return {};
385             if (!*uidUpdated) continue;
386         }
387         if (findMapEntry(gTisMapFd, &key, vals.data())) return {};
388         if (map.find(key.uid) == map.end()) map.emplace(key.uid, mapFormat);
389 
390         auto offset = key.bucket * FREQS_PER_ENTRY;
391         auto nextOffset = (key.bucket + 1) * FREQS_PER_ENTRY;
392         for (uint32_t i = 0; i < gNPolicies; ++i) {
393             if (offset >= gPolicyFreqs[i].size()) continue;
394             auto begin = map[key.uid][i].begin() + offset;
395             auto end = nextOffset < gPolicyFreqs[i].size() ? begin + FREQS_PER_ENTRY :
396                 map[key.uid][i].end();
397             for (const auto &cpu : gPolicyCpus[i]) {
398                 std::transform(begin, end, std::begin(vals[gCpuIndexMap[cpu]].ar), begin,
399                                std::plus<uint64_t>());
400             }
401         }
402         prevKey = key;
403     } while (prevKey = key, !getNextMapKey(gTisMapFd, &prevKey, &key));
404     if (errno != ENOENT) return {};
405     if (lastUpdate && newLastUpdate > *lastUpdate) *lastUpdate = newLastUpdate;
406     return map;
407 }
408 
verifyConcurrentTimes(const concurrent_time_t & ct)409 static bool verifyConcurrentTimes(const concurrent_time_t &ct) {
410     uint64_t activeSum = std::accumulate(ct.active.begin(), ct.active.end(), (uint64_t)0);
411     uint64_t policySum = 0;
412     for (const auto &vec : ct.policy) {
413         policySum += std::accumulate(vec.begin(), vec.end(), (uint64_t)0);
414     }
415     return activeSum == policySum;
416 }
417 
418 // Retrieve the times in ns that uid spent running concurrently with each possible number of other
419 // tasks on each cluster (policy times) and overall (active times).
420 // Return contains no value on error, otherwise it contains a concurrent_time_t with the format:
421 // {.active = [a0, a1, ...], .policy = [[p0_0, p0_1, ...], [p1_0, p1_1, ...], ...]}
422 // where ai is the ns spent running concurrently with tasks on i other cpus and pi_j is the ns spent
423 // running on the ith cluster, concurrently with tasks on j other cpus in the same cluster
getUidConcurrentTimes(uint32_t uid,bool retry)424 std::optional<concurrent_time_t> getUidConcurrentTimes(uint32_t uid, bool retry) {
425     if (!gInitialized && !initGlobals()) return {};
426     concurrent_time_t ret = {.active = std::vector<uint64_t>(gNCpus, 0)};
427     for (const auto &cpuList : gPolicyCpus) ret.policy.emplace_back(cpuList.size(), 0);
428     std::vector<concurrent_val_t> vals(gNCpus);
429     for (uint32_t i = 0; i <= (gNCpus - 1) / CPUS_PER_ENTRY; ++i) {
430         const time_key_t key = {.uid = uid, .bucket = i};
431         if (findMapEntry(gConcurrentMapFd, &key, vals.data())) {
432             time_key_t tmpKey;
433             if (errno != ENOENT || getFirstMapKey(gConcurrentMapFd, &tmpKey)) return {};
434             continue;
435         }
436         auto offset = key.bucket * CPUS_PER_ENTRY;
437         auto nextOffset = (key.bucket + 1) * CPUS_PER_ENTRY;
438 
439         auto activeBegin = ret.active.begin() + offset;
440         auto activeEnd = nextOffset < gNCpus ? activeBegin + CPUS_PER_ENTRY : ret.active.end();
441 
442         for (uint32_t cpu = 0; cpu < gNCpus; ++cpu) {
443             std::transform(activeBegin, activeEnd, std::begin(vals[cpu].active), activeBegin,
444                            std::plus<uint64_t>());
445         }
446 
447         for (uint32_t policy = 0; policy < gNPolicies; ++policy) {
448             if (offset >= gPolicyCpus[policy].size()) continue;
449             auto policyBegin = ret.policy[policy].begin() + offset;
450             auto policyEnd = nextOffset < gPolicyCpus[policy].size() ? policyBegin + CPUS_PER_ENTRY
451                                                                      : ret.policy[policy].end();
452 
453             for (const auto &cpu : gPolicyCpus[policy]) {
454                 std::transform(policyBegin, policyEnd, std::begin(vals[gCpuIndexMap[cpu]].policy),
455                                policyBegin, std::plus<uint64_t>());
456             }
457         }
458     }
459     if (!verifyConcurrentTimes(ret) && retry)  return getUidConcurrentTimes(uid, false);
460     return ret;
461 }
462 
463 // Retrieve the times in ns that each uid spent running concurrently with each possible number of
464 // other tasks on each cluster (policy times) and overall (active times).
465 // Return contains no value on error, otherwise it contains a map from uids to concurrent_time_t's
466 // using the format:
467 // { uid0 -> {.active = [a0, a1, ...], .policy = [[p0_0, p0_1, ...], [p1_0, p1_1, ...], ...] }, ...}
468 // where ai is the ns spent running concurrently with tasks on i other cpus and pi_j is the ns spent
469 // running on the ith cluster, concurrently with tasks on j other cpus in the same cluster.
getUidsConcurrentTimes()470 std::optional<std::unordered_map<uint32_t, concurrent_time_t>> getUidsConcurrentTimes() {
471     return getUidsUpdatedConcurrentTimes(nullptr);
472 }
473 
474 // Retrieve the times in ns that each uid spent running concurrently with each possible number of
475 // other tasks on each cluster (policy times) and overall (active times), excluding UIDs that have
476 // not run since before lastUpdate.
477 // Return format is the same as getUidsConcurrentTimes()
getUidsUpdatedConcurrentTimes(uint64_t * lastUpdate)478 std::optional<std::unordered_map<uint32_t, concurrent_time_t>> getUidsUpdatedConcurrentTimes(
479         uint64_t *lastUpdate) {
480     if (!gInitialized && !initGlobals()) return {};
481     time_key_t key, prevKey;
482     std::unordered_map<uint32_t, concurrent_time_t> ret;
483     if (getFirstMapKey(gConcurrentMapFd, &key)) {
484         if (errno == ENOENT) return ret;
485         return {};
486     }
487 
488     concurrent_time_t retFormat = {.active = std::vector<uint64_t>(gNCpus, 0)};
489     for (const auto &cpuList : gPolicyCpus) retFormat.policy.emplace_back(cpuList.size(), 0);
490 
491     std::vector<concurrent_val_t> vals(gNCpus);
492     std::vector<uint64_t>::iterator activeBegin, activeEnd, policyBegin, policyEnd;
493 
494     uint64_t newLastUpdate = lastUpdate ? *lastUpdate : 0;
495     do {
496         if (key.bucket > (gNCpus - 1) / CPUS_PER_ENTRY) return {};
497         if (lastUpdate) {
498             auto uidUpdated = uidUpdatedSince(key.uid, *lastUpdate, &newLastUpdate);
499             if (!uidUpdated.has_value()) return {};
500             if (!*uidUpdated) continue;
501         }
502         if (findMapEntry(gConcurrentMapFd, &key, vals.data())) return {};
503         if (ret.find(key.uid) == ret.end()) ret.emplace(key.uid, retFormat);
504 
505         auto offset = key.bucket * CPUS_PER_ENTRY;
506         auto nextOffset = (key.bucket + 1) * CPUS_PER_ENTRY;
507 
508         activeBegin = ret[key.uid].active.begin();
509         activeEnd = nextOffset < gNCpus ? activeBegin + CPUS_PER_ENTRY : ret[key.uid].active.end();
510 
511         for (uint32_t cpu = 0; cpu < gNCpus; ++cpu) {
512             std::transform(activeBegin, activeEnd, std::begin(vals[cpu].active), activeBegin,
513                            std::plus<uint64_t>());
514         }
515 
516         for (uint32_t policy = 0; policy < gNPolicies; ++policy) {
517             if (offset >= gPolicyCpus[policy].size()) continue;
518             policyBegin = ret[key.uid].policy[policy].begin() + offset;
519             policyEnd = nextOffset < gPolicyCpus[policy].size() ? policyBegin + CPUS_PER_ENTRY
520                                                                 : ret[key.uid].policy[policy].end();
521 
522             for (const auto &cpu : gPolicyCpus[policy]) {
523                 std::transform(policyBegin, policyEnd, std::begin(vals[gCpuIndexMap[cpu]].policy),
524                                policyBegin, std::plus<uint64_t>());
525             }
526         }
527     } while (prevKey = key, !getNextMapKey(gConcurrentMapFd, &prevKey, &key));
528     if (errno != ENOENT) return {};
529     for (const auto &[key, value] : ret) {
530         if (!verifyConcurrentTimes(value)) {
531             auto val = getUidConcurrentTimes(key, false);
532             if (val.has_value()) ret[key] = val.value();
533         }
534     }
535     if (lastUpdate && newLastUpdate > *lastUpdate) *lastUpdate = newLastUpdate;
536     return ret;
537 }
538 
539 // Clear all time in state data for a given uid. Returns false on error, true otherwise.
540 // This is only suitable for clearing data when an app is uninstalled; if called on a UID with
541 // running tasks it will cause time in state vs. concurrent time totals to be inconsistent for that
542 // UID.
clearUidTimes(uint32_t uid)543 bool clearUidTimes(uint32_t uid) {
544     if (!gInitialized && !initGlobals()) return false;
545 
546     time_key_t key = {.uid = uid};
547 
548     uint32_t maxFreqCount = 0;
549     for (const auto &freqList : gPolicyFreqs) {
550         if (freqList.size() > maxFreqCount) maxFreqCount = freqList.size();
551     }
552 
553     tis_val_t zeros = {0};
554     std::vector<tis_val_t> vals(gNCpus, zeros);
555     for (key.bucket = 0; key.bucket <= (maxFreqCount - 1) / FREQS_PER_ENTRY; ++key.bucket) {
556         if (writeToMapEntry(gTisMapFd, &key, vals.data(), BPF_EXIST) && errno != ENOENT)
557             return false;
558         if (deleteMapEntry(gTisMapFd, &key) && errno != ENOENT) return false;
559     }
560 
561     concurrent_val_t czeros = { .active = {0}, .policy = {0}, };
562     std::vector<concurrent_val_t> cvals(gNCpus, czeros);
563     for (key.bucket = 0; key.bucket <= (gNCpus - 1) / CPUS_PER_ENTRY; ++key.bucket) {
564         if (writeToMapEntry(gConcurrentMapFd, &key, cvals.data(), BPF_EXIST) && errno != ENOENT)
565             return false;
566         if (deleteMapEntry(gConcurrentMapFd, &key) && errno != ENOENT) return false;
567     }
568 
569     if (deleteMapEntry(gUidLastUpdateMapFd, &uid) && errno != ENOENT) return false;
570     return true;
571 }
572 
startTrackingProcessCpuTimes(pid_t pid)573 bool startTrackingProcessCpuTimes(pid_t pid) {
574     if (!gInitialized && !initGlobals()) return false;
575 
576     unique_fd trackedPidHashMapFd(
577             mapRetrieveWO(BPF_FS_PATH "map_timeInState_pid_tracked_hash_map"));
578     if (trackedPidHashMapFd < 0) return false;
579 
580     unique_fd trackedPidMapFd(mapRetrieveWO(BPF_FS_PATH "map_timeInState_pid_tracked_map"));
581     if (trackedPidMapFd < 0) return false;
582 
583     for (uint32_t index = 0; index < MAX_TRACKED_PIDS; index++) {
584         // Find first available [index, pid] entry in the pid_tracked_hash_map map
585         if (writeToMapEntry(trackedPidHashMapFd, &index, &pid, BPF_NOEXIST) != 0) {
586             if (errno != EEXIST) {
587                 return false;
588             }
589             continue; // This index is already taken
590         }
591 
592         tracked_pid_t tracked_pid = {.pid = pid, .state = TRACKED_PID_STATE_ACTIVE};
593         if (writeToMapEntry(trackedPidMapFd, &index, &tracked_pid, BPF_ANY) != 0) {
594             return false;
595         }
596         return true;
597     }
598     return false;
599 }
600 
601 // Marks the specified task identified by its PID (aka TID) for CPU time-in-state tracking
602 // aggregated with other tasks sharing the same TGID and aggregation key.
startAggregatingTaskCpuTimes(pid_t pid,uint16_t aggregationKey)603 bool startAggregatingTaskCpuTimes(pid_t pid, uint16_t aggregationKey) {
604     if (!gInitialized && !initGlobals()) return false;
605 
606     unique_fd taskAggregationMapFd(
607             mapRetrieveWO(BPF_FS_PATH "map_timeInState_pid_task_aggregation_map"));
608     if (taskAggregationMapFd < 0) return false;
609 
610     return writeToMapEntry(taskAggregationMapFd, &pid, &aggregationKey, BPF_ANY) == 0;
611 }
612 
613 // Retrieves the times in ns that each thread spent running at each CPU freq, aggregated by
614 // aggregation key.
615 // Return contains no value on error, otherwise it contains a map from aggregation keys
616 // to vectors of vectors using the format:
617 // { aggKey0 -> [[t0_0_0, t0_0_1, ...], [t0_1_0, t0_1_1, ...], ...],
618 //   aggKey1 -> [[t1_0_0, t1_0_1, ...], [t1_1_0, t1_1_1, ...], ...], ... }
619 // where ti_j_k is the ns tid i spent running on the jth cluster at the cluster's kth lowest freq.
620 std::optional<std::unordered_map<uint16_t, std::vector<std::vector<uint64_t>>>>
getAggregatedTaskCpuFreqTimes(pid_t tgid,const std::vector<uint16_t> & aggregationKeys)621 getAggregatedTaskCpuFreqTimes(pid_t tgid, const std::vector<uint16_t> &aggregationKeys) {
622     if (!gInitialized && !initGlobals()) return {};
623 
624     uint32_t maxFreqCount = 0;
625     std::vector<std::vector<uint64_t>> mapFormat;
626     for (const auto &freqList : gPolicyFreqs) {
627         if (freqList.size() > maxFreqCount) maxFreqCount = freqList.size();
628         mapFormat.emplace_back(freqList.size(), 0);
629     }
630 
631     bool dataCollected = false;
632     std::unordered_map<uint16_t, std::vector<std::vector<uint64_t>>> map;
633     std::vector<tis_val_t> vals(gNCpus);
634     for (uint16_t aggregationKey : aggregationKeys) {
635         map.emplace(aggregationKey, mapFormat);
636 
637         aggregated_task_tis_key_t key{.tgid = tgid, .aggregation_key = aggregationKey};
638         for (key.bucket = 0; key.bucket <= (maxFreqCount - 1) / FREQS_PER_ENTRY; ++key.bucket) {
639             if (findMapEntry(gPidTisMapFd, &key, vals.data()) != 0) {
640                 if (errno != ENOENT) {
641                     return {};
642                 }
643                 continue;
644             } else {
645                 dataCollected = true;
646             }
647 
648             // Combine data by aggregating time-in-state data grouped by CPU cluster aka policy.
649             uint32_t offset = key.bucket * FREQS_PER_ENTRY;
650             uint32_t nextOffset = offset + FREQS_PER_ENTRY;
651             for (uint32_t j = 0; j < gNPolicies; ++j) {
652                 if (offset >= gPolicyFreqs[j].size()) continue;
653                 auto begin = map[key.aggregation_key][j].begin() + offset;
654                 auto end = nextOffset < gPolicyFreqs[j].size() ? begin + FREQS_PER_ENTRY
655                                                                : map[key.aggregation_key][j].end();
656                 for (const auto &cpu : gPolicyCpus[j]) {
657                     std::transform(begin, end, std::begin(vals[gCpuIndexMap[cpu]].ar), begin,
658                                    std::plus<uint64_t>());
659                 }
660             }
661         }
662     }
663 
664     if (!dataCollected) {
665         // Check if eBPF is supported on this device. If it is, gTisMap should not be empty.
666         time_key_t key;
667         if (getFirstMapKey(gTisMapFd, &key) != 0) {
668             return {};
669         }
670     }
671     return map;
672 }
673 
674 } // namespace bpf
675 } // namespace android
676