/*
 * timeInState eBPF program
 *
 * Copyright (C) 2018 Google
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 */

#ifdef MOCK_BPF
#include <test/mock_bpf_helpers.h>
#else
#include <bpf_helpers.h>
#endif

#include <bpf_timeinstate.h>
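
// Note on layout (a reader's summary; the authoritative definitions are in bpf_timeinstate.h):
// per-UID times are stored in fixed-size buckets. A frequency index f is split into a bucket
// (f / FREQS_PER_ENTRY), which goes into the map key, and a slot (f % FREQS_PER_ENTRY) within
// that entry's array. For example, if FREQS_PER_ENTRY were 32 (illustrative value only),
// freq_idx 37 would land in bucket 1, slot 5:
//
//     time_key_t key = {.uid = uid, .bucket = 37 / 32};  // bucket 1
//     val->ar[37 % 32] += delta;                         // slot 5
//
// The concurrent-time buckets below are laid out the same way, using CPUS_PER_ENTRY.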

DEFINE_BPF_MAP_GRW(total_time_in_state_map, PERCPU_ARRAY, uint32_t, uint64_t, MAX_FREQS_FOR_TOTAL,
                   AID_SYSTEM)

DEFINE_BPF_MAP_GRW(uid_time_in_state_map, PERCPU_HASH, time_key_t, tis_val_t, 1024, AID_SYSTEM)

DEFINE_BPF_MAP_GRW(uid_concurrent_times_map, PERCPU_HASH, time_key_t, concurrent_val_t, 1024,
                   AID_SYSTEM)
DEFINE_BPF_MAP_GRW(uid_last_update_map, HASH, uint32_t, uint64_t, 1024, AID_SYSTEM)

DEFINE_BPF_MAP_GWO(cpu_last_update_map, PERCPU_ARRAY, uint32_t, uint64_t, 1, AID_SYSTEM)
DEFINE_BPF_MAP_GWO(cpu_last_pid_map, PERCPU_ARRAY, uint32_t, pid_t, 1, AID_SYSTEM)

DEFINE_BPF_MAP_GWO(cpu_policy_map, ARRAY, uint32_t, uint32_t, 1024, AID_SYSTEM)
DEFINE_BPF_MAP_GWO(policy_freq_idx_map, ARRAY, uint32_t, uint8_t, 1024, AID_SYSTEM)

DEFINE_BPF_MAP_GWO(freq_to_idx_map, HASH, freq_idx_key_t, uint8_t, 2048, AID_SYSTEM)

DEFINE_BPF_MAP_GWO(nr_active_map, ARRAY, uint32_t, uint32_t, 1, AID_SYSTEM)
DEFINE_BPF_MAP_GWO(policy_nr_active_map, ARRAY, uint32_t, uint32_t, 1024, AID_SYSTEM)

DEFINE_BPF_MAP_GWO(pid_tracked_hash_map, HASH, uint32_t, pid_t, MAX_TRACKED_PIDS, AID_SYSTEM)
DEFINE_BPF_MAP_GWO(pid_tracked_map, ARRAY, uint32_t, tracked_pid_t, MAX_TRACKED_PIDS, AID_SYSTEM)
DEFINE_BPF_MAP_GWO(pid_task_aggregation_map, HASH, pid_t, uint16_t, 1024, AID_SYSTEM)
DEFINE_BPF_MAP_GRO(pid_time_in_state_map, PERCPU_HASH, aggregated_task_tis_key_t, tis_val_t, 1024,
                   AID_SYSTEM)

struct switch_args {
    unsigned long long ignore;
    char prev_comm[16];
    int prev_pid;
    int prev_prio;
    long long prev_state;
    char next_comm[16];
    int next_pid;
    int next_prio;
};

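// update_uid() uses a common BPF "lookup or create" idiom: look the key up, insert a zeroed
// value with BPF_NOEXIST if it is missing, then look it up again. BPF_NOEXIST makes the insert
// fail harmlessly if another CPU created the entry in the meantime, so the second lookup
// succeeds either way; the remaining NULL check covers the case where the map is full and the
// insert itself failed. A minimal sketch of the pattern (slot is illustrative):
//
//     tis_val_t* val = bpf_uid_time_in_state_map_lookup_elem(&key);
//     if (!val) {
//         tis_val_t zero_val = {.ar = {0}};
//         bpf_uid_time_in_state_map_update_elem(&key, &zero_val, BPF_NOEXIST);
//         val = bpf_uid_time_in_state_map_lookup_elem(&key);  // may still be NULL if map is full
//     }
//     if (val) val->ar[slot] += delta;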
static inline __always_inline void update_uid(uint32_t uid, uint64_t delta, uint64_t time,
                                              uint8_t freq_idx, uint32_t active,
                                              uint32_t policy_active) {
    time_key_t key = {.uid = uid, .bucket = freq_idx / FREQS_PER_ENTRY};
    tis_val_t* val = bpf_uid_time_in_state_map_lookup_elem(&key);
    if (!val) {
        tis_val_t zero_val = {.ar = {0}};
        bpf_uid_time_in_state_map_update_elem(&key, &zero_val, BPF_NOEXIST);
        val = bpf_uid_time_in_state_map_lookup_elem(&key);
    }
    if (val) val->ar[freq_idx % FREQS_PER_ENTRY] += delta;

    key.bucket = active / CPUS_PER_ENTRY;
    concurrent_val_t* ct = bpf_uid_concurrent_times_map_lookup_elem(&key);
    if (!ct) {
        concurrent_val_t zero_val = {.active = {0}, .policy = {0}};
        bpf_uid_concurrent_times_map_update_elem(&key, &zero_val, BPF_NOEXIST);
        ct = bpf_uid_concurrent_times_map_lookup_elem(&key);
    }
    if (ct) ct->active[active % CPUS_PER_ENTRY] += delta;

    // The policy-wide count may fall in a different bucket than the system-wide count; if so,
    // look up (or create) that entry before updating the policy slot.
    if (policy_active / CPUS_PER_ENTRY != key.bucket) {
        key.bucket = policy_active / CPUS_PER_ENTRY;
        ct = bpf_uid_concurrent_times_map_lookup_elem(&key);
        if (!ct) {
            concurrent_val_t zero_val = {.active = {0}, .policy = {0}};
            bpf_uid_concurrent_times_map_update_elem(&key, &zero_val, BPF_NOEXIST);
            ct = bpf_uid_concurrent_times_map_lookup_elem(&key);
        }
    }
    if (ct) ct->policy[policy_active % CPUS_PER_ENTRY] += delta;

    uint64_t* uid_last_update = bpf_uid_last_update_map_lookup_elem(&uid);
    if (uid_last_update) {
        *uid_last_update = time;
    } else {
        bpf_uid_last_update_map_update_elem(&uid, &time, BPF_NOEXIST);
    }
}
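
// Because uid_time_in_state_map is a PERCPU_HASH, each CPU increments its own copy of the
// value, so a reader must sum across CPUs. A hedged sketch of how userspace might reconstruct
// the total time a UID spent at frequency index f (vals and ncpus are illustrative names, not
// an actual API):
//
//     time_key_t key = {.uid = uid, .bucket = f / FREQS_PER_ENTRY};
//     // fetch one tis_val_t per possible CPU into vals[], then:
//     uint64_t total = 0;
//     for (uint32_t cpu = 0; cpu < ncpus; cpu++) total += vals[cpu].ar[f % FREQS_PER_ENTRY];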

DEFINE_BPF_PROG("tracepoint/sched/sched_switch", AID_ROOT, AID_SYSTEM, tp_sched_switch)
(struct switch_args* args) {
    const int ALLOW = 1;  // return 1 to avoid blocking simpleperf from receiving events.
    uint32_t zero = 0;
    uint64_t* last = bpf_cpu_last_update_map_lookup_elem(&zero);
    if (!last) return ALLOW;
    uint64_t old_last = *last;
    uint64_t time = bpf_ktime_get_ns();
    *last = time;

    // With suspend-to-ram, it's possible to see prev_pid==0 twice in a row on the same CPU. Add a
    // check to ensure prev_pid matches the previous next_pid to avoid incorrectly incrementing our
    // active CPU counts a second time in this scenario.
    pid_t* cpu_pidp = bpf_cpu_last_pid_map_lookup_elem(&zero);
    if (!cpu_pidp) return ALLOW;
    pid_t cpu_pid = *cpu_pidp;
    *cpu_pidp = args->next_pid;
    if (old_last && args->prev_pid != cpu_pid) return ALLOW;

    uint32_t* active = bpf_nr_active_map_lookup_elem(&zero);
    if (!active) return ALLOW;

    uint32_t cpu = bpf_get_smp_processor_id();
    uint32_t* policyp = bpf_cpu_policy_map_lookup_elem(&cpu);
    if (!policyp) return ALLOW;
    uint32_t policy = *policyp;

    uint32_t* policy_active = bpf_policy_nr_active_map_lookup_elem(&policy);
    if (!policy_active) return ALLOW;

    uint32_t nactive = *active - 1;
    uint32_t policy_nactive = *policy_active - 1;

    if (!args->prev_pid || (!old_last && args->next_pid)) {
        __sync_fetch_and_add(active, 1);
        __sync_fetch_and_add(policy_active, 1);
    }

    // Return here in 2 scenarios:
    // 1) prev_pid == 0, so we're exiting idle. No UID stats need updating, and active CPUs can't be
    //    decreasing.
    // 2) old_last == 0, so this is the first time we've seen this CPU. Any delta will be invalid,
    //    and our active CPU counts don't include this CPU yet so we shouldn't decrement them even
    //    if we're going idle.
    if (!args->prev_pid || !old_last) return ALLOW;

    if (!args->next_pid) {
        __sync_fetch_and_add(active, -1);
        __sync_fetch_and_add(policy_active, -1);
    }

    uint8_t* freq_idxp = bpf_policy_freq_idx_map_lookup_elem(&policy);
    if (!freq_idxp || !*freq_idxp) return ALLOW;
    // freq_to_idx_map uses 1 as its minimum index so that *freq_idxp == 0 only when uninitialized
    uint8_t freq_idx = *freq_idxp - 1;

    uint32_t uid = bpf_get_current_uid_gid();
    uint64_t delta = time - old_last;

    // For UIDs in the SDK sandbox range, we account per-UID times twice, both to the corresponding
    // app uid and to the "virtual" UID AID_SDK_SANDBOX, which is reserved for collecting total
    // times across all SDK sandbox UIDs. Special handling for this reserved UID in framework code
    // prevents double counting in systemwide totals.
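    // Worked example (using the standard Android values AID_USER_OFFSET == 100000,
    // AID_SDK_SANDBOX_PROCESS_START == 20000, AID_APP_START == 10000; see
    // android_filesystem_config.h for the authoritative definitions): for uid 1020123,
    // uid % AID_USER_OFFSET == 20123 falls in the sandbox range, so the subtraction below
    // remaps it to 1010123, the app uid in the same user profile.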
    if (((uid % AID_USER_OFFSET) >= AID_SDK_SANDBOX_PROCESS_START) &&
        ((uid % AID_USER_OFFSET) <= AID_SDK_SANDBOX_PROCESS_END)) {
        uid -= AID_SDK_SANDBOX_PROCESS_START - AID_APP_START;
        update_uid(uid, delta, time, freq_idx, nactive, policy_nactive);
        update_uid(AID_SDK_SANDBOX, delta, time, freq_idx, nactive, policy_nactive);
    } else {
        update_uid(uid, delta, time, freq_idx, nactive, policy_nactive);
    }

    // Add delta to total.
    const uint32_t total_freq_idx = freq_idx < MAX_FREQS_FOR_TOTAL ? freq_idx :
                                                                     MAX_FREQS_FOR_TOTAL - 1;
    uint64_t* total = bpf_total_time_in_state_map_lookup_elem(&total_freq_idx);
    if (total) *total += delta;

    const int pid = args->prev_pid;
    const pid_t tgid = bpf_get_current_pid_tgid() >> 32;
    bool is_tgid_tracked = false;

    // eBPF verifier does not currently allow loops.
    // Instruct the C compiler to unroll the loop into a series of steps.
#pragma unroll
    for (uint32_t index = 0; index < MAX_TRACKED_PIDS; index++) {
        const uint32_t key = index;
        tracked_pid_t* tracked_pid = bpf_pid_tracked_map_lookup_elem(&key);
        if (!tracked_pid) continue;
        if (tracked_pid->state == TRACKED_PID_STATE_UNUSED) {
            // Reached the end of the list
            break;
        }

        if (tracked_pid->state == TRACKED_PID_STATE_ACTIVE && tracked_pid->pid == tgid) {
            is_tgid_tracked = true;
            break;
        }
    }

    if (is_tgid_tracked) {
        // If this process is marked for time-in-state tracking, aggregate the CPU time-in-state
        // with other threads sharing the same TGID and aggregation key.
        uint16_t* aggregation_key = bpf_pid_task_aggregation_map_lookup_elem(&pid);
        aggregated_task_tis_key_t task_key = {
                .tgid = tgid,
                .aggregation_key = aggregation_key ? *aggregation_key : 0,
                .bucket = freq_idx / FREQS_PER_ENTRY};
        tis_val_t* task_val = bpf_pid_time_in_state_map_lookup_elem(&task_key);
        if (!task_val) {
            tis_val_t zero_val = {.ar = {0}};
            bpf_pid_time_in_state_map_update_elem(&task_key, &zero_val, BPF_NOEXIST);
            task_val = bpf_pid_time_in_state_map_lookup_elem(&task_key);
        }
        if (task_val) task_val->ar[freq_idx % FREQS_PER_ENTRY] += delta;
    }
    return ALLOW;
}
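
// To illustrate the accounting above with a hypothetical trace on one CPU:
//
//     t0: switch(idle -> A)   // prev_pid == 0: nothing charged, active counts incremented
//     t1: switch(A -> B)      // charge (t1 - t0) to A's uid at the CPU's current freq_idx
//     t2: switch(B -> idle)   // charge (t2 - t1) to B's uid, active counts decremented
//
// i.e. each task is charged on the switch that schedules it out, using the timestamp stored
// when it was scheduled in.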

struct cpufreq_args {
    unsigned long long ignore;
    unsigned int state;
    unsigned int cpu_id;
};

DEFINE_BPF_PROG("tracepoint/power/cpu_frequency", AID_ROOT, AID_SYSTEM, tp_cpufreq)
(struct cpufreq_args* args) {
    const int ALLOW = 1;  // return 1 to avoid blocking simpleperf from receiving events.
    uint32_t cpu = args->cpu_id;
    unsigned int new = args->state;
    uint32_t* policyp = bpf_cpu_policy_map_lookup_elem(&cpu);
    if (!policyp) return ALLOW;
    uint32_t policy = *policyp;
    freq_idx_key_t key = {.policy = policy, .freq = new};
    uint8_t* idxp = bpf_freq_to_idx_map_lookup_elem(&key);
    if (!idxp) return ALLOW;
    uint8_t idx = *idxp;
    bpf_policy_freq_idx_map_update_elem(&policy, &idx, BPF_ANY);
    return ALLOW;
}
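
// Note that the indices stored in freq_to_idx_map (and copied into policy_freq_idx_map here)
// are 1-based, so a 0 in policy_freq_idx_map means "no cpu_frequency event seen yet" and
// tp_sched_switch subtracts 1 before use. cpu_policy_map and freq_to_idx_map are populated
// from userspace (on Android, presumably by the same system component that reads these maps)
// before this program can attribute time. Illustrative entry, assuming a policy whose lowest
// frequency is 300000 kHz:
//
//     freq_to_idx_map[{.policy = 0, .freq = 300000}] = 1;  // becomes freq_idx 0 after the -1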

// The format of the sched/sched_process_free event is described in
// adb shell cat /d/tracing/events/sched/sched_process_free/format
struct sched_process_free_args {
    unsigned long long ignore;
    char comm[16];
    pid_t pid;
    int prio;
};

DEFINE_BPF_PROG("tracepoint/sched/sched_process_free", AID_ROOT, AID_SYSTEM, tp_sched_process_free)
(struct sched_process_free_args* args) {
    const int ALLOW = 1;

    int pid = args->pid;
    bool is_last = true;

    // eBPF verifier does not currently allow loops.
    // Instruct the C compiler to unroll the loop into a series of steps.
#pragma unroll
    for (uint32_t index = 0; index < MAX_TRACKED_PIDS; index++) {
        const uint32_t key = MAX_TRACKED_PIDS - index - 1;
        tracked_pid_t* tracked_pid = bpf_pid_tracked_map_lookup_elem(&key);
        if (!tracked_pid) continue;
        if (tracked_pid->pid == pid) {
            tracked_pid->pid = 0;
            tracked_pid->state = is_last ? TRACKED_PID_STATE_UNUSED : TRACKED_PID_STATE_EXITED;
            bpf_pid_tracked_hash_map_delete_elem(&key);
            break;
        }
        if (tracked_pid->state == TRACKED_PID_STATE_ACTIVE) {
            is_last = false;
        }
    }

    bpf_pid_task_aggregation_map_delete_elem(&pid);
    return ALLOW;
}
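
// Illustrative walk-through of the scan above: tp_sched_switch treats the first UNUSED entry
// as the end of the list, so a pid freed from the middle must become EXITED, not UNUSED, to
// keep later entries reachable. With [0]=A (ACTIVE), [1]=B (ACTIVE), [2]=C (ACTIVE) and the
// rest UNUSED: freeing C finds no ACTIVE entry at a higher index (is_last stays true), so
// slot 2 becomes UNUSED and the list shrinks; freeing B sees C still ACTIVE above it
// (is_last becomes false), so slot 1 becomes EXITED.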

LICENSE("GPL");