1 /*
2  * timeInState eBPF program
3  *
4  * Copyright (C) 2018 Google
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License version
8  * 2 as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  */
16 
17 #ifdef MOCK_BPF
18 #include <test/mock_bpf_helpers.h>
19 #else
20 #include <bpf_helpers.h>
21 #endif
22 
23 #include <bpf_timeinstate.h>
24 
// Systemwide totals: per-CPU time accumulated per frequency index, capped at
// MAX_FREQS_FOR_TOTAL entries (higher indices are folded into the last slot).
DEFINE_BPF_MAP_GRW(total_time_in_state_map, PERCPU_ARRAY, uint32_t, uint64_t, MAX_FREQS_FOR_TOTAL,
                   AID_SYSTEM)

// Per-UID time-in-state; each entry's bucket packs FREQS_PER_ENTRY frequencies.
DEFINE_BPF_MAP_GRW(uid_time_in_state_map, PERCPU_HASH, time_key_t, tis_val_t, 1024, AID_SYSTEM)

// Per-UID time broken down by how many CPUs (total and within the same cpufreq
// policy) were concurrently active; buckets pack CPUS_PER_ENTRY slots each.
DEFINE_BPF_MAP_GRW(uid_concurrent_times_map, PERCPU_HASH, time_key_t, concurrent_val_t, 1024,
                   AID_SYSTEM)
// Timestamp (ns) of the most recent stats update for each UID.
DEFINE_BPF_MAP_GRW(uid_last_update_map, HASH, uint32_t, uint64_t, 1024, AID_SYSTEM)

// Per-CPU timestamp of the last sched_switch handled on that CPU (0 = never).
DEFINE_BPF_MAP_GWO(cpu_last_update_map, PERCPU_ARRAY, uint32_t, uint64_t, 1, AID_SYSTEM)
// Per-CPU pid of the task most recently switched in on that CPU.
DEFINE_BPF_MAP_GWO(cpu_last_pid_map, PERCPU_ARRAY, uint32_t, pid_t, 1, AID_SYSTEM)

// cpu id -> cpufreq policy id.
DEFINE_BPF_MAP_GWO(cpu_policy_map, ARRAY, uint32_t, uint32_t, 1024, AID_SYSTEM)
// policy id -> (current frequency index + 1); 0 means not yet initialized.
DEFINE_BPF_MAP_GWO(policy_freq_idx_map, ARRAY, uint32_t, uint8_t, 1024, AID_SYSTEM)

// (policy, frequency) -> (frequency index + 1); indices start at 1 so that a
// zero value in policy_freq_idx_map is unambiguously "uninitialized".
DEFINE_BPF_MAP_GWO(freq_to_idx_map, HASH, freq_idx_key_t, uint8_t, 2048, AID_SYSTEM)

// Number of currently active (non-idle) CPUs, systemwide and per policy.
DEFINE_BPF_MAP_GWO(nr_active_map, ARRAY, uint32_t, uint32_t, 1, AID_SYSTEM)
DEFINE_BPF_MAP_GWO(policy_nr_active_map, ARRAY, uint32_t, uint32_t, 1024, AID_SYSTEM)

// Set and ordered list of processes (tgids) opted into per-task aggregation.
DEFINE_BPF_MAP_GWO(pid_tracked_hash_map, HASH, uint32_t, pid_t, MAX_TRACKED_PIDS, AID_SYSTEM)
DEFINE_BPF_MAP_GWO(pid_tracked_map, ARRAY, uint32_t, tracked_pid_t, MAX_TRACKED_PIDS, AID_SYSTEM)
// thread pid -> aggregation key used to group threads within a tracked tgid.
DEFINE_BPF_MAP_GWO(pid_task_aggregation_map, HASH, pid_t, uint16_t, 1024, AID_SYSTEM)
// (tgid, aggregation key, bucket) -> aggregated time-in-state for that group.
DEFINE_BPF_MAP_GRO(pid_time_in_state_map, PERCPU_HASH, aggregated_task_tis_key_t, tis_val_t, 1024,
                   AID_SYSTEM)
50 
// Argument record of the sched/sched_switch tracepoint; field order and sizes
// must match the kernel's event format (see
// /d/tracing/events/sched/sched_switch/format).
struct switch_args {
    unsigned long long ignore;  // common tracepoint header, not used here
    char prev_comm[16];
    int prev_pid;
    int prev_prio;
    long long prev_state;
    char next_comm[16];
    int next_pid;
    int next_prio;
};
61 
update_uid(uint32_t uid,uint64_t delta,uint64_t time,uint8_t freq_idx,uint32_t active,uint32_t policy_active)62 static inline __always_inline void update_uid(uint32_t uid, uint64_t delta, uint64_t time,
63                                               uint8_t freq_idx, uint32_t active,
64                                               uint32_t policy_active) {
65     time_key_t key = {.uid = uid, .bucket = freq_idx / FREQS_PER_ENTRY};
66     tis_val_t* val = bpf_uid_time_in_state_map_lookup_elem(&key);
67     if (!val) {
68         tis_val_t zero_val = {.ar = {0}};
69         bpf_uid_time_in_state_map_update_elem(&key, &zero_val, BPF_NOEXIST);
70         val = bpf_uid_time_in_state_map_lookup_elem(&key);
71     }
72     if (val) val->ar[freq_idx % FREQS_PER_ENTRY] += delta;
73 
74     key.bucket = active / CPUS_PER_ENTRY;
75     concurrent_val_t* ct = bpf_uid_concurrent_times_map_lookup_elem(&key);
76     if (!ct) {
77         concurrent_val_t zero_val = {.active = {0}, .policy = {0}};
78         bpf_uid_concurrent_times_map_update_elem(&key, &zero_val, BPF_NOEXIST);
79         ct = bpf_uid_concurrent_times_map_lookup_elem(&key);
80     }
81     if (ct) ct->active[active % CPUS_PER_ENTRY] += delta;
82 
83     if (policy_active / CPUS_PER_ENTRY != key.bucket) {
84         key.bucket = policy_active / CPUS_PER_ENTRY;
85         ct = bpf_uid_concurrent_times_map_lookup_elem(&key);
86         if (!ct) {
87             concurrent_val_t zero_val = {.active = {0}, .policy = {0}};
88             bpf_uid_concurrent_times_map_update_elem(&key, &zero_val, BPF_NOEXIST);
89             ct = bpf_uid_concurrent_times_map_lookup_elem(&key);
90         }
91     }
92     if (ct) ct->policy[policy_active % CPUS_PER_ENTRY] += delta;
93     uint64_t* uid_last_update = bpf_uid_last_update_map_lookup_elem(&uid);
94     if (uid_last_update) {
95         *uid_last_update = time;
96     } else {
97         bpf_uid_last_update_map_update_elem(&uid, &time, BPF_NOEXIST);
98     }
99     return;
100 }
101 
// sched_switch handler: attributes the time since the last switch on this CPU
// to the outgoing task's UID at the CPU's current frequency, and maintains the
// systemwide / per-policy active-CPU counters used for concurrent-time stats.
DEFINE_BPF_PROG("tracepoint/sched/sched_switch", AID_ROOT, AID_SYSTEM, tp_sched_switch)
(struct switch_args* args) {
    const int ALLOW = 1;  // return 1 to avoid blocking simpleperf from receiving events.
    uint32_t zero = 0;
    // Read and refresh this CPU's last-switch timestamp (per-CPU, index 0).
    // old_last == 0 means this is the first event seen on this CPU.
    uint64_t* last = bpf_cpu_last_update_map_lookup_elem(&zero);
    if (!last) return ALLOW;
    uint64_t old_last = *last;
    uint64_t time = bpf_ktime_get_ns();
    *last = time;

    // With suspend-to-ram, it's possible to see prev_pid==0 twice in a row on the same CPU. Add a
    // check to ensure prev_pid matches the previous next_pid to avoid incorrectly incrementing our
    // active CPU counts a second time in this scenario.
    pid_t *cpu_pidp = bpf_cpu_last_pid_map_lookup_elem(&zero);
    if (!cpu_pidp) return ALLOW;
    pid_t cpu_pid = *cpu_pidp;
    *cpu_pidp = args->next_pid;
    if (old_last && args->prev_pid != cpu_pid) return ALLOW;

    uint32_t* active = bpf_nr_active_map_lookup_elem(&zero);
    if (!active) return ALLOW;

    // Map this CPU to its cpufreq policy for the per-policy counters below.
    uint32_t cpu = bpf_get_smp_processor_id();
    uint32_t* policyp = bpf_cpu_policy_map_lookup_elem(&cpu);
    if (!policyp) return ALLOW;
    uint32_t policy = *policyp;

    uint32_t* policy_active = bpf_policy_nr_active_map_lookup_elem(&policy);
    if (!policy_active) return ALLOW;

    // Snapshot the counts before any adjustment below (they describe the
    // interval that just ended); subtract 1 so a count of N selects slot N-1
    // in the concurrent-times arrays.
    uint32_t nactive = *active - 1;
    uint32_t policy_nactive = *policy_active - 1;

    // Increment active counts when this CPU leaves idle (prev_pid == 0), or
    // on the first event seen on this CPU while it runs a real task.
    if (!args->prev_pid || (!old_last && args->next_pid)) {
        __sync_fetch_and_add(active, 1);
        __sync_fetch_and_add(policy_active, 1);
    }

    // Return here in 2 scenarios:
    // 1) prev_pid == 0, so we're exiting idle. No UID stats need updating, and active CPUs can't be
    //    decreasing.
    // 2) old_last == 0, so this is the first time we've seen this CPU. Any delta will be invalid,
    //    and our active CPU counts don't include this CPU yet so we shouldn't decrement them even
    //    if we're going idle.
    if (!args->prev_pid || !old_last) return ALLOW;

    // Switching to the idle task: this CPU is no longer active.
    if (!args->next_pid) {
        __sync_fetch_and_add(active, -1);
        __sync_fetch_and_add(policy_active, -1);
    }

    uint8_t* freq_idxp = bpf_policy_freq_idx_map_lookup_elem(&policy);
    if (!freq_idxp || !*freq_idxp) return ALLOW;
    // freq_to_idx_map uses 1 as its minimum index so that *freq_idxp == 0 only when uninitialized
    uint8_t freq_idx = *freq_idxp - 1;

    // bpf_get_current_uid_gid() packs gid in the high 32 bits and uid in the
    // low 32; the uint32_t assignment keeps only the uid.
    uint32_t uid = bpf_get_current_uid_gid();
    uint64_t delta = time - old_last;

    // For UIDs in the SDK sandbox range, we account per-UID times twice, both to the corresponding
    // app uid and to the "virtual" UID AID_SDK_SANDBOX which is reserved for collecting total times
    // across all SDK sandbox UIDs. Special handling for this reserved UID in framework code
    // prevents double counting in systemwide totals.
    if (((uid % AID_USER_OFFSET) >= AID_SDK_SANDBOX_PROCESS_START) &&
        ((uid % AID_USER_OFFSET) <= AID_SDK_SANDBOX_PROCESS_END)) {
        uid -= AID_SDK_SANDBOX_PROCESS_START - AID_APP_START;
        update_uid(uid, delta, time, freq_idx, nactive, policy_nactive);
        update_uid(AID_SDK_SANDBOX, delta, time, freq_idx, nactive, policy_nactive);
    } else {
        update_uid(uid, delta, time, freq_idx, nactive, policy_nactive);
    }

    // Add delta to total.
    const uint32_t total_freq_idx = freq_idx < MAX_FREQS_FOR_TOTAL ? freq_idx :
                                    MAX_FREQS_FOR_TOTAL - 1;
    uint64_t* total = bpf_total_time_in_state_map_lookup_elem(&total_freq_idx);
    if (total) *total += delta;

    // Per-task aggregation: charge the delta to (tgid, aggregation key) if the
    // outgoing task's process is in the tracked list.
    const int pid = args->prev_pid;
    const pid_t tgid = bpf_get_current_pid_tgid() >> 32;
    bool is_tgid_tracked = false;

    // eBPF verifier does not currently allow loops.
    // Instruct the C compiler to unroll the loop into a series of steps.
    #pragma unroll
    for (uint32_t index = 0; index < MAX_TRACKED_PIDS; index++) {
        const uint32_t key = index;
        tracked_pid_t* tracked_pid = bpf_pid_tracked_map_lookup_elem(&key);
        if (!tracked_pid) continue;
        if (tracked_pid->state == TRACKED_PID_STATE_UNUSED) {
            // Reached the end of the list
            break;
        }

        if (tracked_pid->state == TRACKED_PID_STATE_ACTIVE && tracked_pid->pid == tgid) {
            is_tgid_tracked = true;
            break;
        }
    }

    if (is_tgid_tracked) {
        // If this process is marked for time-in-state tracking, aggregate the CPU time-in-state
        // with other threads sharing the same TGID and aggregation key.
        uint16_t* aggregation_key = bpf_pid_task_aggregation_map_lookup_elem(&pid);
        aggregated_task_tis_key_t task_key = {
                .tgid = tgid,
                .aggregation_key = aggregation_key ? *aggregation_key : 0,
                .bucket = freq_idx / FREQS_PER_ENTRY};
        tis_val_t* task_val = bpf_pid_time_in_state_map_lookup_elem(&task_key);
        if (!task_val) {
            // Lazily create the entry, then re-look it up.
            tis_val_t zero_val = {.ar = {0}};
            bpf_pid_time_in_state_map_update_elem(&task_key, &zero_val, BPF_NOEXIST);
            task_val = bpf_pid_time_in_state_map_lookup_elem(&task_key);
        }
        if (task_val) task_val->ar[freq_idx % FREQS_PER_ENTRY] += delta;
    }
    return ALLOW;
}
220 
// Argument record of the power/cpu_frequency tracepoint; layout must match
// the kernel's event format (see /d/tracing/events/power/cpu_frequency/format).
struct cpufreq_args {
    unsigned long long ignore;  // common tracepoint header, not used here
    unsigned int state;         // new frequency in kHz
    unsigned int cpu_id;
};
226 
227 DEFINE_BPF_PROG("tracepoint/power/cpu_frequency", AID_ROOT, AID_SYSTEM, tp_cpufreq)
228 (struct cpufreq_args* args) {
229     const int ALLOW = 1;  // return 1 to avoid blocking simpleperf from receiving events.
230     uint32_t cpu = args->cpu_id;
231     unsigned int new = args->state;
232     uint32_t* policyp = bpf_cpu_policy_map_lookup_elem(&cpu);
233     if (!policyp) return ALLOW;
234     uint32_t policy = *policyp;
235     freq_idx_key_t key = {.policy = policy, .freq = new};
236     uint8_t* idxp = bpf_freq_to_idx_map_lookup_elem(&key);
237     if (!idxp) return ALLOW;
238     uint8_t idx = *idxp;
239     bpf_policy_freq_idx_map_update_elem(&policy, &idx, BPF_ANY);
240     return ALLOW;
241 }
242 
// The format of the sched/sched_process_free event is described in
// adb shell cat /d/tracing/events/sched/sched_process_free/format
struct sched_process_free_args {
    unsigned long long ignore;  // common tracepoint header, not used here
    char comm[16];
    pid_t pid;
    int prio;
};
251 
// sched_process_free handler: removes a dying task from the tracked-pid list
// and drops its per-task aggregation key.
DEFINE_BPF_PROG("tracepoint/sched/sched_process_free", AID_ROOT, AID_SYSTEM, tp_sched_process_free)
(struct sched_process_free_args* args) {
    const int ALLOW = 1;

    int pid = args->pid;
    // True while no ACTIVE entry has been seen at a higher index than the
    // current one (the scan runs from the highest index downward).
    bool is_last = true;

    // eBPF verifier does not currently allow loops.
    // Instruct the C compiler to unroll the loop into a series of steps.
    #pragma unroll
    for (uint32_t index = 0; index < MAX_TRACKED_PIDS; index++) {
        // Iterate in reverse: key counts down from MAX_TRACKED_PIDS - 1 to 0.
        const uint32_t key = MAX_TRACKED_PIDS - index - 1;
        tracked_pid_t* tracked_pid = bpf_pid_tracked_map_lookup_elem(&key);
        if (!tracked_pid) continue;
        if (tracked_pid->pid == pid) {
            // Found the dying pid: clear the slot. If nothing ACTIVE follows
            // it, mark it UNUSED so scans can stop here; otherwise mark it
            // EXITED to keep later entries reachable.
            tracked_pid->pid = 0;
            tracked_pid->state = is_last ? TRACKED_PID_STATE_UNUSED : TRACKED_PID_STATE_EXITED;
            bpf_pid_tracked_hash_map_delete_elem(&key);
            break;
        }
        if (tracked_pid->state == TRACKED_PID_STATE_ACTIVE) {
            is_last = false;
        }
    }

    // Drop the task's aggregation key regardless of tracked-list membership.
    bpf_pid_task_aggregation_map_delete_elem(&pid);
    return ALLOW;
}
280 
// License declaration consumed by the BPF loader.
LICENSE("GPL");
282