1 /*
2  * Copyright 2022 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
#include <bpf/BpfMap.h>
#include <stats_pull_atom_callback.h>
#include <utils/Mutex.h>
#include <utils/String16.h>
#include <utils/Vector.h>

#include <atomic>
#include <chrono>
#include <condition_variable>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <mutex>
#include <string>
#include <thread>

#include "gpuwork/gpuWork.h"
31 
32 namespace android {
33 namespace gpuwork {
34 
35 class GpuWork {
36 public:
37     using Uid = uint32_t;
38 
39     GpuWork() = default;
40     ~GpuWork();
41 
42     void initialize();
stop()43     void stop() { mStop.store(true); }
44 
45     // Dumps the GPU work information.
46     void dump(const Vector<String16>& args, std::string* result);
47 
48 private:
49     // Attaches tracepoint |tracepoint_group|/|tracepoint_name| to BPF program at path
50     // |program_path|. The tracepoint is also enabled.
51     bool attachTracepoint(const char* program_path, const char* tracepoint_group,
52                                  const char* tracepoint_name);
53 
54     // Native atom puller callback registered in statsd.
55     static AStatsManager_PullAtomCallbackReturn pullAtomCallback(int32_t atomTag,
56                                                                  AStatsEventList* data,
57                                                                  void* cookie);
58 
59     AStatsManager_PullAtomCallbackReturn pullWorkAtoms(AStatsEventList* data);
60 
61     // Periodically calls |clearMapIfNeeded| to clear the |mGpuWorkMap| map, if
62     // needed.
63     //
64     // Thread safety analysis is skipped because we need to use
65     // |std::unique_lock|, which is not currently supported by thread safety
66     // analysis.
67     void periodicallyClearMap() NO_THREAD_SAFETY_ANALYSIS;
68 
69     // Checks whether the |mGpuWorkMap| map is nearly full and, if so, clears
70     // it.
71     void clearMapIfNeeded() REQUIRES(mMutex);
72 
73     // Clears the |mGpuWorkMap| map.
74     void clearMap() REQUIRES(mMutex);
75 
76     // Waits for required permissions to become set. This seems to be needed
77     // because platform service permissions might not be set when a service
78     // first starts. See b/214085769.
79     void waitForPermissions();
80 
81     // Indicates whether our eBPF components have been initialized.
82     std::atomic<bool> mInitialized = false;
83 
84     // Indicates whether eBPF initialization should be stopped.
85     std::atomic<bool> mStop = false;
86 
87     // A thread that periodically checks whether |mGpuWorkMap| is nearly full
88     // and, if so, clears it.
89     std::thread mMapClearerThread;
90 
91     // Mutex for |mGpuWorkMap| and a few other fields.
92     std::mutex mMutex;
93 
94     // BPF map for per-UID GPU work.
95     bpf::BpfMap<GpuIdUid, UidTrackingInfo> mGpuWorkMap GUARDED_BY(mMutex);
96 
97     // BPF map containing a single element for global data.
98     bpf::BpfMap<uint32_t, GlobalData> mGpuWorkGlobalDataMap GUARDED_BY(mMutex);
99 
100     // When true, we are being destructed, so |mMapClearerThread| should stop.
101     bool mIsTerminating GUARDED_BY(mMutex);
102 
103     // A condition variable for |mIsTerminating|.
104     std::condition_variable mIsTerminatingConditionVariable GUARDED_BY(mMutex);
105 
106     // 30 second timeout for trying to attach a BPF program to a tracepoint.
107     static constexpr int kGpuWaitTimeoutSeconds = 30;
108 
109     // The wait duration for the map clearer thread; the thread checks the map
110     // every ~1 hour.
111     static constexpr uint32_t kMapClearerWaitDurationSeconds = 60 * 60;
112 
113     // Whether our |pullAtomCallback| function is registered.
114     bool mStatsdRegistered GUARDED_BY(mMutex) = false;
115 
116     // The number of randomly chosen (i.e. sampled) UIDs to log stats for.
117     static constexpr size_t kNumSampledUids = 10;
118 
119     // A "large" number of GPUs. If we observe more GPUs than this limit then
120     // we reduce the amount of stats we log.
121     static constexpr size_t kNumGpusSoftLimit = 4;
122 
123     // A "very large" number of GPUs. If we observe more GPUs than this limit
124     // then we don't log any stats.
125     static constexpr size_t kNumGpusHardLimit = 32;
126 
127     // The minimum GPU time needed to actually log stats for a UID.
128     static constexpr uint64_t kMinGpuTimeNanoseconds = 30U * 1000000000U; // 30 seconds.
129 
130     // The previous time point at which |mGpuWorkMap| was cleared.
131     std::chrono::steady_clock::time_point mPreviousMapClearTimePoint GUARDED_BY(mMutex);
132 
133     // Permission to register a statsd puller.
134     static constexpr char16_t kPermissionRegisterStatsPullAtom[] =
135             u"android.permission.REGISTER_STATS_PULL_ATOM";
136 
137     // Time limit for waiting for permissions.
138     static constexpr int kPermissionsWaitTimeoutSeconds = 30;
139 };
140 
141 } // namespace gpuwork
142 } // namespace android
143