1 /*
2  * Copyright 2020 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #undef LOG_TAG
18 #define LOG_TAG "GpuMem"
19 #define ATRACE_TAG ATRACE_TAG_GRAPHICS
20 
21 #include "gpumem/GpuMem.h"
22 
23 #include <android-base/stringprintf.h>
24 #include <libbpf.h>
25 #include <bpf/WaitForProgsLoaded.h>
26 #include <log/log.h>
27 #include <unistd.h>
28 #include <utils/Timers.h>
29 #include <utils/Trace.h>
30 
31 #include <unordered_map>
32 #include <vector>
33 
34 namespace android {
35 
36 using base::StringAppendF;
37 
~GpuMem()38 GpuMem::~GpuMem() {
39     bpf_detach_tracepoint(kGpuMemTraceGroup, kGpuMemTotalTracepoint);
40 }
41 
initialize()42 void GpuMem::initialize() {
43     // Make sure bpf programs are loaded
44     bpf::waitForProgsLoaded();
45 
46     errno = 0;
47     int fd = bpf::retrieveProgram(kGpuMemTotalProgPath);
48     if (fd < 0) {
49         ALOGE("Failed to retrieve pinned program from %s [%d(%s)]", kGpuMemTotalProgPath, errno,
50               strerror(errno));
51         return;
52     }
53 
54     // Attach the program to the tracepoint, and the tracepoint is automatically enabled here.
55     errno = 0;
56     int count = 0;
57     while (bpf_attach_tracepoint(fd, kGpuMemTraceGroup, kGpuMemTotalTracepoint) < 0) {
58         if (++count > kGpuWaitTimeout) {
59             ALOGE("Failed to attach bpf program to %s/%s tracepoint [%d(%s)]", kGpuMemTraceGroup,
60                   kGpuMemTotalTracepoint, errno, strerror(errno));
61             return;
62         }
63         // Retry until GPU driver loaded or timeout.
64         if (mStop.load()) return;
65         sleep(1);
66     }
67 
68     // Use the read-only wrapper BpfMapRO to properly retrieve the read-only map.
69     errno = 0;
70     auto map = bpf::BpfMapRO<uint64_t, uint64_t>(kGpuMemTotalMapPath);
71     if (!map.isValid()) {
72         ALOGE("Failed to create bpf map from %s [%d(%s)]", kGpuMemTotalMapPath, errno,
73               strerror(errno));
74         return;
75     }
76     setGpuMemTotalMap(map);
77 
78     mInitialized.store(true);
79 }
80 
setGpuMemTotalMap(bpf::BpfMapRO<uint64_t,uint64_t> & map)81 void GpuMem::setGpuMemTotalMap(bpf::BpfMapRO<uint64_t, uint64_t>& map) {
82     mGpuMemTotalMap = std::move(map);
83 }
84 
85 // Dump the snapshots of global and per process memory usage on all gpus
dump(const Vector<String16> &,std::string * result)86 void GpuMem::dump(const Vector<String16>& /* args */, std::string* result) {
87     ATRACE_CALL();
88 
89     if (!mInitialized.load() || !mGpuMemTotalMap.isValid()) {
90         result->append("Failed to initialize GPU memory eBPF\n");
91         return;
92     }
93 
94     auto res = mGpuMemTotalMap.getFirstKey();
95     if (!res.ok()) {
96         result->append("GPU memory total usage map is empty\n");
97         return;
98     }
99     uint64_t key = res.value();
100     // unordered_map<gpu_id, vector<pair<pid, size>>>
101     std::unordered_map<uint32_t, std::vector<std::pair<uint32_t, uint64_t>>> dumpMap;
102     while (true) {
103         uint32_t gpu_id = key >> 32;
104         uint32_t pid = key;
105 
106         res = mGpuMemTotalMap.readValue(key);
107         if (!res.ok()) break;
108         uint64_t size = res.value();
109 
110         dumpMap[gpu_id].emplace_back(pid, size);
111 
112         res = mGpuMemTotalMap.getNextKey(key);
113         if (!res.ok()) break;
114         key = res.value();
115     }
116 
117     for (auto& gpu : dumpMap) {
118         if (gpu.second.empty()) continue;
119         StringAppendF(result, "Memory snapshot for GPU %u:\n", gpu.first);
120 
121         std::sort(gpu.second.begin(), gpu.second.end(),
122                   [](auto& l, auto& r) { return l.first < r.first; });
123 
124         int i = 0;
125         if (gpu.second[0].first != 0) {
126             StringAppendF(result, "Global total: N/A\n");
127         } else {
128             StringAppendF(result, "Global total: %" PRIu64 "\n", gpu.second[0].second);
129             i++;
130         }
131         for (; i < gpu.second.size(); i++) {
132             StringAppendF(result, "Proc %u total: %" PRIu64 "\n", gpu.second[i].first,
133                           gpu.second[i].second);
134         }
135     }
136 }
137 
traverseGpuMemTotals(const std::function<void (int64_t ts,uint32_t gpuId,uint32_t pid,uint64_t size)> & callback)138 void GpuMem::traverseGpuMemTotals(const std::function<void(int64_t ts, uint32_t gpuId, uint32_t pid,
139                                                            uint64_t size)>& callback) {
140     auto res = mGpuMemTotalMap.getFirstKey();
141     if (!res.ok()) return;
142     uint64_t key = res.value();
143     while (true) {
144         uint32_t gpu_id = key >> 32;
145         uint32_t pid = key;
146 
147         res = mGpuMemTotalMap.readValue(key);
148         if (!res.ok()) break;
149         uint64_t size = res.value();
150 
151         callback(systemTime(), gpu_id, pid, size);
152         res = mGpuMemTotalMap.getNextKey(key);
153         if (!res.ok()) break;
154         key = res.value();
155     }
156 }
157 
158 } // namespace android
159