1 /*
2 * Copyright (c) 2020, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #define LOG_TAG "carwatchdogd"
18 #define DEBUG false // STOPSHIP if true.
19
20 #include "IoOveruseMonitor.h"
21
22 #include "PackageInfoResolver.h"
23 #include "ServiceManager.h"
24
25 #include <WatchdogProperties.sysprop.h>
26 #include <aidl/android/automotive/watchdog/IResourceOveruseListener.h>
27 #include <aidl/android/automotive/watchdog/ResourceOveruseStats.h>
28 #include <aidl/android/automotive/watchdog/internal/PackageIdentifier.h>
29 #include <aidl/android/automotive/watchdog/internal/UidType.h>
30 #include <android-base/file.h>
31 #include <android-base/strings.h>
32 #include <android/util/ProtoOutputStream.h>
33 #include <binder/IPCThreadState.h>
34 #include <log/log.h>
35 #include <processgroup/sched_policy.h>
36
37 #include <pthread.h>
38
39 #include <limits>
40 #include <thread> // NOLINT(build/c++11)
41
42 namespace android {
43 namespace automotive {
44 namespace watchdog {
45
46 namespace {
47
48 using ::aidl::android::automotive::watchdog::IoOveruseStats;
49 using ::aidl::android::automotive::watchdog::IResourceOveruseListener;
50 using ::aidl::android::automotive::watchdog::PerStateBytes;
51 using ::aidl::android::automotive::watchdog::internal::ComponentType;
52 using ::aidl::android::automotive::watchdog::internal::IoOveruseConfiguration;
53 using ::aidl::android::automotive::watchdog::internal::IoUsageStats;
54 using ::aidl::android::automotive::watchdog::internal::PackageIdentifier;
55 using ::aidl::android::automotive::watchdog::internal::PackageInfo;
56 using ::aidl::android::automotive::watchdog::internal::PackageIoOveruseStats;
57 using ::aidl::android::automotive::watchdog::internal::ResourceOveruseConfiguration;
58 using ::aidl::android::automotive::watchdog::internal::ResourceOveruseStats;
59 using ::aidl::android::automotive::watchdog::internal::ResourceStats;
60 using ::aidl::android::automotive::watchdog::internal::UidType;
61 using ::aidl::android::automotive::watchdog::internal::UserPackageIoUsageStats;
62 using ::android::IPCThreadState;
63 using ::android::sp;
64 using ::android::base::EndsWith;
65 using ::android::base::Error;
66 using ::android::base::Result;
67 using ::android::base::StringPrintf;
68 using ::android::base::WriteStringToFd;
69 using ::android::util::ProtoOutputStream;
70 using ::ndk::ScopedAIBinder_DeathRecipient;
71 using ::ndk::SpAIBinder;
72
// Upper bounds used by the saturating arithmetic helpers in this file. Both are stored as int64_t
// so they can be compared against 64-bit operands without narrowing.
constexpr int64_t kMaxInt32 = std::numeric_limits<int32_t>::max();
constexpr int64_t kMaxInt64 = std::numeric_limits<int64_t>::max();
// Minimum written bytes to sync the stats with the Watchdog service.
constexpr int64_t kMinSyncWrittenBytes = 100 * 1024;
// Minimum percentage of threshold to warn killable applications.
constexpr double kDefaultIoOveruseWarnPercentage = 80;
// Maximum number of system-wide stats (from periodic monitoring) to cache.
constexpr size_t kMaxPeriodicMonitorBufferSize = 1000;
// printf-style template for the dump help text. It takes two "%s" arguments; |dumpHelpText|
// supplies the monitor name followed by the reset-resource-overuse-stats flag.
constexpr const char* kHelpText =
        "\n%s dump options:\n"
        "%s <package name>, <package name>,...: Reset resource overuse stats for the given package "
        "names. Value for this flag is a comma-separated value containing package names.\n";
85
uniquePackageIdStr(const std::string & name,userid_t userId)86 std::string uniquePackageIdStr(const std::string& name, userid_t userId) {
87 return StringPrintf("%s:%" PRId32, name.c_str(), userId);
88 }
89
uniquePackageIdStr(const PackageIdentifier & id)90 std::string uniquePackageIdStr(const PackageIdentifier& id) {
91 return uniquePackageIdStr(id.name, multiuser_get_user_id(id.uid));
92 }
93
// Adds two per-state byte counters field by field. Each field saturates at kMaxInt64 instead of
// overflowing.
PerStateBytes sum(const PerStateBytes& lhs, const PerStateBytes& rhs) {
    const auto saturatingAdd = [](const int64_t& a, const int64_t& b) -> int64_t {
        if ((kMaxInt64 - a) > b) {
            return a + b;
        }
        return kMaxInt64;
    };
    PerStateBytes total;
    total.foregroundBytes = saturatingAdd(lhs.foregroundBytes, rhs.foregroundBytes);
    total.backgroundBytes = saturatingAdd(lhs.backgroundBytes, rhs.backgroundBytes);
    total.garageModeBytes = saturatingAdd(lhs.garageModeBytes, rhs.garageModeBytes);
    return total;
}
104
// Subtracts |rhs| from |lhs| field by field. A field whose subtraction would go negative is
// floored at zero.
PerStateBytes diff(const PerStateBytes& lhs, const PerStateBytes& rhs) {
    const auto flooredSub = [](const int64_t& minuend, const int64_t& subtrahend) -> int64_t {
        if (minuend < subtrahend) {
            return 0;
        }
        return minuend - subtrahend;
    };
    PerStateBytes delta;
    delta.foregroundBytes = flooredSub(lhs.foregroundBytes, rhs.foregroundBytes);
    delta.backgroundBytes = flooredSub(lhs.backgroundBytes, rhs.backgroundBytes);
    delta.garageModeBytes = flooredSub(lhs.garageModeBytes, rhs.garageModeBytes);
    return delta;
}
115
// Splits a broken-down UTC time into the epoch seconds of the start of that day and the number of
// seconds elapsed since then.
//
// @param currentTm Broken-down time, interpreted as UTC by timegm.
// @return Tuple of (start of day in epoch seconds, seconds elapsed since the start of day).
std::tuple<int64_t, int64_t> calculateStartAndDuration(struct tm currentTm) {
    // The stats are stored per-day so the start time is always the beginning of the day.
    auto startTm = currentTm;
    startTm.tm_sec = 0;
    startTm.tm_min = 0;
    startTm.tm_hour = 0;

    int64_t startTime = static_cast<int64_t>(timegm(&startTm));
    int64_t currentEpochSeconds = static_cast<int64_t>(timegm(&currentTm));
    return std::make_tuple(startTime, currentEpochSeconds - startTime);
}
127
totalPerStateBytes(PerStateBytes perStateBytes)128 int64_t totalPerStateBytes(PerStateBytes perStateBytes) {
129 const auto sum = [](const int64_t& l, const int64_t& r) -> int64_t {
130 return kMaxInt64 - l > r ? (l + r) : kMaxInt64;
131 };
132 return sum(perStateBytes.foregroundBytes,
133 sum(perStateBytes.backgroundBytes, perStateBytes.garageModeBytes));
134 }
135
calculateOveruseAndForgivenBytes(PerStateBytes writtenBytes,PerStateBytes threshold)136 std::tuple<int32_t, PerStateBytes> calculateOveruseAndForgivenBytes(PerStateBytes writtenBytes,
137 PerStateBytes threshold) {
138 const auto div = [](const int64_t& l, const int64_t& r) -> int32_t {
139 return r > 0 ? (l / r) : 1;
140 };
141 const auto mul = [](const int32_t& l, const int32_t& r) -> int32_t {
142 if (l == 0 || r == 0) {
143 return 0;
144 }
145 return (kMaxInt32 / r) > l ? (l * r) : kMaxInt32;
146 };
147 const auto sum = [](const int32_t& l, const int32_t& r) -> int32_t {
148 return (kMaxInt32 - l) > r ? (l + r) : kMaxInt32;
149 };
150 int32_t foregroundOveruses = div(writtenBytes.foregroundBytes, threshold.foregroundBytes);
151 int32_t backgroundOveruses = div(writtenBytes.backgroundBytes, threshold.backgroundBytes);
152 int32_t garageModeOveruses = div(writtenBytes.garageModeBytes, threshold.garageModeBytes);
153 int32_t totalOveruses = sum(foregroundOveruses, sum(backgroundOveruses, garageModeOveruses));
154
155 PerStateBytes forgivenWriteBytes;
156 forgivenWriteBytes.foregroundBytes = mul(foregroundOveruses, threshold.foregroundBytes);
157 forgivenWriteBytes.backgroundBytes = mul(backgroundOveruses, threshold.backgroundBytes);
158 forgivenWriteBytes.garageModeBytes = mul(garageModeOveruses, threshold.garageModeBytes);
159
160 return std::make_tuple(totalOveruses, forgivenWriteBytes);
161 }
162
onBinderDied(void * cookie)163 void onBinderDied(void* cookie) {
164 const auto& thiz = ServiceManager::getInstance()->getIoOveruseMonitor();
165 if (thiz == nullptr) {
166 return;
167 }
168 thiz->handleBinderDeath(cookie);
169 }
170
171 } // namespace
172
calculateStartAndDuration(const time_point_millis & currentTime)173 std::tuple<int64_t, int64_t> calculateStartAndDuration(const time_point_millis& currentTime) {
174 auto timeInSeconds = std::chrono::system_clock::to_time_t(currentTime);
175 struct tm currentGmt;
176 gmtime_r(&timeInSeconds, ¤tGmt);
177 return calculateStartAndDuration(currentGmt);
178 }
179
// Constructs the monitor with empty caches. The configuration-dependent members (periodic monitor
// buffer size, warn percentage, I/O overuse configs and package info resolver) are assigned later
// in |init|.
//
// @param watchdogServiceHelper Helper used to talk to the watchdog service.
IoOveruseMonitor::IoOveruseMonitor(
        const android::sp<WatchdogServiceHelperInterface>& watchdogServiceHelper) :
      mMinSyncWrittenBytes(kMinSyncWrittenBytes),
      mWatchdogServiceHelper(watchdogServiceHelper),
      mDeathRegistrationWrapper(sp<AIBinderDeathRegistrationWrapper>::make()),
      mDidReadTodayPrevBootStats(false),
      mSystemWideWrittenBytes({}),
      // Zero until |init| reads the configured buffer size.
      mPeriodicMonitorBufferSize(0),
      // Zero marks that no periodic monitor poll has been recorded yet.
      mLastSystemWideIoMonitorTime(0),
      mUserPackageDailyIoUsageById({}),
      mIoOveruseWarnPercentage(0),
      mLastUserPackageIoMonitorTime(time_point_millis::min()),
      mOveruseListenersByUid({}),
      // Death notifications are routed through the file-local |onBinderDied| callback.
      mBinderDeathRecipient(
              ScopedAIBinder_DeathRecipient(AIBinder_DeathRecipient_new(onBinderDied))) {}
195
init()196 Result<void> IoOveruseMonitor::init() {
197 std::unique_lock writeLock(mRwMutex);
198 if (isInitializedLocked()) {
199 return Error() << "Cannot initialize " << name() << " more than once";
200 }
201 mPeriodicMonitorBufferSize = static_cast<size_t>(
202 sysprop::periodicMonitorBufferSize().value_or(kDefaultPeriodicMonitorBufferSize));
203 if (mPeriodicMonitorBufferSize == 0 ||
204 mPeriodicMonitorBufferSize > kMaxPeriodicMonitorBufferSize) {
205 return Error() << "Periodic monitor buffer size cannot be zero or above "
206 << kDefaultPeriodicMonitorBufferSize << ". Received "
207 << mPeriodicMonitorBufferSize;
208 }
209 mIoOveruseWarnPercentage = static_cast<double>(
210 sysprop::ioOveruseWarnPercentage().value_or(kDefaultIoOveruseWarnPercentage));
211 mIoOveruseConfigs = sp<IoOveruseConfigs>::make();
212 mPackageInfoResolver = PackageInfoResolver::getInstance();
213 mPackageInfoResolver->setPackageConfigurations(mIoOveruseConfigs->vendorPackagePrefixes(),
214 mIoOveruseConfigs->packagesToAppCategories());
215 if (DEBUG) {
216 ALOGD("Initialized %s data processor", name().c_str());
217 }
218 return {};
219 }
220
terminate()221 void IoOveruseMonitor::terminate() {
222 ALOGW("Terminating %s", name().c_str());
223 if (mWriteToDiskThread.joinable()) {
224 mWriteToDiskThread.join();
225 ALOGI("Write to disk has completed. Proceeding with termination");
226 }
227 std::unique_lock writeLock(mRwMutex);
228 mWatchdogServiceHelper.clear();
229 mIoOveruseConfigs.clear();
230 mSystemWideWrittenBytes.clear();
231 mUserPackageDailyIoUsageById.clear();
232 for (const auto& [_, listener] : mOveruseListenersByUid) {
233 AIBinder* aiBinder = listener->asBinder().get();
234 mDeathRegistrationWrapper->unlinkToDeath(aiBinder, mBinderDeathRecipient.get(),
235 static_cast<void*>(aiBinder));
236 }
237 mOveruseListenersByUid.clear();
238 if (DEBUG) {
239 ALOGD("Terminated %s data processor", name().c_str());
240 }
241 return;
242 }
243
onCarWatchdogServiceRegistered()244 void IoOveruseMonitor::onCarWatchdogServiceRegistered() {
245 std::unique_lock writeLock(mRwMutex);
246 if (!mDidReadTodayPrevBootStats) {
247 requestTodayIoUsageStatsLocked();
248 }
249 }
250
// Processes the per-UID I/O delta stats from the latest periodic collection.
//
// Under the write lock this method:
//   1. Requests the previous boot's stats for today when they haven't been read yet.
//   2. Clears the daily usage cache when the GMT date changed since the last collection.
//   3. For every UID with non-zero written bytes and package info, accumulates the usage into
//      the daily cache, counts new overuses against the package threshold, and decides whether
//      the package should be notified and/or synced with the watchdog service.
//   4. Notifies the registered listeners of overusing native packages.
//   5. Copies the pending stats into |resourceStats| and clears the pending cache.
//
// @param time Collection time.
// @param systemState Current system state; only GARAGE_MODE is distinguished here.
// @param uidStatsCollector Source of the per-UID delta stats. Must be promotable.
// @param procStatCollector Unused.
// @param resourceStats Output; its packageIoOveruseStats is populated when there are stats to
//        report. It is dereferenced without a null check.
// @return Error when |uidStatsCollector| cannot be promoted, otherwise success.
Result<void> IoOveruseMonitor::onPeriodicCollection(
        time_point_millis time, SystemState systemState,
        const android::wp<UidStatsCollectorInterface>& uidStatsCollector,
        [[maybe_unused]] const android::wp<ProcStatCollectorInterface>& procStatCollector,
        ResourceStats* resourceStats) {
    android::sp<UidStatsCollectorInterface> uidStatsCollectorSp = uidStatsCollector.promote();
    if (uidStatsCollectorSp == nullptr) {
        return Error() << "Per-UID I/O stats collector must not be null";
    }

    auto timeInSeconds = std::chrono::system_clock::to_time_t(time);

    std::unique_lock writeLock(mRwMutex);
    if (!mDidReadTodayPrevBootStats) {
        requestTodayIoUsageStatsLocked();
    }
    struct tm prevGmt, curGmt;
    auto mLastUserPackageIoMonitorTimeInSeconds =
            std::chrono::system_clock::to_time_t(mLastUserPackageIoMonitorTime);
    gmtime_r(&mLastUserPackageIoMonitorTimeInSeconds, &prevGmt);
    gmtime_r(&timeInSeconds, &curGmt);
    if (prevGmt.tm_yday != curGmt.tm_yday || prevGmt.tm_year != curGmt.tm_year) {
        /*
         * Date changed so reset the daily I/O usage cache. CarWatchdogService automatically handles
         * date change on |CarWatchdogService.latestIoOveruseStats| call.
         */
        mUserPackageDailyIoUsageById.clear();
    }
    mLastUserPackageIoMonitorTime = time;
    const auto [startTime, durationInSeconds] = calculateStartAndDuration(curGmt);

    auto uidStats = uidStatsCollectorSp->deltaStats();
    if (uidStats.empty()) {
        return {};
    }
    // Overuse stats of native UIDs, delivered to their registered listeners after the loop.
    std::unordered_map<uid_t, IoOveruseStats> overusingNativeStats;
    bool isGarageModeActive = systemState == SystemState::GARAGE_MODE;
    for (const auto& curUidStats : uidStats) {
        if (curUidStats.ioStats.sumWriteBytes() == 0 || !curUidStats.hasPackageInfo()) {
            /* 1. Ignore UIDs with zero written bytes since the last collection because they are
             * either already accounted for or no writes made since system start.
             *
             * 2. UID stats without package info is not useful because the stats isn't attributed to
             * any package/service.
             */
            continue;
        }
        UserPackageIoUsage curUsage(curUidStats.packageInfo, curUidStats.ioStats,
                                    isGarageModeActive);

        // Fold in the stats cached from the previous boot, then drop them so they are counted
        // only once.
        if (!mPrevBootIoUsageStatsById.empty()) {
            if (auto prevBootStats = mPrevBootIoUsageStatsById.find(curUsage.id());
                prevBootStats != mPrevBootIoUsageStatsById.end()) {
                curUsage += prevBootStats->second;
                mPrevBootIoUsageStatsById.erase(prevBootStats);
            }
        }
        // Accumulate into the existing daily entry or insert a new one.
        UserPackageIoUsage* dailyIoUsage;
        if (auto cachedUsage = mUserPackageDailyIoUsageById.find(curUsage.id());
            cachedUsage != mUserPackageDailyIoUsageById.end()) {
            cachedUsage->second += curUsage;
            dailyIoUsage = &cachedUsage->second;
        } else {
            const auto& [it, wasInserted] = mUserPackageDailyIoUsageById.insert(
                    std::pair(curUsage.id(), std::move(curUsage)));
            dailyIoUsage = &it->second;
        }

        const auto threshold = mIoOveruseConfigs->fetchThreshold(dailyIoUsage->packageInfo);

        // Only the bytes not yet forgiven by earlier overuses count against the threshold.
        const auto deltaWrittenBytes =
                diff(dailyIoUsage->writtenBytes, dailyIoUsage->forgivenWriteBytes);
        const auto [currentOveruses, forgivenWriteBytes] =
                calculateOveruseAndForgivenBytes(deltaWrittenBytes, threshold);
        dailyIoUsage->totalOveruses += currentOveruses;
        dailyIoUsage->forgivenWriteBytes =
                sum(dailyIoUsage->forgivenWriteBytes, forgivenWriteBytes);

        PackageIoOveruseStats stats;
        stats.uid = curUidStats.packageInfo.packageIdentifier.uid;
        stats.shouldNotify = false;
        stats.forgivenWriteBytes = dailyIoUsage->forgivenWriteBytes;
        stats.ioOveruseStats.startTime = startTime;
        stats.ioOveruseStats.durationInSeconds = durationInSeconds;
        stats.ioOveruseStats.writtenBytes = dailyIoUsage->writtenBytes;
        stats.ioOveruseStats.totalOveruses = dailyIoUsage->totalOveruses;
        stats.ioOveruseStats.remainingWriteBytes = diff(threshold, deltaWrittenBytes);
        stats.ioOveruseStats.killableOnOveruse =
                mIoOveruseConfigs->isSafeToKill(dailyIoUsage->packageInfo);

        const auto& remainingWriteBytes = stats.ioOveruseStats.remainingWriteBytes;
        // Returns true when the used share of a state's threshold is above the warn percentage.
        // A zero threshold always exceeds it.
        const auto exceedsWarnThreshold = [&](double remaining, double threshold) {
            if (threshold == 0) {
                return true;
            }
            double usedPercent = (100 - (remaining / threshold) * 100);
            return usedPercent > mIoOveruseWarnPercentage;
        };
        // Sync when enough bytes were written since the last sync; overuses and warnings below
        // force a sync regardless.
        bool shouldSyncWatchdogService =
                (totalPerStateBytes(dailyIoUsage->writtenBytes) -
                 dailyIoUsage->lastSyncedWrittenBytes) >= mMinSyncWrittenBytes;
        if (currentOveruses > 0) {
            // A new overuse clears the warned flag so the package can be warned again.
            dailyIoUsage->isPackageWarned = false;
            /*
             * Send notifications for native service I/O overuses as well because system listeners
             * need to be notified of all I/O overuses.
             */
            stats.shouldNotify = true;
            if (dailyIoUsage->packageInfo.uidType == UidType::NATIVE) {
                overusingNativeStats[stats.uid] = stats.ioOveruseStats;
            }
            shouldSyncWatchdogService = true;
        } else if (dailyIoUsage->packageInfo.uidType == UidType::APPLICATION &&
                   stats.ioOveruseStats.killableOnOveruse && !dailyIoUsage->isPackageWarned &&
                   (exceedsWarnThreshold(remainingWriteBytes.foregroundBytes,
                                         threshold.foregroundBytes) ||
                    exceedsWarnThreshold(remainingWriteBytes.backgroundBytes,
                                         threshold.backgroundBytes) ||
                    exceedsWarnThreshold(remainingWriteBytes.garageModeBytes,
                                         threshold.garageModeBytes))) {
            /*
             * No need to warn native services or applications that won't be killed on I/O overuse
             * as they will be sent a notification when they exceed their daily threshold.
             */
            stats.shouldNotify = true;
            // Avoid duplicate warning before the daily threshold exceeded notification is sent.
            dailyIoUsage->isPackageWarned = true;
            shouldSyncWatchdogService = true;
        }
        if (shouldSyncWatchdogService) {
            dailyIoUsage->lastSyncedWrittenBytes = totalPerStateBytes(dailyIoUsage->writtenBytes);
            mLatestIoOveruseStats.emplace_back(std::move(stats));
        }
    }
    if (!overusingNativeStats.empty()) {
        notifyNativePackagesLocked(overusingNativeStats);
    }
    if (mLatestIoOveruseStats.empty()) {
        return {};
    }
    if (!(resourceStats->resourceOveruseStats).has_value()) {
        resourceStats->resourceOveruseStats = std::make_optional<ResourceOveruseStats>({});
    }
    resourceStats->resourceOveruseStats->packageIoOveruseStats = mLatestIoOveruseStats;
    // Clear the cache
    mLatestIoOveruseStats.clear();
    return {};
}
399
onCustomCollection(time_point_millis time,SystemState systemState,const std::unordered_set<std::string> & filterPackages,const android::wp<UidStatsCollectorInterface> & uidStatsCollector,const android::wp<ProcStatCollectorInterface> & procStatCollector,ResourceStats * resourceStats)400 Result<void> IoOveruseMonitor::onCustomCollection(
401 time_point_millis time, SystemState systemState,
402 [[maybe_unused]] const std::unordered_set<std::string>& filterPackages,
403 const android::wp<UidStatsCollectorInterface>& uidStatsCollector,
404 const android::wp<ProcStatCollectorInterface>& procStatCollector,
405 ResourceStats* resourceStats) {
406 // Nothing special for custom collection.
407 return onPeriodicCollection(time, systemState, uidStatsCollector, procStatCollector,
408 resourceStats);
409 }
410
onPeriodicMonitor(time_t time,const android::wp<ProcDiskStatsCollectorInterface> & procDiskStatsCollector,const std::function<void ()> & alertHandler)411 Result<void> IoOveruseMonitor::onPeriodicMonitor(
412 time_t time, const android::wp<ProcDiskStatsCollectorInterface>& procDiskStatsCollector,
413 const std::function<void()>& alertHandler) {
414 if (procDiskStatsCollector == nullptr) {
415 return Error() << "Proc disk stats collector must not be null";
416 }
417
418 std::unique_lock writeLock(mRwMutex);
419 if (mLastSystemWideIoMonitorTime == 0) {
420 /*
421 * Do not record the first disk stats as it reflects the aggregated disks stats since the
422 * system boot up and is not in sync with the polling period. This will lead to spurious
423 * I/O overuse alerting.
424 */
425 mLastSystemWideIoMonitorTime = time;
426 return {};
427 }
428 const auto diskStats = procDiskStatsCollector.promote()->deltaSystemWideDiskStats();
429 mSystemWideWrittenBytes.push_back(
430 {.pollDurationInSecs = difftime(time, mLastSystemWideIoMonitorTime),
431 .bytesInKib = diskStats.numKibWritten});
432 for (const auto& threshold : mIoOveruseConfigs->systemWideAlertThresholds()) {
433 int64_t accountedWrittenKib = 0;
434 double accountedDurationInSecs = 0;
435 size_t accountedPolls = 0;
436 for (auto rit = mSystemWideWrittenBytes.rbegin(); rit != mSystemWideWrittenBytes.rend();
437 ++rit) {
438 accountedWrittenKib += rit->bytesInKib;
439 accountedDurationInSecs += rit->pollDurationInSecs;
440 ++accountedPolls;
441 if (accountedDurationInSecs >= threshold.durationInSeconds) {
442 break;
443 }
444 }
445 // Heuristic to handle spurious alerting when the buffer is partially filled.
446 if (const size_t bufferSize = mSystemWideWrittenBytes.size();
447 accountedPolls == bufferSize && bufferSize < mPeriodicMonitorBufferSize + 1 &&
448 threshold.durationInSeconds > accountedDurationInSecs) {
449 continue;
450 }
451 const double thresholdKbps = threshold.writtenBytesPerSecond / 1024.0;
452 if (const auto kbps = accountedWrittenKib / accountedDurationInSecs;
453 kbps >= thresholdKbps) {
454 alertHandler();
455 break;
456 }
457 }
458 if (mSystemWideWrittenBytes.size() > mPeriodicMonitorBufferSize) {
459 mSystemWideWrittenBytes.erase(mSystemWideWrittenBytes.begin()); // Erase the oldest entry.
460 }
461 mLastSystemWideIoMonitorTime = time;
462 return {};
463 }
464
onDump(int fd) const465 Result<void> IoOveruseMonitor::onDump([[maybe_unused]] int fd) const {
466 // TODO(b/183436216): Dump the list of killed/disabled packages. Dump the list of packages that
467 // exceed xx% of their threshold.
468 return {};
469 }
470
onDumpProto(const CollectionIntervals & collectionIntervals,ProtoOutputStream & outProto) const471 Result<void> IoOveruseMonitor::onDumpProto(
472 [[maybe_unused]] const CollectionIntervals& collectionIntervals,
473 [[maybe_unused]] ProtoOutputStream& outProto) const {
474 // TODO(b/296123577): Dump the list of killed/disabled packages in proto format.
475 return {};
476 }
477
dumpHelpText(int fd) const478 bool IoOveruseMonitor::dumpHelpText(int fd) const {
479 return WriteStringToFd(StringPrintf(kHelpText, name().c_str(), kResetResourceOveruseStatsFlag),
480 fd);
481 }
482
requestTodayIoUsageStatsLocked()483 void IoOveruseMonitor::requestTodayIoUsageStatsLocked() {
484 if (const auto status = mWatchdogServiceHelper->requestTodayIoUsageStats(); !status.isOk()) {
485 // Request made only after CarWatchdogService connection is established. Logging the error
486 // is enough in this case.
487 ALOGE("Failed to request today I/O usage stats collected during previous boot: %s",
488 status.getMessage());
489 return;
490 }
491 if (DEBUG) {
492 ALOGD("Requested today's I/O usage stats collected during previous boot.");
493 }
494 }
495
onTodayIoUsageStatsFetched(const std::vector<UserPackageIoUsageStats> & userPackageIoUsageStats)496 Result<void> IoOveruseMonitor::onTodayIoUsageStatsFetched(
497 const std::vector<UserPackageIoUsageStats>& userPackageIoUsageStats) {
498 std::unique_lock writeLock(mRwMutex);
499 if (mDidReadTodayPrevBootStats) {
500 return {};
501 }
502 for (const auto& statsEntry : userPackageIoUsageStats) {
503 std::string uniqueId = uniquePackageIdStr(statsEntry.packageName,
504 static_cast<userid_t>(statsEntry.userId));
505 if (auto it = mUserPackageDailyIoUsageById.find(uniqueId);
506 it != mUserPackageDailyIoUsageById.end()) {
507 it->second += statsEntry.ioUsageStats;
508 continue;
509 }
510 mPrevBootIoUsageStatsById.insert(std::pair(uniqueId, statsEntry.ioUsageStats));
511 }
512 mDidReadTodayPrevBootStats = true;
513 return {};
514 }
515
notifyNativePackagesLocked(const std::unordered_map<uid_t,IoOveruseStats> & statsByUid)516 void IoOveruseMonitor::notifyNativePackagesLocked(
517 const std::unordered_map<uid_t, IoOveruseStats>& statsByUid) {
518 for (const auto& [uid, ioOveruseStats] : statsByUid) {
519 IResourceOveruseListener* listener;
520 if (const auto it = mOveruseListenersByUid.find(uid); it == mOveruseListenersByUid.end()) {
521 continue;
522 } else {
523 listener = it->second.get();
524 }
525 aidl::android::automotive::watchdog::ResourceOveruseStats stats;
526 stats.set<aidl::android::automotive::watchdog::ResourceOveruseStats::ioOveruseStats>(
527 ioOveruseStats);
528 listener->onOveruse(stats);
529 }
530 if (DEBUG) {
531 ALOGD("Notified native packages on I/O overuse");
532 }
533 }
534
updateResourceOveruseConfigurations(const std::vector<ResourceOveruseConfiguration> & configs)535 Result<void> IoOveruseMonitor::updateResourceOveruseConfigurations(
536 const std::vector<ResourceOveruseConfiguration>& configs) {
537 std::unique_lock writeLock(mRwMutex);
538 if (!isInitializedLocked()) {
539 return Error(EX_ILLEGAL_STATE) << name() << " is not initialized";
540 }
541 if (const auto result = mIoOveruseConfigs->update(configs); !result.ok()) {
542 return result;
543 }
544 // When mWriteToDiskThread is already active, don't create a new thread to perform the same
545 // work. This thread writes to disk only after acquiring the mRwMutex write lock and the below
546 // check is performed after acquiring the same write lock. Thus, if the thread is still active
547 // and mIsWriteToDiskPending is true at this point, it indicates the thread hasn't performed
548 // the write and will write the latest updated configs when it executes.
549 if (bool isJoinable = mWriteToDiskThread.joinable(); isJoinable && mIsWriteToDiskPending) {
550 ALOGW("Skipping resource overuse configs write to disk due to ongoing write");
551 return {};
552 } else if (isJoinable) {
553 // At this point we know the thread has completed execution. Join the thread before
554 // creating a new one. Failure to join can lead to a crash since std::thread cannot
555 // destruct a thread object without first calling join.
556 mWriteToDiskThread.join();
557 }
558 mIsWriteToDiskPending = true;
559 mWriteToDiskThread = std::thread([&]() {
560 ALOGI("Writing resource overuse configs to disk");
561 if (set_sched_policy(0, SP_BACKGROUND) != 0) {
562 ALOGW("Failed to set background scheduling priority for writing resource overuse "
563 "configs to disk");
564 }
565 if (int result = pthread_setname_np(pthread_self(), "ResOveruseCfgWr"); result != 0) {
566 ALOGE("Failed to set thread name to 'ResOveruseCfgWr'");
567 }
568 std::unique_lock writeLock(mRwMutex);
569 if (mIoOveruseConfigs == nullptr) {
570 ALOGE("IoOveruseConfigs instance is null");
571 } else if (const auto result = mIoOveruseConfigs->writeToDisk(); !result.ok()) {
572 ALOGE("Failed to write resource overuse configs to disk: %s",
573 result.error().message().c_str());
574 } else {
575 ALOGI("Successfully wrote resource overuse configs to disk");
576 }
577 mIsWriteToDiskPending = false;
578 });
579
580 return {};
581 }
582
getResourceOveruseConfigurations(std::vector<ResourceOveruseConfiguration> * configs) const583 Result<void> IoOveruseMonitor::getResourceOveruseConfigurations(
584 std::vector<ResourceOveruseConfiguration>* configs) const {
585 std::shared_lock readLock(mRwMutex);
586 if (!isInitializedLocked()) {
587 return Error(EX_ILLEGAL_STATE) << name() << " is not initialized";
588 }
589 mIoOveruseConfigs->get(configs);
590 return {};
591 }
592
addIoOveruseListener(const std::shared_ptr<IResourceOveruseListener> & listener)593 Result<void> IoOveruseMonitor::addIoOveruseListener(
594 const std::shared_ptr<IResourceOveruseListener>& listener) {
595 if (listener == nullptr) {
596 return Error(EX_ILLEGAL_ARGUMENT) << "Must provide non-null listener";
597 }
598 auto binder = listener->asBinder();
599 pid_t callingPid = IPCThreadState::self()->getCallingPid();
600 uid_t callingUid = IPCThreadState::self()->getCallingUid();
601 {
602 std::unique_lock writeLock(mRwMutex);
603 if (!isInitializedLocked()) {
604 // mBinderDeathRecipient is initialized inside init.
605 return Error(EX_ILLEGAL_STATE) << "Service is not initialized";
606 }
607 if (findListenerAndProcessLocked(reinterpret_cast<uintptr_t>(binder.get()), nullptr)) {
608 ALOGW("Failed to register the I/O overuse listener (pid: %d, uid: %d) as it is already "
609 "registered",
610 callingPid, callingUid);
611 return {};
612 }
613 mOveruseListenersByUid[callingUid] = listener;
614 }
615 AIBinder* aiBinder = binder.get();
616 auto status = mDeathRegistrationWrapper->linkToDeath(aiBinder, mBinderDeathRecipient.get(),
617 static_cast<void*>(aiBinder));
618 if (!status.isOk()) {
619 std::unique_lock writeLock(mRwMutex);
620 if (const auto& it = mOveruseListenersByUid.find(callingUid);
621 it != mOveruseListenersByUid.end() && it->second->asBinder() == binder) {
622 mOveruseListenersByUid.erase(it);
623 }
624 return Error(EX_ILLEGAL_STATE) << "Failed to add I/O overuse listener: (pid " << callingPid
625 << ", uid: " << callingUid << ") is dead";
626 }
627 if (DEBUG) {
628 ALOGD("Added I/O overuse listener for uid: %d", callingUid);
629 }
630 return {};
631 }
632
removeIoOveruseListener(const std::shared_ptr<IResourceOveruseListener> & listener)633 Result<void> IoOveruseMonitor::removeIoOveruseListener(
634 const std::shared_ptr<IResourceOveruseListener>& listener) {
635 if (listener == nullptr) {
636 return Error(EX_ILLEGAL_ARGUMENT) << "Must provide non-null listener";
637 }
638 std::unique_lock writeLock(mRwMutex);
639 if (!isInitializedLocked()) {
640 // mBinderDeathRecipient is initialized inside init.
641 return Error(EX_ILLEGAL_STATE) << "Service is not initialized";
642 }
643 const auto processor = [&](ListenersByUidMap& listeners, ListenersByUidMap::const_iterator it) {
644 AIBinder* aiBinder = it->second->asBinder().get();
645 mDeathRegistrationWrapper->unlinkToDeath(aiBinder, mBinderDeathRecipient.get(),
646 static_cast<void*>(aiBinder));
647 listeners.erase(it);
648 };
649 if (!findListenerAndProcessLocked(reinterpret_cast<uintptr_t>(listener->asBinder().get()),
650 processor)) {
651 return Error(EX_ILLEGAL_ARGUMENT) << "Listener is not previously registered";
652 }
653 if (DEBUG) {
654 ALOGD("Removed I/O overuse listener for uid: %d", IPCThreadState::self()->getCallingUid());
655 }
656 return {};
657 }
658
getIoOveruseStats(IoOveruseStats * ioOveruseStats) const659 Result<void> IoOveruseMonitor::getIoOveruseStats(IoOveruseStats* ioOveruseStats) const {
660 if (!isInitialized()) {
661 return Error(EX_ILLEGAL_STATE) << "I/O overuse monitor is not initialized";
662 }
663 uid_t callingUid = IPCThreadState::self()->getCallingUid();
664 const auto packageInfosByUid = mPackageInfoResolver->getPackageInfosForUids({callingUid});
665 const PackageInfo* packageInfo;
666 if (const auto it = packageInfosByUid.find(callingUid); it == packageInfosByUid.end()) {
667 return Error(EX_ILLEGAL_ARGUMENT)
668 << "Package information not available for calling UID(" << callingUid << ")";
669 } else {
670 packageInfo = &it->second;
671 }
672 std::shared_lock readLock(mRwMutex);
673 const UserPackageIoUsage* dailyIoUsage;
674 if (const auto it = mUserPackageDailyIoUsageById.find(
675 uniquePackageIdStr(packageInfo->packageIdentifier));
676 it == mUserPackageDailyIoUsageById.end()) {
677 return Error(EX_ILLEGAL_ARGUMENT)
678 << "Calling UID " << callingUid << " doesn't have I/O overuse stats";
679 } else {
680 dailyIoUsage = &it->second;
681 }
682 ioOveruseStats->killableOnOveruse = mIoOveruseConfigs->isSafeToKill(*packageInfo);
683 const auto thresholdBytes = mIoOveruseConfigs->fetchThreshold(*packageInfo);
684 ioOveruseStats->remainingWriteBytes =
685 diff(thresholdBytes,
686 diff(dailyIoUsage->writtenBytes, dailyIoUsage->forgivenWriteBytes));
687 ioOveruseStats->totalOveruses = dailyIoUsage->totalOveruses;
688 ioOveruseStats->writtenBytes = dailyIoUsage->writtenBytes;
689 const auto [startTime, durationInSeconds] =
690 calculateStartAndDuration(mLastUserPackageIoMonitorTime);
691 ioOveruseStats->startTime = startTime;
692 ioOveruseStats->durationInSeconds = durationInSeconds;
693 if (DEBUG) {
694 ALOGD("Returning I/O overuse stats for uid: %d", callingUid);
695 }
696 return {};
697 }
698
resetIoOveruseStats(const std::vector<std::string> & packageNames)699 Result<void> IoOveruseMonitor::resetIoOveruseStats(const std::vector<std::string>& packageNames) {
700 if (const auto status = mWatchdogServiceHelper->resetResourceOveruseStats(packageNames);
701 !status.isOk()) {
702 return Error() << "Failed to reset stats in watchdog service: " << status.getDescription();
703 }
704 std::unordered_set<std::string> uniquePackageNames;
705 std::copy(packageNames.begin(), packageNames.end(),
706 std::inserter(uniquePackageNames, uniquePackageNames.end()));
707 std::unique_lock writeLock(mRwMutex);
708 for (auto& [key, usage] : mUserPackageDailyIoUsageById) {
709 if (uniquePackageNames.find(usage.packageInfo.packageIdentifier.name) !=
710 uniquePackageNames.end()) {
711 usage.resetStats();
712 }
713 }
714 return {};
715 }
716
removeStatsForUser(userid_t userId)717 void IoOveruseMonitor::removeStatsForUser(userid_t userId) {
718 std::unique_lock writeLock(mRwMutex);
719 for (auto it = mUserPackageDailyIoUsageById.begin();
720 it != mUserPackageDailyIoUsageById.end();) {
721 if (multiuser_get_user_id(it->second.packageInfo.packageIdentifier.uid) == userId) {
722 it = mUserPackageDailyIoUsageById.erase(it);
723 } else {
724 ++it;
725 }
726 }
727 // |mPrevBootIoUsageStatsById| keys are constructed using |uniquePackageIdStr| method. Thus, the
728 // key suffix would contain the userId. The value in this map is |IoUsageStats|, which doesn't
729 // contain the userId, so this is the only way to delete cached previous boot stats for
730 // the removed user.
731 std::string keySuffix = StringPrintf(":%" PRId32, userId);
732 for (auto it = mPrevBootIoUsageStatsById.begin(); it != mPrevBootIoUsageStatsById.end();) {
733 if (EndsWith(it->first, keySuffix)) {
734 it = mPrevBootIoUsageStatsById.erase(it);
735 } else {
736 ++it;
737 }
738 }
739 for (auto it = mLatestIoOveruseStats.begin(); it != mLatestIoOveruseStats.end();) {
740 if (multiuser_get_user_id(it->uid) == userId) {
741 it = mLatestIoOveruseStats.erase(it);
742 } else {
743 ++it;
744 }
745 }
746 }
747
handleBinderDeath(void * cookie)748 void IoOveruseMonitor::handleBinderDeath(void* cookie) {
749 uintptr_t cookieId = reinterpret_cast<uintptr_t>(cookie);
750
751 std::unique_lock writeLock(mRwMutex);
752 findListenerAndProcessLocked(cookieId,
753 [&](ListenersByUidMap& listeners,
754 ListenersByUidMap::const_iterator it) {
755 ALOGW("Resource overuse notification handler died for uid(%d)",
756 it->first);
757 listeners.erase(it);
758 });
759 }
760
findListenerAndProcessLocked(uintptr_t binderPtrId,const Processor & processor)761 bool IoOveruseMonitor::findListenerAndProcessLocked(uintptr_t binderPtrId,
762 const Processor& processor) {
763 for (auto it = mOveruseListenersByUid.begin(); it != mOveruseListenersByUid.end(); ++it) {
764 uintptr_t curBinderPtrId = reinterpret_cast<uintptr_t>(it->second->asBinder().get());
765 if (curBinderPtrId != binderPtrId) {
766 continue;
767 }
768 if (processor != nullptr) {
769 processor(mOveruseListenersByUid, it);
770 }
771 return true;
772 }
773 return false;
774 }
775
UserPackageIoUsage(const PackageInfo & pkgInfo,const UidIoStats & uidIoStats,const bool isGarageModeActive)776 IoOveruseMonitor::UserPackageIoUsage::UserPackageIoUsage(const PackageInfo& pkgInfo,
777 const UidIoStats& uidIoStats,
778 const bool isGarageModeActive) {
779 packageInfo = pkgInfo;
780 if (isGarageModeActive) {
781 writtenBytes.garageModeBytes = uidIoStats.sumWriteBytes();
782 } else {
783 writtenBytes.foregroundBytes = uidIoStats.metrics[WRITE_BYTES][FOREGROUND];
784 writtenBytes.backgroundBytes = uidIoStats.metrics[WRITE_BYTES][BACKGROUND];
785 }
786 }
787
operator +=(const UserPackageIoUsage & r)788 IoOveruseMonitor::UserPackageIoUsage& IoOveruseMonitor::UserPackageIoUsage::operator+=(
789 const UserPackageIoUsage& r) {
790 if (id() == r.id()) {
791 packageInfo = r.packageInfo;
792 }
793 writtenBytes = sum(writtenBytes, r.writtenBytes);
794 return *this;
795 }
796
operator +=(const IoUsageStats & ioUsageStats)797 IoOveruseMonitor::UserPackageIoUsage& IoOveruseMonitor::UserPackageIoUsage::operator+=(
798 const IoUsageStats& ioUsageStats) {
799 writtenBytes = sum(writtenBytes, ioUsageStats.writtenBytes);
800 forgivenWriteBytes = sum(forgivenWriteBytes, ioUsageStats.forgivenWriteBytes);
801 totalOveruses += ioUsageStats.totalOveruses;
802 return *this;
803 }
804
id() const805 const std::string IoOveruseMonitor::UserPackageIoUsage::id() const {
806 return uniquePackageIdStr(packageInfo.packageIdentifier);
807 }
808
resetStats()809 void IoOveruseMonitor::UserPackageIoUsage::resetStats() {
810 writtenBytes = {};
811 forgivenWriteBytes = {};
812 totalOveruses = 0;
813 isPackageWarned = false;
814 lastSyncedWrittenBytes = 0;
815 }
816
817 } // namespace watchdog
818 } // namespace automotive
819 } // namespace android
820