1 /*
2  * Copyright (c) 2020, The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "carwatchdogd"
18 #define DEBUG false  // STOPSHIP if true.
19 
20 #include "IoOveruseMonitor.h"
21 
22 #include "PackageInfoResolver.h"
23 #include "ServiceManager.h"
24 
25 #include <WatchdogProperties.sysprop.h>
26 #include <aidl/android/automotive/watchdog/IResourceOveruseListener.h>
27 #include <aidl/android/automotive/watchdog/ResourceOveruseStats.h>
28 #include <aidl/android/automotive/watchdog/internal/PackageIdentifier.h>
29 #include <aidl/android/automotive/watchdog/internal/UidType.h>
30 #include <android-base/file.h>
31 #include <android-base/strings.h>
32 #include <android/util/ProtoOutputStream.h>
33 #include <binder/IPCThreadState.h>
34 #include <log/log.h>
35 #include <processgroup/sched_policy.h>
36 
37 #include <pthread.h>
38 
39 #include <limits>
40 #include <thread>  // NOLINT(build/c++11)
41 
42 namespace android {
43 namespace automotive {
44 namespace watchdog {
45 
46 namespace {
47 
48 using ::aidl::android::automotive::watchdog::IoOveruseStats;
49 using ::aidl::android::automotive::watchdog::IResourceOveruseListener;
50 using ::aidl::android::automotive::watchdog::PerStateBytes;
51 using ::aidl::android::automotive::watchdog::internal::ComponentType;
52 using ::aidl::android::automotive::watchdog::internal::IoOveruseConfiguration;
53 using ::aidl::android::automotive::watchdog::internal::IoUsageStats;
54 using ::aidl::android::automotive::watchdog::internal::PackageIdentifier;
55 using ::aidl::android::automotive::watchdog::internal::PackageInfo;
56 using ::aidl::android::automotive::watchdog::internal::PackageIoOveruseStats;
57 using ::aidl::android::automotive::watchdog::internal::ResourceOveruseConfiguration;
58 using ::aidl::android::automotive::watchdog::internal::ResourceOveruseStats;
59 using ::aidl::android::automotive::watchdog::internal::ResourceStats;
60 using ::aidl::android::automotive::watchdog::internal::UidType;
61 using ::aidl::android::automotive::watchdog::internal::UserPackageIoUsageStats;
62 using ::android::IPCThreadState;
63 using ::android::sp;
64 using ::android::base::EndsWith;
65 using ::android::base::Error;
66 using ::android::base::Result;
67 using ::android::base::StringPrintf;
68 using ::android::base::WriteStringToFd;
69 using ::android::util::ProtoOutputStream;
70 using ::ndk::ScopedAIBinder_DeathRecipient;
71 using ::ndk::SpAIBinder;
72 
// Largest int32_t/int64_t values, both held as int64_t. They are used as the saturation limits
// by the arithmetic helpers in this file.
constexpr int64_t kMaxInt32 = std::numeric_limits<int32_t>::max();
constexpr int64_t kMaxInt64 = std::numeric_limits<int64_t>::max();
// Minimum written bytes to sync the stats with the Watchdog service.
constexpr int64_t kMinSyncWrittenBytes = 100 * 1024;
// Minimum percentage of threshold to warn killable applications.
constexpr double kDefaultIoOveruseWarnPercentage = 80;
// Maximum number of system-wide stats (from periodic monitoring) to cache.
constexpr size_t kMaxPeriodicMonitorBufferSize = 1000;
// Help text format. The two %s placeholders are filled in by dumpHelpText() with the monitor name
// and the reset-resource-overuse-stats flag.
constexpr const char* kHelpText =
        "\n%s dump options:\n"
        "%s <package name>, <package name>,...: Reset resource overuse stats for the given package "
        "names. Value for this flag is a comma-separated value containing package names.\n";
85 
uniquePackageIdStr(const std::string & name,userid_t userId)86 std::string uniquePackageIdStr(const std::string& name, userid_t userId) {
87     return StringPrintf("%s:%" PRId32, name.c_str(), userId);
88 }
89 
uniquePackageIdStr(const PackageIdentifier & id)90 std::string uniquePackageIdStr(const PackageIdentifier& id) {
91     return uniquePackageIdStr(id.name, multiuser_get_user_id(id.uid));
92 }
93 
// Adds two PerStateBytes field by field. Each field saturates at kMaxInt64 instead of overflowing.
PerStateBytes sum(const PerStateBytes& lhs, const PerStateBytes& rhs) {
    const auto saturatingAdd = [](int64_t a, int64_t b) -> int64_t {
        if ((kMaxInt64 - a) > b) {
            return a + b;
        }
        return kMaxInt64;
    };
    PerStateBytes total;
    total.foregroundBytes = saturatingAdd(lhs.foregroundBytes, rhs.foregroundBytes);
    total.backgroundBytes = saturatingAdd(lhs.backgroundBytes, rhs.backgroundBytes);
    total.garageModeBytes = saturatingAdd(lhs.garageModeBytes, rhs.garageModeBytes);
    return total;
}
104 
// Subtracts |rhs| from |lhs| field by field. A field is clamped to zero when |rhs| exceeds |lhs|.
PerStateBytes diff(const PerStateBytes& lhs, const PerStateBytes& rhs) {
    const auto clampedSub = [](int64_t a, int64_t b) -> int64_t {
        if (a < b) {
            return 0;
        }
        return a - b;
    };
    PerStateBytes remaining;
    remaining.foregroundBytes = clampedSub(lhs.foregroundBytes, rhs.foregroundBytes);
    remaining.backgroundBytes = clampedSub(lhs.backgroundBytes, rhs.backgroundBytes);
    remaining.garageModeBytes = clampedSub(lhs.garageModeBytes, rhs.garageModeBytes);
    return remaining;
}
115 
// Returns (start of the day in epoch seconds, seconds elapsed since that start) for the given
// broken-down time. Both values are computed with timegm().
std::tuple<int64_t, int64_t> calculateStartAndDuration(struct tm currentTm) {
    // Stats are bucketed per day, so the window always begins at 00:00:00 of the current day.
    struct tm dayStartTm = currentTm;
    dayStartTm.tm_hour = 0;
    dayStartTm.tm_min = 0;
    dayStartTm.tm_sec = 0;

    const int64_t dayStartEpochSeconds = static_cast<int64_t>(timegm(&dayStartTm));
    const int64_t nowEpochSeconds = static_cast<int64_t>(timegm(&currentTm));
    const int64_t elapsedSeconds = nowEpochSeconds - dayStartEpochSeconds;
    return std::make_tuple(dayStartEpochSeconds, elapsedSeconds);
}
127 
totalPerStateBytes(PerStateBytes perStateBytes)128 int64_t totalPerStateBytes(PerStateBytes perStateBytes) {
129     const auto sum = [](const int64_t& l, const int64_t& r) -> int64_t {
130         return kMaxInt64 - l > r ? (l + r) : kMaxInt64;
131     };
132     return sum(perStateBytes.foregroundBytes,
133                sum(perStateBytes.backgroundBytes, perStateBytes.garageModeBytes));
134 }
135 
calculateOveruseAndForgivenBytes(PerStateBytes writtenBytes,PerStateBytes threshold)136 std::tuple<int32_t, PerStateBytes> calculateOveruseAndForgivenBytes(PerStateBytes writtenBytes,
137                                                                     PerStateBytes threshold) {
138     const auto div = [](const int64_t& l, const int64_t& r) -> int32_t {
139         return r > 0 ? (l / r) : 1;
140     };
141     const auto mul = [](const int32_t& l, const int32_t& r) -> int32_t {
142         if (l == 0 || r == 0) {
143             return 0;
144         }
145         return (kMaxInt32 / r) > l ? (l * r) : kMaxInt32;
146     };
147     const auto sum = [](const int32_t& l, const int32_t& r) -> int32_t {
148         return (kMaxInt32 - l) > r ? (l + r) : kMaxInt32;
149     };
150     int32_t foregroundOveruses = div(writtenBytes.foregroundBytes, threshold.foregroundBytes);
151     int32_t backgroundOveruses = div(writtenBytes.backgroundBytes, threshold.backgroundBytes);
152     int32_t garageModeOveruses = div(writtenBytes.garageModeBytes, threshold.garageModeBytes);
153     int32_t totalOveruses = sum(foregroundOveruses, sum(backgroundOveruses, garageModeOveruses));
154 
155     PerStateBytes forgivenWriteBytes;
156     forgivenWriteBytes.foregroundBytes = mul(foregroundOveruses, threshold.foregroundBytes);
157     forgivenWriteBytes.backgroundBytes = mul(backgroundOveruses, threshold.backgroundBytes);
158     forgivenWriteBytes.garageModeBytes = mul(garageModeOveruses, threshold.garageModeBytes);
159 
160     return std::make_tuple(totalOveruses, forgivenWriteBytes);
161 }
162 
onBinderDied(void * cookie)163 void onBinderDied(void* cookie) {
164     const auto& thiz = ServiceManager::getInstance()->getIoOveruseMonitor();
165     if (thiz == nullptr) {
166         return;
167     }
168     thiz->handleBinderDeath(cookie);
169 }
170 
171 }  // namespace
172 
calculateStartAndDuration(const time_point_millis & currentTime)173 std::tuple<int64_t, int64_t> calculateStartAndDuration(const time_point_millis& currentTime) {
174     auto timeInSeconds = std::chrono::system_clock::to_time_t(currentTime);
175     struct tm currentGmt;
176     gmtime_r(&timeInSeconds, &currentGmt);
177     return calculateStartAndDuration(currentGmt);
178 }
179 
// Constructs the monitor with empty caches and the default sync threshold. The periodic monitor
// buffer size and the warn percentage start at zero here and are assigned in init(). The binder
// death recipient is created here with onBinderDied as its callback.
IoOveruseMonitor::IoOveruseMonitor(
        const android::sp<WatchdogServiceHelperInterface>& watchdogServiceHelper) :
      mMinSyncWrittenBytes(kMinSyncWrittenBytes),
      mWatchdogServiceHelper(watchdogServiceHelper),
      mDeathRegistrationWrapper(sp<AIBinderDeathRegistrationWrapper>::make()),
      mDidReadTodayPrevBootStats(false),
      mSystemWideWrittenBytes({}),
      mPeriodicMonitorBufferSize(0),
      mLastSystemWideIoMonitorTime(0),
      mUserPackageDailyIoUsageById({}),
      mIoOveruseWarnPercentage(0),
      mLastUserPackageIoMonitorTime(time_point_millis::min()),
      mOveruseListenersByUid({}),
      mBinderDeathRecipient(
              ScopedAIBinder_DeathRecipient(AIBinder_DeathRecipient_new(onBinderDied))) {}
195 
init()196 Result<void> IoOveruseMonitor::init() {
197     std::unique_lock writeLock(mRwMutex);
198     if (isInitializedLocked()) {
199         return Error() << "Cannot initialize " << name() << " more than once";
200     }
201     mPeriodicMonitorBufferSize = static_cast<size_t>(
202             sysprop::periodicMonitorBufferSize().value_or(kDefaultPeriodicMonitorBufferSize));
203     if (mPeriodicMonitorBufferSize == 0 ||
204         mPeriodicMonitorBufferSize > kMaxPeriodicMonitorBufferSize) {
205         return Error() << "Periodic monitor buffer size cannot be zero or above "
206                        << kDefaultPeriodicMonitorBufferSize << ". Received "
207                        << mPeriodicMonitorBufferSize;
208     }
209     mIoOveruseWarnPercentage = static_cast<double>(
210             sysprop::ioOveruseWarnPercentage().value_or(kDefaultIoOveruseWarnPercentage));
211     mIoOveruseConfigs = sp<IoOveruseConfigs>::make();
212     mPackageInfoResolver = PackageInfoResolver::getInstance();
213     mPackageInfoResolver->setPackageConfigurations(mIoOveruseConfigs->vendorPackagePrefixes(),
214                                                    mIoOveruseConfigs->packagesToAppCategories());
215     if (DEBUG) {
216         ALOGD("Initialized %s data processor", name().c_str());
217     }
218     return {};
219 }
220 
terminate()221 void IoOveruseMonitor::terminate() {
222     ALOGW("Terminating %s", name().c_str());
223     if (mWriteToDiskThread.joinable()) {
224         mWriteToDiskThread.join();
225         ALOGI("Write to disk has completed. Proceeding with termination");
226     }
227     std::unique_lock writeLock(mRwMutex);
228     mWatchdogServiceHelper.clear();
229     mIoOveruseConfigs.clear();
230     mSystemWideWrittenBytes.clear();
231     mUserPackageDailyIoUsageById.clear();
232     for (const auto& [_, listener] : mOveruseListenersByUid) {
233         AIBinder* aiBinder = listener->asBinder().get();
234         mDeathRegistrationWrapper->unlinkToDeath(aiBinder, mBinderDeathRecipient.get(),
235                                                  static_cast<void*>(aiBinder));
236     }
237     mOveruseListenersByUid.clear();
238     if (DEBUG) {
239         ALOGD("Terminated %s data processor", name().c_str());
240     }
241     return;
242 }
243 
onCarWatchdogServiceRegistered()244 void IoOveruseMonitor::onCarWatchdogServiceRegistered() {
245     std::unique_lock writeLock(mRwMutex);
246     if (!mDidReadTodayPrevBootStats) {
247         requestTodayIoUsageStatsLocked();
248     }
249 }
250 
// Processes the per-UID I/O delta stats of one collection cycle.
//
// For every UID with non-zero written bytes and package info, the delta is merged into the
// per-package daily usage cache (together with any not-yet-merged stats from the previous boot),
// overuses are computed against the package threshold, and a PackageIoOveruseStats entry is queued
// when the package overused, crossed the warn percentage, or wrote at least mMinSyncWrittenBytes
// since the last sync. Queued entries are copied into |resourceStats| and the queue is cleared.
// Listeners registered for overusing native UIDs are notified directly.
//
// Returns an error only when |uidStatsCollector| cannot be promoted.
// NOTE(review): |resourceStats| is dereferenced without a null check; callers presumably always
// pass a valid pointer - confirm.
Result<void> IoOveruseMonitor::onPeriodicCollection(
        time_point_millis time, SystemState systemState,
        const android::wp<UidStatsCollectorInterface>& uidStatsCollector,
        [[maybe_unused]] const android::wp<ProcStatCollectorInterface>& procStatCollector,
        ResourceStats* resourceStats) {
    android::sp<UidStatsCollectorInterface> uidStatsCollectorSp = uidStatsCollector.promote();
    if (uidStatsCollectorSp == nullptr) {
        return Error() << "Per-UID I/O stats collector must not be null";
    }

    auto timeInSeconds = std::chrono::system_clock::to_time_t(time);

    std::unique_lock writeLock(mRwMutex);
    if (!mDidReadTodayPrevBootStats) {
        requestTodayIoUsageStatsLocked();
    }
    // Compare the GMT date of the previous collection with the current one.
    struct tm prevGmt, curGmt;
    auto mLastUserPackageIoMonitorTimeInSeconds =
            std::chrono::system_clock::to_time_t(mLastUserPackageIoMonitorTime);
    gmtime_r(&mLastUserPackageIoMonitorTimeInSeconds, &prevGmt);
    gmtime_r(&timeInSeconds, &curGmt);
    if (prevGmt.tm_yday != curGmt.tm_yday || prevGmt.tm_year != curGmt.tm_year) {
        /*
         * Date changed so reset the daily I/O usage cache. CarWatchdogService automatically handles
         * date change on |CarWatchdogService.latestIoOveruseStats| call.
         */
        mUserPackageDailyIoUsageById.clear();
    }
    mLastUserPackageIoMonitorTime = time;
    const auto [startTime, durationInSeconds] = calculateStartAndDuration(curGmt);

    auto uidStats = uidStatsCollectorSp->deltaStats();
    if (uidStats.empty()) {
        return {};
    }
    std::unordered_map<uid_t, IoOveruseStats> overusingNativeStats;
    bool isGarageModeActive = systemState == SystemState::GARAGE_MODE;
    for (const auto& curUidStats : uidStats) {
        if (curUidStats.ioStats.sumWriteBytes() == 0 || !curUidStats.hasPackageInfo()) {
            /* 1. Ignore UIDs with zero written bytes since the last collection because they are
             * either already accounted for or no writes made since system start.
             *
             * 2. UID stats without package info is not useful because the stats isn't attributed to
             * any package/service.
             */
            continue;
        }
        UserPackageIoUsage curUsage(curUidStats.packageInfo, curUidStats.ioStats,
                                    isGarageModeActive);

        // Fold in the previous boot's stats for this package once, then drop them so they are not
        // added again on later collections.
        if (!mPrevBootIoUsageStatsById.empty()) {
            if (auto prevBootStats = mPrevBootIoUsageStatsById.find(curUsage.id());
                prevBootStats != mPrevBootIoUsageStatsById.end()) {
                curUsage += prevBootStats->second;
                mPrevBootIoUsageStatsById.erase(prevBootStats);
            }
        }
        // Accumulate into the existing daily entry or insert a new one.
        UserPackageIoUsage* dailyIoUsage;
        if (auto cachedUsage = mUserPackageDailyIoUsageById.find(curUsage.id());
            cachedUsage != mUserPackageDailyIoUsageById.end()) {
            cachedUsage->second += curUsage;
            dailyIoUsage = &cachedUsage->second;
        } else {
            const auto& [it, wasInserted] = mUserPackageDailyIoUsageById.insert(
                    std::pair(curUsage.id(), std::move(curUsage)));
            dailyIoUsage = &it->second;
        }

        const auto threshold = mIoOveruseConfigs->fetchThreshold(dailyIoUsage->packageInfo);

        // Only the bytes written beyond what earlier overuses already forgave count toward new
        // overuses.
        const auto deltaWrittenBytes =
                diff(dailyIoUsage->writtenBytes, dailyIoUsage->forgivenWriteBytes);
        const auto [currentOveruses, forgivenWriteBytes] =
                calculateOveruseAndForgivenBytes(deltaWrittenBytes, threshold);
        dailyIoUsage->totalOveruses += currentOveruses;
        dailyIoUsage->forgivenWriteBytes =
                sum(dailyIoUsage->forgivenWriteBytes, forgivenWriteBytes);

        PackageIoOveruseStats stats;
        stats.uid = curUidStats.packageInfo.packageIdentifier.uid;
        stats.shouldNotify = false;
        stats.forgivenWriteBytes = dailyIoUsage->forgivenWriteBytes;
        stats.ioOveruseStats.startTime = startTime;
        stats.ioOveruseStats.durationInSeconds = durationInSeconds;
        stats.ioOveruseStats.writtenBytes = dailyIoUsage->writtenBytes;
        stats.ioOveruseStats.totalOveruses = dailyIoUsage->totalOveruses;
        stats.ioOveruseStats.remainingWriteBytes = diff(threshold, deltaWrittenBytes);
        stats.ioOveruseStats.killableOnOveruse =
                mIoOveruseConfigs->isSafeToKill(dailyIoUsage->packageInfo);

        const auto& remainingWriteBytes = stats.ioOveruseStats.remainingWriteBytes;
        // True when the used share of |threshold| is above the warn percentage. A zero threshold
        // always counts as exceeded.
        const auto exceedsWarnThreshold = [&](double remaining, double threshold) {
            if (threshold == 0) {
                return true;
            }
            double usedPercent = (100 - (remaining / threshold) * 100);
            return usedPercent > mIoOveruseWarnPercentage;
        };
        bool shouldSyncWatchdogService =
                (totalPerStateBytes(dailyIoUsage->writtenBytes) -
                 dailyIoUsage->lastSyncedWrittenBytes) >= mMinSyncWrittenBytes;
        if (currentOveruses > 0) {
            // Reset the warned flag so the package can be warned again after this overuse.
            dailyIoUsage->isPackageWarned = false;
            /*
             * Send notifications for native service I/O overuses as well because system listeners
             * need to be notified of all I/O overuses.
             */
            stats.shouldNotify = true;
            if (dailyIoUsage->packageInfo.uidType == UidType::NATIVE) {
                overusingNativeStats[stats.uid] = stats.ioOveruseStats;
            }
            shouldSyncWatchdogService = true;
        } else if (dailyIoUsage->packageInfo.uidType == UidType::APPLICATION &&
                   stats.ioOveruseStats.killableOnOveruse && !dailyIoUsage->isPackageWarned &&
                   (exceedsWarnThreshold(remainingWriteBytes.foregroundBytes,
                                         threshold.foregroundBytes) ||
                    exceedsWarnThreshold(remainingWriteBytes.backgroundBytes,
                                         threshold.backgroundBytes) ||
                    exceedsWarnThreshold(remainingWriteBytes.garageModeBytes,
                                         threshold.garageModeBytes))) {
            /*
             * No need to warn native services or applications that won't be killed on I/O overuse
             * as they will be sent a notification when they exceed their daily threshold.
             */
            stats.shouldNotify = true;
            // Avoid duplicate warning before the daily threshold exceeded notification is sent.
            dailyIoUsage->isPackageWarned = true;
            shouldSyncWatchdogService = true;
        }
        if (shouldSyncWatchdogService) {
            dailyIoUsage->lastSyncedWrittenBytes = totalPerStateBytes(dailyIoUsage->writtenBytes);
            mLatestIoOveruseStats.emplace_back(std::move(stats));
        }
    }
    if (!overusingNativeStats.empty()) {
        notifyNativePackagesLocked(overusingNativeStats);
    }
    if (mLatestIoOveruseStats.empty()) {
        return {};
    }
    // Hand the queued stats to the caller through |resourceStats|.
    if (!(resourceStats->resourceOveruseStats).has_value()) {
        resourceStats->resourceOveruseStats = std::make_optional<ResourceOveruseStats>({});
    }
    resourceStats->resourceOveruseStats->packageIoOveruseStats = mLatestIoOveruseStats;
    // Clear the cache
    mLatestIoOveruseStats.clear();
    return {};
}
399 
onCustomCollection(time_point_millis time,SystemState systemState,const std::unordered_set<std::string> & filterPackages,const android::wp<UidStatsCollectorInterface> & uidStatsCollector,const android::wp<ProcStatCollectorInterface> & procStatCollector,ResourceStats * resourceStats)400 Result<void> IoOveruseMonitor::onCustomCollection(
401         time_point_millis time, SystemState systemState,
402         [[maybe_unused]] const std::unordered_set<std::string>& filterPackages,
403         const android::wp<UidStatsCollectorInterface>& uidStatsCollector,
404         const android::wp<ProcStatCollectorInterface>& procStatCollector,
405         ResourceStats* resourceStats) {
406     // Nothing special for custom collection.
407     return onPeriodicCollection(time, systemState, uidStatsCollector, procStatCollector,
408                                 resourceStats);
409 }
410 
onPeriodicMonitor(time_t time,const android::wp<ProcDiskStatsCollectorInterface> & procDiskStatsCollector,const std::function<void ()> & alertHandler)411 Result<void> IoOveruseMonitor::onPeriodicMonitor(
412         time_t time, const android::wp<ProcDiskStatsCollectorInterface>& procDiskStatsCollector,
413         const std::function<void()>& alertHandler) {
414     if (procDiskStatsCollector == nullptr) {
415         return Error() << "Proc disk stats collector must not be null";
416     }
417 
418     std::unique_lock writeLock(mRwMutex);
419     if (mLastSystemWideIoMonitorTime == 0) {
420         /*
421          * Do not record the first disk stats as it reflects the aggregated disks stats since the
422          * system boot up and is not in sync with the polling period. This will lead to spurious
423          * I/O overuse alerting.
424          */
425         mLastSystemWideIoMonitorTime = time;
426         return {};
427     }
428     const auto diskStats = procDiskStatsCollector.promote()->deltaSystemWideDiskStats();
429     mSystemWideWrittenBytes.push_back(
430             {.pollDurationInSecs = difftime(time, mLastSystemWideIoMonitorTime),
431              .bytesInKib = diskStats.numKibWritten});
432     for (const auto& threshold : mIoOveruseConfigs->systemWideAlertThresholds()) {
433         int64_t accountedWrittenKib = 0;
434         double accountedDurationInSecs = 0;
435         size_t accountedPolls = 0;
436         for (auto rit = mSystemWideWrittenBytes.rbegin(); rit != mSystemWideWrittenBytes.rend();
437              ++rit) {
438             accountedWrittenKib += rit->bytesInKib;
439             accountedDurationInSecs += rit->pollDurationInSecs;
440             ++accountedPolls;
441             if (accountedDurationInSecs >= threshold.durationInSeconds) {
442                 break;
443             }
444         }
445         // Heuristic to handle spurious alerting when the buffer is partially filled.
446         if (const size_t bufferSize = mSystemWideWrittenBytes.size();
447             accountedPolls == bufferSize && bufferSize < mPeriodicMonitorBufferSize + 1 &&
448             threshold.durationInSeconds > accountedDurationInSecs) {
449             continue;
450         }
451         const double thresholdKbps = threshold.writtenBytesPerSecond / 1024.0;
452         if (const auto kbps = accountedWrittenKib / accountedDurationInSecs;
453             kbps >= thresholdKbps) {
454             alertHandler();
455             break;
456         }
457     }
458     if (mSystemWideWrittenBytes.size() > mPeriodicMonitorBufferSize) {
459         mSystemWideWrittenBytes.erase(mSystemWideWrittenBytes.begin());  // Erase the oldest entry.
460     }
461     mLastSystemWideIoMonitorTime = time;
462     return {};
463 }
464 
onDump(int fd) const465 Result<void> IoOveruseMonitor::onDump([[maybe_unused]] int fd) const {
466     // TODO(b/183436216): Dump the list of killed/disabled packages. Dump the list of packages that
467     //  exceed xx% of their threshold.
468     return {};
469 }
470 
onDumpProto(const CollectionIntervals & collectionIntervals,ProtoOutputStream & outProto) const471 Result<void> IoOveruseMonitor::onDumpProto(
472         [[maybe_unused]] const CollectionIntervals& collectionIntervals,
473         [[maybe_unused]] ProtoOutputStream& outProto) const {
474     // TODO(b/296123577): Dump the list of killed/disabled packages in proto format.
475     return {};
476 }
477 
dumpHelpText(int fd) const478 bool IoOveruseMonitor::dumpHelpText(int fd) const {
479     return WriteStringToFd(StringPrintf(kHelpText, name().c_str(), kResetResourceOveruseStatsFlag),
480                            fd);
481 }
482 
requestTodayIoUsageStatsLocked()483 void IoOveruseMonitor::requestTodayIoUsageStatsLocked() {
484     if (const auto status = mWatchdogServiceHelper->requestTodayIoUsageStats(); !status.isOk()) {
485         // Request made only after CarWatchdogService connection is established. Logging the error
486         // is enough in this case.
487         ALOGE("Failed to request today I/O usage stats collected during previous boot: %s",
488               status.getMessage());
489         return;
490     }
491     if (DEBUG) {
492         ALOGD("Requested today's I/O usage stats collected during previous boot.");
493     }
494 }
495 
onTodayIoUsageStatsFetched(const std::vector<UserPackageIoUsageStats> & userPackageIoUsageStats)496 Result<void> IoOveruseMonitor::onTodayIoUsageStatsFetched(
497         const std::vector<UserPackageIoUsageStats>& userPackageIoUsageStats) {
498     std::unique_lock writeLock(mRwMutex);
499     if (mDidReadTodayPrevBootStats) {
500         return {};
501     }
502     for (const auto& statsEntry : userPackageIoUsageStats) {
503         std::string uniqueId = uniquePackageIdStr(statsEntry.packageName,
504                                                   static_cast<userid_t>(statsEntry.userId));
505         if (auto it = mUserPackageDailyIoUsageById.find(uniqueId);
506             it != mUserPackageDailyIoUsageById.end()) {
507             it->second += statsEntry.ioUsageStats;
508             continue;
509         }
510         mPrevBootIoUsageStatsById.insert(std::pair(uniqueId, statsEntry.ioUsageStats));
511     }
512     mDidReadTodayPrevBootStats = true;
513     return {};
514 }
515 
notifyNativePackagesLocked(const std::unordered_map<uid_t,IoOveruseStats> & statsByUid)516 void IoOveruseMonitor::notifyNativePackagesLocked(
517         const std::unordered_map<uid_t, IoOveruseStats>& statsByUid) {
518     for (const auto& [uid, ioOveruseStats] : statsByUid) {
519         IResourceOveruseListener* listener;
520         if (const auto it = mOveruseListenersByUid.find(uid); it == mOveruseListenersByUid.end()) {
521             continue;
522         } else {
523             listener = it->second.get();
524         }
525         aidl::android::automotive::watchdog::ResourceOveruseStats stats;
526         stats.set<aidl::android::automotive::watchdog::ResourceOveruseStats::ioOveruseStats>(
527                 ioOveruseStats);
528         listener->onOveruse(stats);
529     }
530     if (DEBUG) {
531         ALOGD("Notified native packages on I/O overuse");
532     }
533 }
534 
updateResourceOveruseConfigurations(const std::vector<ResourceOveruseConfiguration> & configs)535 Result<void> IoOveruseMonitor::updateResourceOveruseConfigurations(
536         const std::vector<ResourceOveruseConfiguration>& configs) {
537     std::unique_lock writeLock(mRwMutex);
538     if (!isInitializedLocked()) {
539         return Error(EX_ILLEGAL_STATE) << name() << " is not initialized";
540     }
541     if (const auto result = mIoOveruseConfigs->update(configs); !result.ok()) {
542         return result;
543     }
544     // When mWriteToDiskThread is already active, don't create a new thread to perform the same
545     // work. This thread writes to disk only after acquiring the mRwMutex write lock and the below
546     // check is performed after acquiring the same write lock. Thus, if the thread is still active
547     // and mIsWriteToDiskPending is true at this point, it indicates the thread hasn't performed
548     // the write and will write the latest updated configs when it executes.
549     if (bool isJoinable = mWriteToDiskThread.joinable(); isJoinable && mIsWriteToDiskPending) {
550         ALOGW("Skipping resource overuse configs write to disk due to ongoing write");
551         return {};
552     } else if (isJoinable) {
553         // At this point we know the thread has completed execution. Join the thread before
554         // creating a new one. Failure to join can lead to a crash since std::thread cannot
555         // destruct a thread object without first calling join.
556         mWriteToDiskThread.join();
557     }
558     mIsWriteToDiskPending = true;
559     mWriteToDiskThread = std::thread([&]() {
560         ALOGI("Writing resource overuse configs to disk");
561         if (set_sched_policy(0, SP_BACKGROUND) != 0) {
562             ALOGW("Failed to set background scheduling priority for writing resource overuse "
563                   "configs to disk");
564         }
565         if (int result = pthread_setname_np(pthread_self(), "ResOveruseCfgWr"); result != 0) {
566             ALOGE("Failed to set thread name to 'ResOveruseCfgWr'");
567         }
568         std::unique_lock writeLock(mRwMutex);
569         if (mIoOveruseConfigs == nullptr) {
570             ALOGE("IoOveruseConfigs instance is null");
571         } else if (const auto result = mIoOveruseConfigs->writeToDisk(); !result.ok()) {
572             ALOGE("Failed to write resource overuse configs to disk: %s",
573                   result.error().message().c_str());
574         } else {
575             ALOGI("Successfully wrote resource overuse configs to disk");
576         }
577         mIsWriteToDiskPending = false;
578     });
579 
580     return {};
581 }
582 
getResourceOveruseConfigurations(std::vector<ResourceOveruseConfiguration> * configs) const583 Result<void> IoOveruseMonitor::getResourceOveruseConfigurations(
584         std::vector<ResourceOveruseConfiguration>* configs) const {
585     std::shared_lock readLock(mRwMutex);
586     if (!isInitializedLocked()) {
587         return Error(EX_ILLEGAL_STATE) << name() << " is not initialized";
588     }
589     mIoOveruseConfigs->get(configs);
590     return {};
591 }
592 
addIoOveruseListener(const std::shared_ptr<IResourceOveruseListener> & listener)593 Result<void> IoOveruseMonitor::addIoOveruseListener(
594         const std::shared_ptr<IResourceOveruseListener>& listener) {
595     if (listener == nullptr) {
596         return Error(EX_ILLEGAL_ARGUMENT) << "Must provide non-null listener";
597     }
598     auto binder = listener->asBinder();
599     pid_t callingPid = IPCThreadState::self()->getCallingPid();
600     uid_t callingUid = IPCThreadState::self()->getCallingUid();
601     {
602         std::unique_lock writeLock(mRwMutex);
603         if (!isInitializedLocked()) {
604             // mBinderDeathRecipient is initialized inside init.
605             return Error(EX_ILLEGAL_STATE) << "Service is not initialized";
606         }
607         if (findListenerAndProcessLocked(reinterpret_cast<uintptr_t>(binder.get()), nullptr)) {
608             ALOGW("Failed to register the I/O overuse listener (pid: %d, uid: %d) as it is already "
609                   "registered",
610                   callingPid, callingUid);
611             return {};
612         }
613         mOveruseListenersByUid[callingUid] = listener;
614     }
615     AIBinder* aiBinder = binder.get();
616     auto status = mDeathRegistrationWrapper->linkToDeath(aiBinder, mBinderDeathRecipient.get(),
617                                                          static_cast<void*>(aiBinder));
618     if (!status.isOk()) {
619         std::unique_lock writeLock(mRwMutex);
620         if (const auto& it = mOveruseListenersByUid.find(callingUid);
621             it != mOveruseListenersByUid.end() && it->second->asBinder() == binder) {
622             mOveruseListenersByUid.erase(it);
623         }
624         return Error(EX_ILLEGAL_STATE) << "Failed to add I/O overuse listener: (pid " << callingPid
625                                        << ", uid: " << callingUid << ") is dead";
626     }
627     if (DEBUG) {
628         ALOGD("Added I/O overuse listener for uid: %d", callingUid);
629     }
630     return {};
631 }
632 
removeIoOveruseListener(const std::shared_ptr<IResourceOveruseListener> & listener)633 Result<void> IoOveruseMonitor::removeIoOveruseListener(
634         const std::shared_ptr<IResourceOveruseListener>& listener) {
635     if (listener == nullptr) {
636         return Error(EX_ILLEGAL_ARGUMENT) << "Must provide non-null listener";
637     }
638     std::unique_lock writeLock(mRwMutex);
639     if (!isInitializedLocked()) {
640         // mBinderDeathRecipient is initialized inside init.
641         return Error(EX_ILLEGAL_STATE) << "Service is not initialized";
642     }
643     const auto processor = [&](ListenersByUidMap& listeners, ListenersByUidMap::const_iterator it) {
644         AIBinder* aiBinder = it->second->asBinder().get();
645         mDeathRegistrationWrapper->unlinkToDeath(aiBinder, mBinderDeathRecipient.get(),
646                                                  static_cast<void*>(aiBinder));
647         listeners.erase(it);
648     };
649     if (!findListenerAndProcessLocked(reinterpret_cast<uintptr_t>(listener->asBinder().get()),
650                                       processor)) {
651         return Error(EX_ILLEGAL_ARGUMENT) << "Listener is not previously registered";
652     }
653     if (DEBUG) {
654         ALOGD("Removed I/O overuse listener for uid: %d", IPCThreadState::self()->getCallingUid());
655     }
656     return {};
657 }
658 
getIoOveruseStats(IoOveruseStats * ioOveruseStats) const659 Result<void> IoOveruseMonitor::getIoOveruseStats(IoOveruseStats* ioOveruseStats) const {
660     if (!isInitialized()) {
661         return Error(EX_ILLEGAL_STATE) << "I/O overuse monitor is not initialized";
662     }
663     uid_t callingUid = IPCThreadState::self()->getCallingUid();
664     const auto packageInfosByUid = mPackageInfoResolver->getPackageInfosForUids({callingUid});
665     const PackageInfo* packageInfo;
666     if (const auto it = packageInfosByUid.find(callingUid); it == packageInfosByUid.end()) {
667         return Error(EX_ILLEGAL_ARGUMENT)
668                 << "Package information not available for calling UID(" << callingUid << ")";
669     } else {
670         packageInfo = &it->second;
671     }
672     std::shared_lock readLock(mRwMutex);
673     const UserPackageIoUsage* dailyIoUsage;
674     if (const auto it = mUserPackageDailyIoUsageById.find(
675                 uniquePackageIdStr(packageInfo->packageIdentifier));
676         it == mUserPackageDailyIoUsageById.end()) {
677         return Error(EX_ILLEGAL_ARGUMENT)
678                 << "Calling UID " << callingUid << " doesn't have I/O overuse stats";
679     } else {
680         dailyIoUsage = &it->second;
681     }
682     ioOveruseStats->killableOnOveruse = mIoOveruseConfigs->isSafeToKill(*packageInfo);
683     const auto thresholdBytes = mIoOveruseConfigs->fetchThreshold(*packageInfo);
684     ioOveruseStats->remainingWriteBytes =
685             diff(thresholdBytes,
686                  diff(dailyIoUsage->writtenBytes, dailyIoUsage->forgivenWriteBytes));
687     ioOveruseStats->totalOveruses = dailyIoUsage->totalOveruses;
688     ioOveruseStats->writtenBytes = dailyIoUsage->writtenBytes;
689     const auto [startTime, durationInSeconds] =
690             calculateStartAndDuration(mLastUserPackageIoMonitorTime);
691     ioOveruseStats->startTime = startTime;
692     ioOveruseStats->durationInSeconds = durationInSeconds;
693     if (DEBUG) {
694         ALOGD("Returning I/O overuse stats for uid: %d", callingUid);
695     }
696     return {};
697 }
698 
resetIoOveruseStats(const std::vector<std::string> & packageNames)699 Result<void> IoOveruseMonitor::resetIoOveruseStats(const std::vector<std::string>& packageNames) {
700     if (const auto status = mWatchdogServiceHelper->resetResourceOveruseStats(packageNames);
701         !status.isOk()) {
702         return Error() << "Failed to reset stats in watchdog service: " << status.getDescription();
703     }
704     std::unordered_set<std::string> uniquePackageNames;
705     std::copy(packageNames.begin(), packageNames.end(),
706               std::inserter(uniquePackageNames, uniquePackageNames.end()));
707     std::unique_lock writeLock(mRwMutex);
708     for (auto& [key, usage] : mUserPackageDailyIoUsageById) {
709         if (uniquePackageNames.find(usage.packageInfo.packageIdentifier.name) !=
710             uniquePackageNames.end()) {
711             usage.resetStats();
712         }
713     }
714     return {};
715 }
716 
removeStatsForUser(userid_t userId)717 void IoOveruseMonitor::removeStatsForUser(userid_t userId) {
718     std::unique_lock writeLock(mRwMutex);
719     for (auto it = mUserPackageDailyIoUsageById.begin();
720          it != mUserPackageDailyIoUsageById.end();) {
721         if (multiuser_get_user_id(it->second.packageInfo.packageIdentifier.uid) == userId) {
722             it = mUserPackageDailyIoUsageById.erase(it);
723         } else {
724             ++it;
725         }
726     }
727     // |mPrevBootIoUsageStatsById| keys are constructed using |uniquePackageIdStr| method. Thus, the
728     // key suffix would contain the userId. The value in this map is |IoUsageStats|, which doesn't
729     // contain the userId, so this is the only way to delete cached previous boot stats for
730     // the removed user.
731     std::string keySuffix = StringPrintf(":%" PRId32, userId);
732     for (auto it = mPrevBootIoUsageStatsById.begin(); it != mPrevBootIoUsageStatsById.end();) {
733         if (EndsWith(it->first, keySuffix)) {
734             it = mPrevBootIoUsageStatsById.erase(it);
735         } else {
736             ++it;
737         }
738     }
739     for (auto it = mLatestIoOveruseStats.begin(); it != mLatestIoOveruseStats.end();) {
740         if (multiuser_get_user_id(it->uid) == userId) {
741             it = mLatestIoOveruseStats.erase(it);
742         } else {
743             ++it;
744         }
745     }
746 }
747 
handleBinderDeath(void * cookie)748 void IoOveruseMonitor::handleBinderDeath(void* cookie) {
749     uintptr_t cookieId = reinterpret_cast<uintptr_t>(cookie);
750 
751     std::unique_lock writeLock(mRwMutex);
752     findListenerAndProcessLocked(cookieId,
753                                  [&](ListenersByUidMap& listeners,
754                                      ListenersByUidMap::const_iterator it) {
755                                      ALOGW("Resource overuse notification handler died for uid(%d)",
756                                            it->first);
757                                      listeners.erase(it);
758                                  });
759 }
760 
findListenerAndProcessLocked(uintptr_t binderPtrId,const Processor & processor)761 bool IoOveruseMonitor::findListenerAndProcessLocked(uintptr_t binderPtrId,
762                                                     const Processor& processor) {
763     for (auto it = mOveruseListenersByUid.begin(); it != mOveruseListenersByUid.end(); ++it) {
764         uintptr_t curBinderPtrId = reinterpret_cast<uintptr_t>(it->second->asBinder().get());
765         if (curBinderPtrId != binderPtrId) {
766             continue;
767         }
768         if (processor != nullptr) {
769             processor(mOveruseListenersByUid, it);
770         }
771         return true;
772     }
773     return false;
774 }
775 
UserPackageIoUsage(const PackageInfo & pkgInfo,const UidIoStats & uidIoStats,const bool isGarageModeActive)776 IoOveruseMonitor::UserPackageIoUsage::UserPackageIoUsage(const PackageInfo& pkgInfo,
777                                                          const UidIoStats& uidIoStats,
778                                                          const bool isGarageModeActive) {
779     packageInfo = pkgInfo;
780     if (isGarageModeActive) {
781         writtenBytes.garageModeBytes = uidIoStats.sumWriteBytes();
782     } else {
783         writtenBytes.foregroundBytes = uidIoStats.metrics[WRITE_BYTES][FOREGROUND];
784         writtenBytes.backgroundBytes = uidIoStats.metrics[WRITE_BYTES][BACKGROUND];
785     }
786 }
787 
operator +=(const UserPackageIoUsage & r)788 IoOveruseMonitor::UserPackageIoUsage& IoOveruseMonitor::UserPackageIoUsage::operator+=(
789         const UserPackageIoUsage& r) {
790     if (id() == r.id()) {
791         packageInfo = r.packageInfo;
792     }
793     writtenBytes = sum(writtenBytes, r.writtenBytes);
794     return *this;
795 }
796 
operator +=(const IoUsageStats & ioUsageStats)797 IoOveruseMonitor::UserPackageIoUsage& IoOveruseMonitor::UserPackageIoUsage::operator+=(
798         const IoUsageStats& ioUsageStats) {
799     writtenBytes = sum(writtenBytes, ioUsageStats.writtenBytes);
800     forgivenWriteBytes = sum(forgivenWriteBytes, ioUsageStats.forgivenWriteBytes);
801     totalOveruses += ioUsageStats.totalOveruses;
802     return *this;
803 }
804 
id() const805 const std::string IoOveruseMonitor::UserPackageIoUsage::id() const {
806     return uniquePackageIdStr(packageInfo.packageIdentifier);
807 }
808 
resetStats()809 void IoOveruseMonitor::UserPackageIoUsage::resetStats() {
810     writtenBytes = {};
811     forgivenWriteBytes = {};
812     totalOveruses = 0;
813     isPackageWarned = false;
814     lastSyncedWrittenBytes = 0;
815 }
816 
817 }  // namespace watchdog
818 }  // namespace automotive
819 }  // namespace android
820