1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #define STATSD_DEBUG false
17 #include "Log.h"
18 #include "OringDurationTracker.h"
19 #include "guardrail/StatsdStats.h"
20 
21 namespace android {
22 namespace os {
23 namespace statsd {
24 
25 using std::pair;
26 
OringDurationTracker(const ConfigKey & key,const int64_t id,const MetricDimensionKey & eventKey,const sp<ConditionWizard> & wizard,int conditionIndex,bool nesting,int64_t currentBucketStartNs,int64_t currentBucketNum,int64_t startTimeNs,int64_t bucketSizeNs,bool conditionSliced,bool fullLink,const vector<sp<AnomalyTracker>> & anomalyTrackers)27 OringDurationTracker::OringDurationTracker(const ConfigKey& key, const int64_t id,
28                                            const MetricDimensionKey& eventKey,
29                                            const sp<ConditionWizard>& wizard, int conditionIndex,
30                                            bool nesting, int64_t currentBucketStartNs,
31                                            int64_t currentBucketNum, int64_t startTimeNs,
32                                            int64_t bucketSizeNs, bool conditionSliced,
33                                            bool fullLink,
34                                            const vector<sp<AnomalyTracker>>& anomalyTrackers)
35     : DurationTracker(key, id, eventKey, wizard, conditionIndex, nesting, currentBucketStartNs,
36                       currentBucketNum, startTimeNs, bucketSizeNs, conditionSliced, fullLink,
37                       anomalyTrackers),
38       mStarted(),
39       mPaused() {
40     mLastStartTime = 0;
41 }
42 
hitGuardRail(const HashableDimensionKey & newKey,size_t dimensionHardLimit) const43 bool OringDurationTracker::hitGuardRail(const HashableDimensionKey& newKey,
44                                         size_t dimensionHardLimit) const {
45     // ===========GuardRail==============
46     // 1. Report the tuple count if the tuple count > soft limit
47     if (mConditionKeyMap.find(newKey) != mConditionKeyMap.end()) {
48         return false;
49     }
50     if (mConditionKeyMap.size() >= StatsdStats::kDimensionKeySizeSoftLimit) {
51         size_t newTupleCount = mConditionKeyMap.size() + 1;
52         StatsdStats::getInstance().noteMetricDimensionSize(mConfigKey, mTrackerId, newTupleCount);
53         // 2. Don't add more tuples, we are above the allowed threshold. Drop the data.
54         if (newTupleCount > dimensionHardLimit) {
55             if (!mHasHitGuardrail) {
56                 ALOGE("OringDurTracker %lld dropping data for dimension key %s",
57                       (long long)mTrackerId, newKey.toString().c_str());
58                 mHasHitGuardrail = true;
59             }
60             StatsdStats::getInstance().noteHardDimensionLimitReached(mTrackerId);
61             return true;
62         }
63     }
64     return false;
65 }
66 
noteStart(const HashableDimensionKey & key,bool condition,const int64_t eventTime,const ConditionKey & conditionKey,size_t dimensionHardLimit)67 void OringDurationTracker::noteStart(const HashableDimensionKey& key, bool condition,
68                                      const int64_t eventTime, const ConditionKey& conditionKey,
69                                      size_t dimensionHardLimit) {
70     if (hitGuardRail(key, dimensionHardLimit)) {
71         return;
72     }
73     if (condition) {
74         if (mStarted.size() == 0) {
75             mLastStartTime = eventTime;
76             VLOG("record first start....");
77             startAnomalyAlarm(eventTime);
78         }
79         mStarted[key]++;
80     } else {
81         mPaused[key]++;
82     }
83 
84     if (mConditionSliced && mConditionKeyMap.find(key) == mConditionKeyMap.end()) {
85         mConditionKeyMap[key] = conditionKey;
86     }
87     VLOG("Oring: %s start, condition %d", key.toString().c_str(), condition);
88 }
89 
noteStop(const HashableDimensionKey & key,const int64_t timestamp,const bool stopAll)90 void OringDurationTracker::noteStop(const HashableDimensionKey& key, const int64_t timestamp,
91                                     const bool stopAll) {
92     VLOG("Oring: %s stop", key.toString().c_str());
93     auto it = mStarted.find(key);
94     if (it != mStarted.end()) {
95         (it->second)--;
96         if (stopAll || !mNested || it->second <= 0) {
97             mStarted.erase(it);
98             mConditionKeyMap.erase(key);
99         }
100         if (mStarted.empty()) {
101             mStateKeyDurationMap[mEventKey.getStateValuesKey()].mDuration +=
102                     (timestamp - mLastStartTime);
103             detectAndDeclareAnomaly(
104                     timestamp, mCurrentBucketNum,
105                     getCurrentStateKeyDuration() + getCurrentStateKeyFullBucketDuration());
106             VLOG("record duration %lld, total duration %lld for state key %s",
107                  (long long)timestamp - mLastStartTime, (long long)getCurrentStateKeyDuration(),
108                  mEventKey.getStateValuesKey().toString().c_str());
109         }
110     }
111 
112     auto pausedIt = mPaused.find(key);
113     if (pausedIt != mPaused.end()) {
114         (pausedIt->second)--;
115         if (stopAll || !mNested || pausedIt->second <= 0) {
116             mPaused.erase(pausedIt);
117             mConditionKeyMap.erase(key);
118         }
119     }
120     if (mStarted.empty()) {
121         stopAnomalyAlarm(timestamp);
122     }
123 }
124 
noteStopAll(const int64_t timestamp)125 void OringDurationTracker::noteStopAll(const int64_t timestamp) {
126     if (!mStarted.empty()) {
127         mStateKeyDurationMap[mEventKey.getStateValuesKey()].mDuration +=
128                 (timestamp - mLastStartTime);
129         VLOG("Oring Stop all: record duration %lld, total duration %lld for state key %s",
130              (long long)timestamp - mLastStartTime, (long long)getCurrentStateKeyDuration(),
131              mEventKey.getStateValuesKey().toString().c_str());
132         detectAndDeclareAnomaly(
133                 timestamp, mCurrentBucketNum,
134                 getCurrentStateKeyDuration() + getCurrentStateKeyFullBucketDuration());
135     }
136 
137     stopAnomalyAlarm(timestamp);
138     mStarted.clear();
139     mPaused.clear();
140     mConditionKeyMap.clear();
141 }
142 
flushCurrentBucket(const int64_t eventTimeNs,const optional<UploadThreshold> & uploadThreshold,const int64_t globalConditionTrueNs,std::unordered_map<MetricDimensionKey,std::vector<DurationBucket>> * output)143 bool OringDurationTracker::flushCurrentBucket(
144         const int64_t eventTimeNs, const optional<UploadThreshold>& uploadThreshold,
145         const int64_t globalConditionTrueNs,
146         std::unordered_map<MetricDimensionKey, std::vector<DurationBucket>>* output) {
147     VLOG("OringDurationTracker Flushing.............");
148 
149     // Note that we have to mimic the bucket time changes we do in the
150     // MetricProducer#notifyAppUpgrade.
151 
152     int numBucketsForward = 0;
153     int64_t fullBucketEnd = getCurrentBucketEndTimeNs();
154     int64_t currentBucketEndTimeNs;
155 
156     bool isFullBucket = eventTimeNs >= fullBucketEnd;
157     if (isFullBucket) {
158         numBucketsForward = 1 + (eventTimeNs - fullBucketEnd) / mBucketSizeNs;
159         currentBucketEndTimeNs = fullBucketEnd;
160     } else {
161         // This must be a partial bucket.
162         currentBucketEndTimeNs = eventTimeNs;
163     }
164 
165     // Process the current bucket.
166     if (mStarted.size() > 0) {
167         // Calculate the duration for the current state key.
168         mStateKeyDurationMap[mEventKey.getStateValuesKey()].mDuration +=
169                 (currentBucketEndTimeNs - mLastStartTime);
170     }
171     // Store DurationBucket info for each whatKey, stateKey pair.
172     // Note: The whatKey stored in mEventKey is constant for each DurationTracker, while the
173     // stateKey stored in mEventKey is only the current stateKey. mStateKeyDurationMap is used to
174     // store durations for each stateKey, so we need to flush the bucket by creating a
175     // DurationBucket for each stateKey.
176     for (auto& durationIt : mStateKeyDurationMap) {
177         durationIt.second.mDurationFullBucket += durationIt.second.mDuration;
178         if (durationPassesThreshold(uploadThreshold, durationIt.second.mDuration)) {
179             DurationBucket current_info;
180             current_info.mBucketStartNs = mCurrentBucketStartTimeNs;
181             current_info.mBucketEndNs = currentBucketEndTimeNs;
182             current_info.mDuration = durationIt.second.mDuration;
183             current_info.mConditionTrueNs = globalConditionTrueNs;
184             (*output)[MetricDimensionKey(mEventKey.getDimensionKeyInWhat(), durationIt.first)]
185                     .push_back(current_info);
186             VLOG("  duration: %lld", (long long)current_info.mDuration);
187         } else {
188             VLOG("  duration: %lld does not pass set threshold",
189                  (long long)durationIt.second.mDuration);
190         }
191 
192         if (isFullBucket) {
193             // End of full bucket, can send to anomaly tracker now.
194             addPastBucketToAnomalyTrackers(
195                     MetricDimensionKey(mEventKey.getDimensionKeyInWhat(), durationIt.first),
196                     getCurrentStateKeyFullBucketDuration(), mCurrentBucketNum);
197         }
198         durationIt.second.mDuration = 0;
199     }
200     // Full bucket is only needed when we have anomaly trackers.
201     if (isFullBucket || mAnomalyTrackers.empty()) {
202         mStateKeyDurationMap.clear();
203     }
204 
205     if (mStarted.size() > 0) {
206         for (int i = 1; i < numBucketsForward; i++) {
207             DurationBucket info;
208             info.mBucketStartNs = fullBucketEnd + mBucketSizeNs * (i - 1);
209             info.mBucketEndNs = info.mBucketStartNs + mBucketSizeNs;
210             info.mDuration = mBucketSizeNs;
211             // Full duration buckets are attributed to the current stateKey.
212             (*output)[mEventKey].push_back(info);
213             // Safe to send these buckets to anomaly tracker since they must be full buckets.
214             // If it's a partial bucket, numBucketsForward would be 0.
215             addPastBucketToAnomalyTrackers(mEventKey, info.mDuration, mCurrentBucketNum + i);
216             VLOG("  add filling bucket with duration %lld", (long long)info.mDuration);
217         }
218     } else {
219         if (numBucketsForward >= 2) {
220             addPastBucketToAnomalyTrackers(mEventKey, 0, mCurrentBucketNum + numBucketsForward - 1);
221         }
222     }
223 
224     if (numBucketsForward > 0) {
225         mCurrentBucketStartTimeNs = fullBucketEnd + (numBucketsForward - 1) * mBucketSizeNs;
226         mCurrentBucketNum += numBucketsForward;
227     } else {  // We must be forming a partial bucket.
228         mCurrentBucketStartTimeNs = eventTimeNs;
229     }
230     mLastStartTime = mCurrentBucketStartTimeNs;
231     // Reset mHasHitGuardrail boolean since bucket was reset
232     mHasHitGuardrail = false;
233 
234     // If all stopped, then tell owner it's safe to remove this tracker on a full bucket.
235     // On a partial bucket, only clear if no anomaly trackers, as full bucket duration is used
236     // for anomaly detection.
237     // Note: Anomaly trackers can be added on config updates, in which case mAnomalyTrackers > 0 and
238     // the full bucket duration could be used, but this is very rare so it is okay to clear.
239     return mStarted.empty() && mPaused.empty() && (isFullBucket || mAnomalyTrackers.size() == 0);
240 }
241 
flushIfNeeded(int64_t eventTimeNs,const optional<UploadThreshold> & uploadThreshold,unordered_map<MetricDimensionKey,vector<DurationBucket>> * output)242 bool OringDurationTracker::flushIfNeeded(
243         int64_t eventTimeNs, const optional<UploadThreshold>& uploadThreshold,
244         unordered_map<MetricDimensionKey, vector<DurationBucket>>* output) {
245     if (eventTimeNs < getCurrentBucketEndTimeNs()) {
246         return false;
247     }
248     return flushCurrentBucket(eventTimeNs, uploadThreshold, /*globalConditionTrueNs=*/0, output);
249 }
250 
onSlicedConditionMayChange(const int64_t timestamp)251 void OringDurationTracker::onSlicedConditionMayChange(const int64_t timestamp) {
252     vector<pair<HashableDimensionKey, int>> startedToPaused;
253     vector<pair<HashableDimensionKey, int>> pausedToStarted;
254     if (!mStarted.empty()) {
255         for (auto it = mStarted.begin(); it != mStarted.end();) {
256             const auto& key = it->first;
257             const auto& condIt = mConditionKeyMap.find(key);
258             if (condIt == mConditionKeyMap.end()) {
259                 VLOG("Key %s dont have condition key", key.toString().c_str());
260                 ++it;
261                 continue;
262             }
263             ConditionState conditionState =
264                 mWizard->query(mConditionTrackerIndex, condIt->second,
265                                !mHasLinksToAllConditionDimensionsInTracker);
266             if (conditionState != ConditionState::kTrue) {
267                 startedToPaused.push_back(*it);
268                 it = mStarted.erase(it);
269                 VLOG("Key %s started -> paused", key.toString().c_str());
270             } else {
271                 ++it;
272             }
273         }
274 
275         if (mStarted.empty()) {
276             mStateKeyDurationMap[mEventKey.getStateValuesKey()].mDuration +=
277                     (timestamp - mLastStartTime);
278             VLOG("record duration %lld, total duration %lld for state key %s",
279                  (long long)(timestamp - mLastStartTime), (long long)getCurrentStateKeyDuration(),
280                  mEventKey.getStateValuesKey().toString().c_str());
281             detectAndDeclareAnomaly(
282                     timestamp, mCurrentBucketNum,
283                     getCurrentStateKeyDuration() + getCurrentStateKeyFullBucketDuration());
284         }
285     }
286 
287     if (!mPaused.empty()) {
288         for (auto it = mPaused.begin(); it != mPaused.end();) {
289             const auto& key = it->first;
290             if (mConditionKeyMap.find(key) == mConditionKeyMap.end()) {
291                 VLOG("Key %s dont have condition key", key.toString().c_str());
292                 ++it;
293                 continue;
294             }
295             ConditionState conditionState =
296                 mWizard->query(mConditionTrackerIndex, mConditionKeyMap[key],
297                                !mHasLinksToAllConditionDimensionsInTracker);
298             if (conditionState == ConditionState::kTrue) {
299                 pausedToStarted.push_back(*it);
300                 it = mPaused.erase(it);
301                 VLOG("Key %s paused -> started", key.toString().c_str());
302             } else {
303                 ++it;
304             }
305         }
306 
307         if (mStarted.empty() && pausedToStarted.size() > 0) {
308             mLastStartTime = timestamp;
309         }
310     }
311 
312     if (mStarted.empty() && !pausedToStarted.empty()) {
313         startAnomalyAlarm(timestamp);
314     }
315     mStarted.insert(pausedToStarted.begin(), pausedToStarted.end());
316     mPaused.insert(startedToPaused.begin(), startedToPaused.end());
317 
318     if (mStarted.empty()) {
319         stopAnomalyAlarm(timestamp);
320     }
321 }
322 
onConditionChanged(bool condition,const int64_t timestamp)323 void OringDurationTracker::onConditionChanged(bool condition, const int64_t timestamp) {
324     if (condition) {
325         if (!mPaused.empty()) {
326             VLOG("Condition true, all started");
327             if (mStarted.empty()) {
328                 mLastStartTime = timestamp;
329             }
330             if (mStarted.empty() && !mPaused.empty()) {
331                 startAnomalyAlarm(timestamp);
332             }
333             mStarted.insert(mPaused.begin(), mPaused.end());
334             mPaused.clear();
335         }
336     } else {
337         if (!mStarted.empty()) {
338             VLOG("Condition false, all paused");
339             mStateKeyDurationMap[mEventKey.getStateValuesKey()].mDuration +=
340                     (timestamp - mLastStartTime);
341             mPaused.insert(mStarted.begin(), mStarted.end());
342             mStarted.clear();
343             detectAndDeclareAnomaly(
344                     timestamp, mCurrentBucketNum,
345                     getCurrentStateKeyDuration() + getCurrentStateKeyFullBucketDuration());
346         }
347     }
348     if (mStarted.empty()) {
349         stopAnomalyAlarm(timestamp);
350     }
351 }
352 
onStateChanged(const int64_t timestamp,const int32_t atomId,const FieldValue & newState)353 void OringDurationTracker::onStateChanged(const int64_t timestamp, const int32_t atomId,
354                                           const FieldValue& newState) {
355     // Nothing needs to be done on a state change if we have not seen a start
356     // event, the metric is currently not active, or condition is false.
357     // For these cases, no keys are being tracked in mStarted, so update
358     // the current state key and return.
359     if (mStarted.empty()) {
360         updateCurrentStateKey(atomId, newState);
361         return;
362     }
363     // Add the current duration length to the previous state key and then update
364     // the last start time and current state key.
365     mStateKeyDurationMap[mEventKey.getStateValuesKey()].mDuration += (timestamp - mLastStartTime);
366     mLastStartTime = timestamp;
367     updateCurrentStateKey(atomId, newState);
368 }
369 
hasAccumulatedDuration() const370 bool OringDurationTracker::hasAccumulatedDuration() const {
371     return !mStarted.empty() || !mPaused.empty() || !mStateKeyDurationMap.empty();
372 }
373 
hasStartedDuration() const374 bool OringDurationTracker::hasStartedDuration() const {
375     return !mStarted.empty();
376 }
377 
predictAnomalyTimestampNs(const AnomalyTracker & anomalyTracker,const int64_t eventTimestampNs) const378 int64_t OringDurationTracker::predictAnomalyTimestampNs(const AnomalyTracker& anomalyTracker,
379                                                         const int64_t eventTimestampNs) const {
380     // The anomaly threshold.
381     const int64_t thresholdNs = anomalyTracker.getAnomalyThreshold();
382 
383     // The timestamp of the current bucket end.
384     const int64_t currentBucketEndNs = getCurrentBucketEndTimeNs();
385 
386     // The past duration ns for the current bucket of the current stateKey.
387     int64_t currentStateBucketPastNs =
388             getCurrentStateKeyDuration() + getCurrentStateKeyFullBucketDuration();
389 
390     // As we move into the future, old buckets get overwritten (so their old data is erased).
391     // Sum of past durations. Will change as we overwrite old buckets.
392     int64_t pastNs = currentStateBucketPastNs + anomalyTracker.getSumOverPastBuckets(mEventKey);
393 
394     // The refractory period end timestamp for dimension mEventKey.
395     const int64_t refractoryPeriodEndNs =
396             anomalyTracker.getRefractoryPeriodEndsSec(mEventKey) * NS_PER_SEC;
397 
398     // The anomaly should happen when accumulated wakelock duration is above the threshold and
399     // not within the refractory period.
400     int64_t anomalyTimestampNs =
401         std::max(eventTimestampNs + thresholdNs - pastNs, refractoryPeriodEndNs);
402     // If the predicted the anomaly timestamp is within the current bucket, return it directly.
403     if (anomalyTimestampNs <= currentBucketEndNs) {
404         return std::max(eventTimestampNs, anomalyTimestampNs);
405     }
406 
407     // Remove the old bucket.
408     if (anomalyTracker.getNumOfPastBuckets() > 0) {
409         pastNs -= anomalyTracker.getPastBucketValue(
410                             mEventKey,
411                             mCurrentBucketNum - anomalyTracker.getNumOfPastBuckets());
412         // Add the remaining of the current bucket to the accumulated wakelock duration.
413         pastNs += (currentBucketEndNs - eventTimestampNs);
414     } else {
415         // The anomaly depends on only one bucket.
416         pastNs = 0;
417     }
418 
419     // The anomaly will not happen in the current bucket. We need to iterate over the future buckets
420     // to predict the accumulated wakelock duration and determine the anomaly timestamp accordingly.
421     for (int futureBucketIdx = 1; futureBucketIdx <= anomalyTracker.getNumOfPastBuckets() + 1;
422             futureBucketIdx++) {
423         // The alarm candidate timestamp should meet two requirements:
424         // 1. the accumulated wakelock duration is above the threshold.
425         // 2. it is not within the refractory period.
426         // 3. the alarm timestamp falls in this bucket. Otherwise we need to flush the past buckets,
427         //    find the new alarm candidate timestamp and check these requirements again.
428         const int64_t bucketEndNs = currentBucketEndNs + futureBucketIdx * mBucketSizeNs;
429         int64_t anomalyTimestampNs =
430             std::max(bucketEndNs - mBucketSizeNs + thresholdNs - pastNs, refractoryPeriodEndNs);
431         if (anomalyTimestampNs <= bucketEndNs) {
432             return anomalyTimestampNs;
433         }
434         if (anomalyTracker.getNumOfPastBuckets() <= 0) {
435             continue;
436         }
437 
438         // No valid alarm timestamp is found in this bucket. The clock moves to the end of the
439         // bucket. Update the pastNs.
440         pastNs += mBucketSizeNs;
441         // 1. If the oldest past bucket is still in the past bucket window, we could fetch the past
442         // bucket and erase it from pastNs.
443         // 2. If the oldest past bucket is the current bucket, we should compute the
444         //   wakelock duration in the current bucket and erase it from pastNs.
445         // 3. Otherwise all othe past buckets are ancient.
446         if (futureBucketIdx < anomalyTracker.getNumOfPastBuckets()) {
447             pastNs -= anomalyTracker.getPastBucketValue(
448                     mEventKey,
449                     mCurrentBucketNum - anomalyTracker.getNumOfPastBuckets() + futureBucketIdx);
450         } else if (futureBucketIdx == anomalyTracker.getNumOfPastBuckets()) {
451             pastNs -= (currentStateBucketPastNs + (currentBucketEndNs - eventTimestampNs));
452         }
453     }
454 
455     return std::max(eventTimestampNs + thresholdNs, refractoryPeriodEndNs);
456 }
457 
dumpStates(int out,bool verbose) const458 void OringDurationTracker::dumpStates(int out, bool verbose) const {
459     dprintf(out, "\t\t started count %lu\n", (unsigned long)mStarted.size());
460     dprintf(out, "\t\t paused count %lu\n", (unsigned long)mPaused.size());
461     dprintf(out, "\t\t current duration %lld\n", (long long)getCurrentStateKeyDuration());
462 }
463 
getCurrentStateKeyDuration() const464 int64_t OringDurationTracker::getCurrentStateKeyDuration() const {
465     auto it = mStateKeyDurationMap.find(mEventKey.getStateValuesKey());
466     if (it == mStateKeyDurationMap.end()) {
467         return 0;
468     } else {
469         return it->second.mDuration;
470     }
471 }
472 
getCurrentStateKeyFullBucketDuration() const473 int64_t OringDurationTracker::getCurrentStateKeyFullBucketDuration() const {
474     auto it = mStateKeyDurationMap.find(mEventKey.getStateValuesKey());
475     if (it == mStateKeyDurationMap.end()) {
476         return 0;
477     } else {
478         return it->second.mDurationFullBucket;
479     }
480 }
481 
updateCurrentStateKey(const int32_t atomId,const FieldValue & newState)482 void OringDurationTracker::updateCurrentStateKey(const int32_t atomId, const FieldValue& newState) {
483     HashableDimensionKey* stateValuesKey = mEventKey.getMutableStateValuesKey();
484     for (size_t i = 0; i < stateValuesKey->getValues().size(); i++) {
485         if (stateValuesKey->getValues()[i].mField.getTag() == atomId) {
486             stateValuesKey->mutableValue(i)->mValue = newState.mValue;
487         }
488     }
489 }
490 
491 }  // namespace statsd
492 }  // namespace os
493 }  // namespace android
494