/* * Copyright (C) 2021 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #pragma once #include #include #include #include #include #include #include #include #include #include #include #include namespace android::mediautils { /** * A thread for deferred execution of tasks, with cancellation. */ class TimerThread { public: // A Handle is a time_point that serves as a unique key to access a queued // request to the TimerThread. using Handle = std::chrono::steady_clock::time_point; // Duration is based on steady_clock (typically nanoseconds) // vs the system_clock duration (typically microseconds). using Duration = std::chrono::steady_clock::duration; static inline constexpr Handle INVALID_HANDLE = std::chrono::steady_clock::time_point::min(); // Handle implementation details: // A Handle represents the timer expiration time based on std::chrono::steady_clock // (clock monotonic). This Handle is computed as now() + timeout. // // The lsb of the Handle time_point is adjusted to indicate whether there is // a timeout action (1) or not (0). // template static constexpr bool is_power_of_2_v = COUNT > 0 && (COUNT & (COUNT - 1)) == 0; template static constexpr size_t mask_from_count_v = COUNT - 1; static constexpr size_t HANDLE_TYPES = 2; // HANDLE_TYPES must be a power of 2. static_assert(is_power_of_2_v); // The handle types enum class HANDLE_TYPE : size_t { NO_TIMEOUT = 0, TIMEOUT = 1, }; static constexpr size_t HANDLE_TYPE_MASK = mask_from_count_v; template static constexpr auto enum_as_value(T x) { return static_cast>(x); } static inline bool isNoTimeoutHandle(Handle handle) { return (handle.time_since_epoch().count() & HANDLE_TYPE_MASK) == enum_as_value(HANDLE_TYPE::NO_TIMEOUT); } static inline bool isTimeoutHandle(Handle handle) { return (handle.time_since_epoch().count() & HANDLE_TYPE_MASK) == enum_as_value(HANDLE_TYPE::TIMEOUT); } // Returns a unique Handle that doesn't exist in the container. template static Handle getUniqueHandleForHandleType_l(C container, T timeout) { static_assert(MAX_TYPED_HANDLES > 0 && HANDLE_TYPE_AS_VALUE < MAX_TYPED_HANDLES && is_power_of_2_v, " handles must be power of two"); // Our initial handle is the deadline as computed from steady_clock. auto deadline = std::chrono::steady_clock::now() + timeout; // We adjust the lsbs by the minimum increment to have the correct // HANDLE_TYPE in the least significant bits. auto remainder = deadline.time_since_epoch().count() & HANDLE_TYPE_MASK; size_t offset = HANDLE_TYPE_AS_VALUE > remainder ? HANDLE_TYPE_AS_VALUE - remainder : MAX_TYPED_HANDLES + HANDLE_TYPE_AS_VALUE - remainder; deadline += std::chrono::steady_clock::duration(offset); // To avoid key collisions, advance the handle by MAX_TYPED_HANDLES (the modulus factor) // until the key is unique. while (container.find(deadline) != container.end()) { deadline += std::chrono::steady_clock::duration(MAX_TYPED_HANDLES); } return deadline; } // TimerCallback invoked on timeout or cancel. using TimerCallback = std::function; /** * Schedules a task to be executed in the future (`timeout` duration from now). * * \param tag string associated with the task. This need not be unique, * as the Handle returned is used for cancelling. * \param func callback function that is invoked at the timeout. * \param timeoutDuration timeout duration which is converted to milliseconds with at * least 45 integer bits. * A timeout of 0 (or negative) means the timer never expires * so func() is never called. These tasks are stored internally * and reported in the toString() until manually cancelled. * \returns a handle that can be used for cancellation. */ Handle scheduleTask( std::string_view tag, TimerCallback&& func, Duration timeoutDuration, Duration secondChanceDuration); /** * Tracks a task that shows up on toString() until cancelled. * * \param tag string associated with the task. * \returns a handle that can be used for cancellation. */ Handle trackTask(std::string_view tag); /** * Cancels a task previously scheduled with scheduleTask() * or trackTask(). * * \returns true if cancelled. If the task has already executed * or if the handle doesn't exist, this is a no-op * and returns false. */ bool cancelTask(Handle handle); struct SnapshotAnalysis; /** * Take a snapshot of the current state of the TimerThread and determine the * potential cause of a deadlock. * \param retiredCount The number of successfully retired calls to capture * (may be many). * \return See below for a description of a SnapShotAnalysis object */ SnapshotAnalysis getSnapshotAnalysis(size_t retiredCount = SIZE_MAX) const; /** * Returns a string representation of the TimerThread queue. * * The queue is dumped in order of scheduling (not deadline). */ std::string pendingToString() const; /** * Returns a string representation of the last retired tasks. * * These tasks from trackTask() or scheduleTask() are * cancelled. * * These are ordered when the task was retired. * * \param n is maximum number of tasks to dump. */ std::string retiredToString(size_t n = SIZE_MAX) const; /** * Returns a string representation of the last timeout tasks. * * These tasks from scheduleTask() which have timed-out. * * These are ordered when the task had timed-out. * * \param n is maximum number of tasks to dump. */ std::string timeoutToString(size_t n = SIZE_MAX) const; /** * Dumps a container with SmartPointer to a string. * * "{ Request1 } { Request2} ...{ RequestN }" */ template static std::string requestsToString(const T& containerRequests) { std::string s; // append seems to be faster than stringstream. // https://stackoverflow.com/questions/18892281/most-optimized-way-of-concatenation-in-strings for (const auto& request : containerRequests) { s.append("{ ").append(request->toString()).append(" } "); } // If not empty, there's an extra space at the end, so we trim it off. if (!s.empty()) s.pop_back(); return s; } // To minimize movement of data, we pass around shared_ptrs to Requests. // These are allocated and deallocated outside of the lock. // TODO(b/243839867) consider options to merge Request with the // TimeCheck::TimeCheckHandler struct. struct Request { Request(std::chrono::system_clock::time_point _scheduled, std::chrono::system_clock::time_point _deadline, Duration _secondChanceDuration, pid_t _tid, std::string_view _tag) : scheduled(_scheduled) , deadline(_deadline) , secondChanceDuration(_secondChanceDuration) , tid(_tid) , tag(_tag) {} const std::chrono::system_clock::time_point scheduled; const std::chrono::system_clock::time_point deadline; // deadline := scheduled // + timeoutDuration // + secondChanceDuration // if deadline == scheduled, no // timeout, task not executed. Duration secondChanceDuration; const pid_t tid; const FixedString62 tag; std::string toString() const; }; // SnapshotAnalysis contains info deduced by analysisTimeout(). struct SnapshotAnalysis { // If we were unable to determine any applicable thread ids, // we leave their value as INVALID_PID. // Note, we use the linux thread id (not pthread), so its type is pid_t. static constexpr pid_t INVALID_PID = -1; // Description of likely issue and/or blocked method. // Empty if no actionable info. std::string description; // Tid of the (latest) monitored thread which has timed out. // This is the thread which the suspect is deduced with respect to. // Most often, this is the thread which an abort is being triggered // from. pid_t timeoutTid = INVALID_PID; // Tid of the (HAL) thread which has likely halted progress, selected // from pendingRequests. May be the same as timeoutTid, if the timed-out // thread directly called into the HAL. pid_t suspectTid = INVALID_PID; // Number of second chances given by the timer thread size_t secondChanceCount; // List of pending requests std::vector> pendingRequests; // List of timed-out requests std::vector> timeoutRequests; // List of retired requests std::vector> retiredRequests; // mutex deadlock / wait detection information. bool hasMutexCycle = false; std::vector> mutexWaitChain; // Dumps the information contained above as well as additional call // stacks where applicable. std::string toString(bool showTimeoutStack = true) const; }; private: // Deque of requests, in order of add(). // This class is thread-safe. class RequestQueue { public: explicit RequestQueue(size_t maxSize) : mRequestQueueMax(maxSize) {} void add(std::shared_ptr); // return up to the last "n" requests retired. void copyRequests(std::vector>& requests, size_t n = SIZE_MAX) const; private: const size_t mRequestQueueMax; mutable std::mutex mRQMutex; std::deque>> mRequestQueue GUARDED_BY(mRQMutex); }; // A storage map of tasks without timeouts. There is no TimerCallback // required, it just tracks the tasks with the tag, scheduled time and the tid. // These tasks show up on a pendingToString() until manually cancelled. class NoTimeoutMap { mutable std::mutex mNTMutex; std::map> mMap GUARDED_BY(mNTMutex); Handle getUniqueHandle_l() REQUIRES(mNTMutex) { return getUniqueHandleForHandleType_l< HANDLE_TYPES, enum_as_value(HANDLE_TYPE::NO_TIMEOUT)>( mMap, Duration{} /* timeout */); } public: bool isValidHandle(Handle handle) const; // lock free Handle add(std::shared_ptr request); std::shared_ptr remove(Handle handle); void copyRequests(std::vector>& requests) const; }; // Monitor thread. // This thread manages shared pointers to Requests and a function to // call on timeout. // This class is thread-safe. class MonitorThread { std::atomic mSecondChanceCount{}; mutable std::mutex mMutex; mutable std::condition_variable mCond GUARDED_BY(mMutex); // Ordered map of requests based on time of deadline. // std::map, TimerCallback>> mMonitorRequests GUARDED_BY(mMutex); // Due to monotonic/steady clock inaccuracies during suspend, // we allow an additional second chance waiting time to prevent // false removal. // This mSecondChanceRequests queue is almost always empty. // Using a pair with the original handle allows lookup and keeps // the Key unique. std::map, std::pair, TimerCallback>> mSecondChanceRequests GUARDED_BY(mMutex); RequestQueue& mTimeoutQueue GUARDED_BY(mMutex); // added to when request times out. // Worker thread variables bool mShouldExit GUARDED_BY(mMutex) = false; // To avoid race with initialization, // mThread should be initialized last as the thread is launched immediately. std::thread mThread; void threadFunc(); Handle getUniqueHandle_l(Duration timeout) REQUIRES(mMutex) { return getUniqueHandleForHandleType_l< HANDLE_TYPES, enum_as_value(HANDLE_TYPE::TIMEOUT)>( mMonitorRequests, timeout); } public: MonitorThread(RequestQueue &timeoutQueue); ~MonitorThread(); Handle add(std::shared_ptr request, TimerCallback&& func, Duration timeout); std::shared_ptr remove(Handle handle); void copyRequests(std::vector>& requests) const; size_t getSecondChanceCount() const { return mSecondChanceCount.load(std::memory_order_relaxed); } }; // A HAL method is where the substring "Hidl" is in the class name. // The tag should look like: ... Hidl ... :: ... static bool isRequestFromHal(const std::shared_ptr& request); std::vector> getPendingRequests() const; static constexpr size_t kRetiredQueueMax = 16; RequestQueue mRetiredQueue{kRetiredQueueMax}; // locked internally static constexpr size_t kTimeoutQueueMax = 16; RequestQueue mTimeoutQueue{kTimeoutQueueMax}; // locked internally NoTimeoutMap mNoTimeoutMap; // locked internally MonitorThread mMonitorThread{mTimeoutQueue}; // This should be initialized last because // the thread is launched immediately. // Locked internally. }; } // namespace android::mediautils