/*
 * Copyright (C) 2019 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// NOTE(review): the text this file was restored from had lost its line breaks and every `<...>`
// span (include targets and template argument lists). They are reinstated below on a best-effort
// basis -- confirm the include list and all signatures against Burst.h / BurstUtils.h.

#include "Burst.h"
#include "BurstUtils.h"

#include <android-base/logging.h>
#include <android-base/thread_annotations.h>
#include <nnapi/IBurst.h>
#include <nnapi/IPreparedModel.h>
#include <nnapi/Result.h>
#include <nnapi/TypeUtils.h>
#include <nnapi/Types.h>
#include <nnapi/Validation.h>
#include <nnapi/hal/1.0/Conversions.h>
#include <nnapi/hal/1.0/HandleError.h>
#include <nnapi/hal/1.0/ProtectCallback.h>
#include <nnapi/hal/CommonUtils.h>
#include <nnapi/hal/TransferValue.h>

#include <algorithm>
#include <cstring>
#include <limits>
#include <memory>
#include <string>
#include <thread>
#include <tuple>
#include <utility>
#include <vector>

#include "Callbacks.h"
#include "Conversions.h"
#include "Tracing.h"
#include "Utils.h"

namespace android::hardware::neuralnetworks::V1_2::utils {
namespace {

// Reusable execution bound to a burst controller. It stores an already-serialized request packet
// together with the relocation info and the cache holds created for that request, and replays the
// packet through Burst::executeInternal each time compute() is called.
class BurstExecution final : public nn::IExecution,
                             public std::enable_shared_from_this<BurstExecution> {
    struct PrivateConstructorTag {};

  public:
    // Returns an error if `controller` is null; otherwise wraps the arguments in a new object.
    static nn::GeneralResult<std::shared_ptr<const BurstExecution>> create(
            std::shared_ptr<const Burst> controller, std::vector<FmqRequestDatum> request,
            hal::utils::RequestRelocation relocation,
            std::vector<Burst::OptionalCacheHold> cacheHolds);

    // Public only so std::make_shared can reach it; PrivateConstructorTag keeps outside code from
    // calling it directly.
    BurstExecution(PrivateConstructorTag tag, std::shared_ptr<const Burst> controller,
                   std::vector<FmqRequestDatum> request, hal::utils::RequestRelocation relocation,
                   std::vector<Burst::OptionalCacheHold> cacheHolds);

    // Sends the stored packet through the controller. The deadline argument is ignored.
    nn::ExecutionResult<std::pair<std::vector<nn::OutputShape>, nn::Timing>> compute(
            const nn::OptionalTimePoint& deadline) const override;

    // Always fails with GENERAL_FAILURE: fenced execution is not supported on a burst object.
    nn::GeneralResult<std::pair<nn::SyncFence, nn::ExecuteFencedInfoCallback>> computeFenced(
            const std::vector<nn::SyncFence>& waitFor, const nn::OptionalTimePoint& deadline,
            const nn::OptionalDuration& timeoutDurationAfterFence) const override;

  private:
    const std::shared_ptr<const Burst> kController;
    const std::vector<FmqRequestDatum> kRequest;
    const hal::utils::RequestRelocation kRelocation;
    const std::vector<Burst::OptionalCacheHold> kCacheHolds;
};

// Converts the (status, context) pair reported by IPreparedModel::configureExecutionBurst into a
// GeneralResult. A null context is reported as GENERAL_FAILURE.
nn::GeneralResult<sp<IBurstContext>> executionBurstResultCallback(
        V1_0::ErrorStatus status, const sp<IBurstContext>& burstContext) {
    // Presumably returns early with the streamed message when `status` reports a failure --
    // HANDLE_STATUS_HIDL is defined outside this file.
    HANDLE_STATUS_HIDL(status) << "IPreparedModel::configureExecutionBurst failed with status "
                               << toString(status);
    if (burstContext == nullptr) {
        return NN_ERROR(nn::ErrorStatus::GENERAL_FAILURE)
               << "IPreparedModel::configureExecutionBurst returned nullptr for burst";
    }
    return burstContext;
}

// Looks up every slot in `memoryCache` and converts the cached memory to its HIDL form. Fails on
// the first slot that cannot be looked up, cannot be converted, or converts to an invalid
// hidl_memory.
nn::GeneralResult<hidl_vec<hidl_memory>> getMemoriesHelper(
        const hidl_vec<int32_t>& slots, const std::shared_ptr<Burst::MemoryCache>& memoryCache) {
    hidl_vec<hidl_memory> memories(slots.size());
    for (size_t i = 0; i < slots.size(); ++i) {
        const int32_t slot = slots[i];
        const auto memory = NN_TRY(memoryCache->getMemory(slot));
        memories[i] = NN_TRY(V1_0::utils::unvalidatedConvert(memory));
        if (!memories[i].valid()) {
            return NN_ERROR() << "memory at slot " << slot << " is invalid";
        }
    }
    return memories;
}

}  // namespace

// MemoryCache methods

// Reserves room for 1024 entries in each of the slot containers up front.
Burst::MemoryCache::MemoryCache() {
    constexpr size_t kPreallocatedCount = 1024;
    std::vector<int32_t> freeSlotsSpace;
    freeSlotsSpace.reserve(kPreallocatedCount);
    mFreeSlots = std::stack<int32_t, std::vector<int32_t>>(std::move(freeSlotsSpace));
    mMemoryCache.reserve(kPreallocatedCount);
    mCacheCleaner.reserve(kPreallocatedCount);
}

// Stores the burst context that freeMemory() notifies when a slot is released.
void Burst::MemoryCache::setBurstContext(sp<IBurstContext> burstContext) {
    std::lock_guard guard(mMutex);
    mBurstContext = std::move(burstContext);
}

// Returns the slot assigned to `memory` together with a shared "hold" object. When the last copy
// of the hold is destroyed, its cleanup task calls freeMemory() for that memory (provided this
// cache is still alive).
std::pair<int32_t, Burst::MemoryCache::SharedCleanup> Burst::MemoryCache::cacheMemory(
        const nn::SharedMemory& memory) {
    std::unique_lock lock(mMutex);
    base::ScopedLockAssertion lockAssert(mMutex);

    // Use existing cache entry if (1) the Memory object is in the cache and (2) the cache entry is
    // not currently being freed.
    auto iter = mMemoryIdToSlot.find(memory);
    while (iter != mMemoryIdToSlot.end()) {
        const int32_t slot = iter->second;
        if (auto cleaner = mCacheCleaner.at(slot).lock()) {
            return std::make_pair(slot, std::move(cleaner));
        }

        // If the code reaches this point, the Memory object was in the cache, but is currently
        // being destroyed. This code waits until the cache entry has been freed, then loops to
        // ensure the cache entry has been freed or has been made present by another thread.
        mCond.wait(lock);
        iter = mMemoryIdToSlot.find(memory);
    }

    // Allocate a new cache entry.
    const int32_t slot = allocateSlotLocked();
    mMemoryIdToSlot[memory] = slot;
    mMemoryCache[slot] = memory;

    // Create reference-counted self-cleaning cache object. Only a weak reference to this cache is
    // captured, so an outstanding hold does not keep the cache alive.
    auto self = weak_from_this();
    Task cleanup = [memory, memoryCache = std::move(self)] {
        if (const auto lock = memoryCache.lock()) {
            lock->freeMemory(memory);
        }
    };
    auto cleaner = std::make_shared<const Cleanup>(std::move(cleanup));
    mCacheCleaner[slot] = cleaner;

    return std::make_pair(slot, std::move(cleaner));
}

// Returns the memory stored at `slot`, or an error if `slot` is negative or not smaller than the
// number of slots allocated so far.
nn::GeneralResult<nn::SharedMemory> Burst::MemoryCache::getMemory(int32_t slot) {
    std::lock_guard guard(mMutex);
    if (slot < 0 || static_cast<size_t>(slot) >= mMemoryCache.size()) {
        return NN_ERROR() << "Invalid slot: " << slot << " vs " << mMemoryCache.size();
    }
    return mMemoryCache[slot];
}

// Releases the slot held by `memory`: tells the burst context (if one is set) to free it, clears
// the local bookkeeping, and returns the slot to the free list.
void Burst::MemoryCache::freeMemory(const nn::SharedMemory& memory) {
    {
        std::lock_guard guard(mMutex);
        const int32_t slot = mMemoryIdToSlot.at(memory);
        if (mBurstContext) {
            const auto ret = mBurstContext->freeMemory(slot);
            if (!ret.isOk()) {
                LOG(ERROR) << "IBurstContext::freeMemory failed: " << ret.description();
            }
        }
        mMemoryIdToSlot.erase(memory);
        mMemoryCache[slot] = {};
        mCacheCleaner[slot].reset();
        mFreeSlots.push(slot);
    }
    // Wake any cacheMemory() caller waiting for this entry to finish being freed. The notification
    // is issued after the mutex has been released.
    mCond.notify_all();
}

// Picks a slot index for a new entry. Called from cacheMemory() while mMutex is held.
int32_t Burst::MemoryCache::allocateSlotLocked() {
    constexpr size_t kMaxNumberOfSlots = std::numeric_limits<int32_t>::max();

    // If there is a free slot, use it.
    if (!mFreeSlots.empty()) {
        const int32_t slot = mFreeSlots.top();
        mFreeSlots.pop();
        return slot;
    }

    // Use a slot for the first time.
    CHECK_LT(mMemoryCache.size(), kMaxNumberOfSlots) << "Exceeded maximum number of slots!";
    const int32_t slot = static_cast<int32_t>(mMemoryCache.size());
    mMemoryCache.emplace_back();
    mCacheCleaner.emplace_back();

    return slot;
}

// ExecutionBurstCallback methods

// `memoryCache` must be non-null; it is stored in kMemoryCache and re-acquired with lock() on each
// getMemories() call.
Burst::ExecutionBurstCallback::ExecutionBurstCallback(
        const std::shared_ptr<MemoryCache>& memoryCache)
    : kMemoryCache(memoryCache) {
    CHECK(memoryCache != nullptr);
}

// Resolves `slots` to memories and reports them through `cb`. Reports GENERAL_FAILURE if the
// memory cache no longer exists and INVALID_ARGUMENT if any slot cannot be resolved.
Return<void> Burst::ExecutionBurstCallback::getMemories(const hidl_vec<int32_t>& slots,
                                                        getMemories_cb cb) {
    const auto memoryCache = kMemoryCache.lock();
    if (memoryCache == nullptr) {
        LOG(ERROR) << "Burst::ExecutionBurstCallback::getMemories called after the MemoryCache has "
                      "been freed";
        cb(V1_0::ErrorStatus::GENERAL_FAILURE, {});
        return Void();
    }

    const auto maybeMemories = getMemoriesHelper(slots, memoryCache);
    if (!maybeMemories.has_value()) {
        const auto& [message, code] = maybeMemories.error();
        LOG(ERROR) << "Burst::ExecutionBurstCallback::getMemories failed with " << code << ": "
                   << message;
        cb(V1_0::ErrorStatus::INVALID_ARGUMENT, {});
        return Void();
    }

    cb(V1_0::ErrorStatus::NONE, maybeMemories.value());
    return Void();
}

// Burst methods

// Builds a burst controller on top of `hidlPreparedModel`: creates the request/result FMQ
// channels, the memory cache and its callback, configures the burst on the HIDL prepared model,
// and registers both channels with a death handler for the returned burst context.
nn::GeneralResult<std::shared_ptr<const Burst>> Burst::create(
        nn::SharedPreparedModel preparedModel, const sp<V1_2::IPreparedModel>& hidlPreparedModel,
        std::chrono::microseconds pollingTimeWindow) {
    // check inputs
    if (preparedModel == nullptr || hidlPreparedModel == nullptr) {
        return NN_ERROR() << "Burst::create passed a nullptr";
    }

    // create FMQ objects
    auto [requestChannelSender, requestChannelDescriptor] =
            NN_TRY(RequestChannelSender::create(kExecutionBurstChannelLength));
    auto [resultChannelReceiver, resultChannelDescriptor] =
            NN_TRY(ResultChannelReceiver::create(kExecutionBurstChannelLength, pollingTimeWindow));

    // check FMQ objects
    CHECK(requestChannelSender != nullptr);
    CHECK(requestChannelDescriptor != nullptr);
    CHECK(resultChannelReceiver != nullptr);
    CHECK(resultChannelDescriptor != nullptr);

    // create memory cache
    auto memoryCache = std::make_shared<MemoryCache>();

    // create callback object
    auto burstCallback = sp<ExecutionBurstCallback>::make(memoryCache);
    auto cb = hal::utils::CallbackValue(executionBurstResultCallback);

    // configure burst
    const Return<void> ret = hidlPreparedModel->configureExecutionBurst(
            burstCallback, *requestChannelDescriptor, *resultChannelDescriptor, cb);
    HANDLE_TRANSPORT_FAILURE(ret);

    auto burstContext = NN_TRY(cb.take());
    memoryCache->setBurstContext(burstContext);

    // create death handler object
    auto deathHandler = NN_TRY(neuralnetworks::utils::DeathHandler::create(burstContext));
    deathHandler.protectCallbackForLifetimeOfDeathHandler(requestChannelSender.get());
    deathHandler.protectCallbackForLifetimeOfDeathHandler(resultChannelReceiver.get());

    // make and return controller
    return std::make_shared<const Burst>(
            PrivateConstructorTag{}, std::move(preparedModel), std::move(requestChannelSender),
            std::move(resultChannelReceiver), std::move(burstCallback), std::move(burstContext),
            std::move(memoryCache), std::move(deathHandler));
}

// Takes ownership of every collaborator assembled by create().
Burst::Burst(PrivateConstructorTag /*tag*/, nn::SharedPreparedModel preparedModel,
             std::unique_ptr<RequestChannelSender> requestChannelSender,
             std::unique_ptr<ResultChannelReceiver> resultChannelReceiver,
             sp<ExecutionBurstCallback> callback, sp<IBurstContext> burstContext,
             std::shared_ptr<MemoryCache> memoryCache,
             neuralnetworks::utils::DeathHandler deathHandler)
    : kPreparedModel(std::move(preparedModel)),
      mRequestChannelSender(std::move(requestChannelSender)),
      mResultChannelReceiver(std::move(resultChannelReceiver)),
      mBurstCallback(std::move(callback)),
      mBurstContext(std::move(burstContext)),
      mMemoryCache(std::move(memoryCache)),
      kDeathHandler(std::move(deathHandler)) {}

// Caches `memory` and returns the hold that keeps its cache entry alive.
Burst::OptionalCacheHold Burst::cacheMemory(const nn::SharedMemory& memory) const {
    // Only the hold is handed out; the slot number is not exposed. Taking `.second` from the
    // temporary pair moves the hold out instead of copying it.
    return mMemoryCache->cacheMemory(memory).second;
}

// Runs `request` over the burst channels. Requests that are not compliant with this HAL version,
// and requests whose packet cannot be sent, are executed through kPreparedModel instead.
nn::ExecutionResult<std::pair<std::vector<nn::OutputShape>, nn::Timing>> Burst::execute(
        const nn::Request& request, nn::MeasureTiming measure,
        const nn::OptionalTimePoint& deadline, const nn::OptionalDuration& loopTimeoutDuration,
        const std::vector<nn::TokenValuePair>& /*hints*/,
        const std::vector<nn::ExtensionNameAndPrefix>& /*extensionNameToPrefix*/) const {
    // This is the first point when we know an execution is occurring, so begin to collect
    // systraces. Note that the first point we can begin collecting systraces in
    // ExecutionBurstServer is when the RequestChannelReceiver realizes there is data in the FMQ, so
    // ExecutionBurstServer collects systraces at different points in the code.
    NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "Burst::execute");

    // if the request is valid but of a higher version than what's supported in burst execution,
    // fall back to another execution path
    if (!compliantVersion(request).ok()) {
        // the burst path cannot carry this request, so delegate to the prepared model
        return kPreparedModel->execute(request, measure, deadline, loopTimeoutDuration, {}, {});
    }

    // ensure that request is ready for IPC
    std::optional<nn::Request> maybeRequestInShared;
    hal::utils::RequestRelocation relocation;
    const nn::Request& requestInShared = NN_TRY(hal::utils::convertRequestFromPointerToShared(
            &request, nn::kDefaultRequestMemoryAlignment, nn::kMinMemoryPadding,
            &maybeRequestInShared, &relocation));

    // clear pools field of request, as they will be provided via slots
    const auto requestWithoutPools = nn::Request{
            .inputs = requestInShared.inputs, .outputs = requestInShared.outputs, .pools = {}};
    auto hidlRequest = NN_TRY(V1_0::utils::unvalidatedConvert(requestWithoutPools));
    const auto hidlMeasure = NN_TRY(convert(measure));

    // `holds` keeps every cache entry used by this request alive until this function returns.
    std::vector<int32_t> slots;
    std::vector<OptionalCacheHold> holds;
    slots.reserve(requestInShared.pools.size());
    holds.reserve(requestInShared.pools.size());
    for (const auto& memoryPool : requestInShared.pools) {
        auto [slot, hold] = mMemoryCache->cacheMemory(std::get<nn::SharedMemory>(memoryPool));
        slots.push_back(slot);
        holds.push_back(std::move(hold));
    }

    // send request packet
    const auto requestPacket = serialize(hidlRequest, hidlMeasure, slots);
    // Invoked by executeInternal only when the packet cannot be sent. The by-reference captures
    // are safe because the lambda is consumed before this function returns.
    const auto fallback = [this, &request, measure, &deadline, &loopTimeoutDuration] {
        return kPreparedModel->execute(request, measure, deadline, loopTimeoutDuration, {}, {});
    };
    return executeInternal(requestPacket, relocation, fallback);
}

// See IBurst::createReusableExecution for information on this method.
nn::GeneralResult<nn::SharedExecution> Burst::createReusableExecution(
        const nn::Request& request, nn::MeasureTiming measure,
        const nn::OptionalDuration& loopTimeoutDuration,
        const std::vector<nn::TokenValuePair>& /*hints*/,
        const std::vector<nn::ExtensionNameAndPrefix>& /*extensionNameToPrefix*/) const {
    NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "Burst::createReusableExecution");

    // if the request is valid but of a higher version than what's supported in burst execution,
    // fall back to another execution path
    if (!compliantVersion(request).ok()) {
        // the burst path cannot carry this request, so delegate to the prepared model
        return kPreparedModel->createReusableExecution(request, measure, loopTimeoutDuration, {},
                                                       {});
    }

    // ensure that request is ready for IPC
    std::optional<nn::Request> maybeRequestInShared;
    hal::utils::RequestRelocation relocation;
    const nn::Request& requestInShared = NN_TRY(hal::utils::convertRequestFromPointerToShared(
            &request, nn::kDefaultRequestMemoryAlignment, nn::kMinMemoryPadding,
            &maybeRequestInShared, &relocation));

    // clear pools field of request, as they will be provided via slots
    const auto requestWithoutPools = nn::Request{
            .inputs = requestInShared.inputs, .outputs = requestInShared.outputs, .pools = {}};
    auto hidlRequest = NN_TRY(V1_0::utils::unvalidatedConvert(requestWithoutPools));
    const auto hidlMeasure = NN_TRY(convert(measure));

    // The holds are handed to the BurstExecution so the cache entries outlive this call.
    std::vector<int32_t> slots;
    std::vector<OptionalCacheHold> holds;
    slots.reserve(requestInShared.pools.size());
    holds.reserve(requestInShared.pools.size());
    for (const auto& memoryPool : requestInShared.pools) {
        auto [slot, hold] = mMemoryCache->cacheMemory(std::get<nn::SharedMemory>(memoryPool));
        slots.push_back(slot);
        holds.push_back(std::move(hold));
    }

    // Deliberately not const: a const object cannot be moved from, so std::move below would
    // silently copy the whole packet.
    auto requestPacket = serialize(hidlRequest, hidlMeasure, slots);
    return BurstExecution::create(shared_from_this(), std::move(requestPacket),
                                  std::move(relocation), std::move(holds));
}

// Sends `requestPacket` and blocks for the result. Fails immediately if another execution is
// already in flight on this burst. If the packet cannot be sent, `fallback` is used when it is
// non-null; otherwise the send error is returned.
nn::ExecutionResult<std::pair<std::vector<nn::OutputShape>, nn::Timing>> Burst::executeInternal(
        const std::vector<FmqRequestDatum>& requestPacket,
        const hal::utils::RequestRelocation& relocation, FallbackFunction fallback) const {
    NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_EXECUTION, "Burst::executeInternal");

    // Ensure that at most one execution is in flight at any given time.
    const bool alreadyInFlight = mExecutionInFlight.test_and_set();
    if (alreadyInFlight) {
        return NN_ERROR() << "IBurst already has an execution in flight";
    }
    // Clears the in-flight flag on every exit path below.
    const auto guard = base::make_scope_guard([this] { mExecutionInFlight.clear(); });

    if (relocation.input) {
        relocation.input->flush();
    }

    // send request packet
    const auto sendStatus = mRequestChannelSender->sendPacket(requestPacket);
    if (!sendStatus.ok()) {
        // fallback to another execution path if the packet could not be sent
        if (fallback) {
            return fallback();
        }
        return NN_ERROR() << "Error sending FMQ packet: " << sendStatus.error();
    }

    // get result packet
    const auto [status, outputShapes, timing] = NN_TRY(mResultChannelReceiver->getBlocking());

    if (relocation.output) {
        relocation.output->flush();
    }
    return executionCallback(status, outputShapes, timing);
}

// Rejects a null controller; otherwise moves all arguments into a new BurstExecution.
nn::GeneralResult<std::shared_ptr<const BurstExecution>> BurstExecution::create(
        std::shared_ptr<const Burst> controller, std::vector<FmqRequestDatum> request,
        hal::utils::RequestRelocation relocation,
        std::vector<Burst::OptionalCacheHold> cacheHolds) {
    if (controller == nullptr) {
        return NN_ERROR() << "V1_2::utils::BurstExecution::create must have non-null controller";
    }

    return std::make_shared<const BurstExecution>(PrivateConstructorTag{}, std::move(controller),
                                                  std::move(request), std::move(relocation),
                                                  std::move(cacheHolds));
}

BurstExecution::BurstExecution(PrivateConstructorTag /*tag*/,
                               std::shared_ptr<const Burst> controller,
                               std::vector<FmqRequestDatum> request,
                               hal::utils::RequestRelocation relocation,
                               std::vector<Burst::OptionalCacheHold> cacheHolds)
    : kController(std::move(controller)),
      kRequest(std::move(request)),
      kRelocation(std::move(relocation)),
      kCacheHolds(std::move(cacheHolds)) {}

// Replays the stored packet. No fallback is supplied, so a send failure is returned as an error.
nn::ExecutionResult<std::pair<std::vector<nn::OutputShape>, nn::Timing>> BurstExecution::compute(
        const nn::OptionalTimePoint& /*deadline*/) const {
    return kController->executeInternal(kRequest, kRelocation, /*fallback=*/nullptr);
}

nn::GeneralResult<std::pair<nn::SyncFence, nn::ExecuteFencedInfoCallback>>
BurstExecution::computeFenced(const std::vector<nn::SyncFence>& /*waitFor*/,
                              const nn::OptionalTimePoint& /*deadline*/,
                              const nn::OptionalDuration& /*timeoutDurationAfterFence*/) const {
    return NN_ERROR(nn::ErrorStatus::GENERAL_FAILURE)
           << "IExecution::computeFenced is not supported on burst object";
}

}  // namespace android::hardware::neuralnetworks::V1_2::utils