1 /*
2  * Copyright (C) 2019 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "Burst.h"
18 #include "BurstUtils.h"
19 
20 #include <android-base/logging.h>
21 #include <android-base/thread_annotations.h>
22 #include <nnapi/IBurst.h>
23 #include <nnapi/IPreparedModel.h>
24 #include <nnapi/Result.h>
25 #include <nnapi/TypeUtils.h>
26 #include <nnapi/Types.h>
27 #include <nnapi/Validation.h>
28 #include <nnapi/hal/1.0/Conversions.h>
29 #include <nnapi/hal/1.0/HandleError.h>
30 #include <nnapi/hal/1.0/ProtectCallback.h>
31 #include <nnapi/hal/CommonUtils.h>
32 #include <nnapi/hal/TransferValue.h>
33 
34 #include <algorithm>
35 #include <cstring>
36 #include <limits>
37 #include <memory>
38 #include <string>
39 #include <thread>
40 #include <tuple>
41 #include <utility>
42 #include <vector>
43 
44 #include "Callbacks.h"
45 #include "Conversions.h"
46 #include "Tracing.h"
47 #include "Utils.h"
48 
49 namespace android::hardware::neuralnetworks::V1_2::utils {
50 namespace {
51 
// Reusable burst execution (see nn::IExecution) returned by Burst::createReusableExecution.
// Stores the pre-serialized FMQ request packet, the relocation used to flush input/output
// memory, and the cache holds that keep the request's memory slots registered for as long as
// this object lives, so repeated compute() calls skip serialization and memory registration.
class BurstExecution final : public nn::IExecution,
                             public std::enable_shared_from_this<BurstExecution> {
    // Tag type restricting construction to create() while keeping the constructor public for
    // std::make_shared.
    struct PrivateConstructorTag {};

  public:
    static nn::GeneralResult<std::shared_ptr<const BurstExecution>> create(
            std::shared_ptr<const Burst> controller, std::vector<FmqRequestDatum> request,
            hal::utils::RequestRelocation relocation,
            std::vector<Burst::OptionalCacheHold> cacheHolds);

    BurstExecution(PrivateConstructorTag tag, std::shared_ptr<const Burst> controller,
                   std::vector<FmqRequestDatum> request, hal::utils::RequestRelocation relocation,
                   std::vector<Burst::OptionalCacheHold> cacheHolds);

    // Runs the stored request packet via Burst::executeInternal (no fallback path).
    nn::ExecutionResult<std::pair<std::vector<nn::OutputShape>, nn::Timing>> compute(
            const nn::OptionalTimePoint& deadline) const override;

    // Always returns GENERAL_FAILURE: fenced execution is not supported on burst objects.
    nn::GeneralResult<std::pair<nn::SyncFence, nn::ExecuteFencedInfoCallback>> computeFenced(
            const std::vector<nn::SyncFence>& waitFor, const nn::OptionalTimePoint& deadline,
            const nn::OptionalDuration& timeoutDurationAfterFence) const override;

  private:
    const std::shared_ptr<const Burst> kController;
    const std::vector<FmqRequestDatum> kRequest;
    const hal::utils::RequestRelocation kRelocation;
    const std::vector<Burst::OptionalCacheHold> kCacheHolds;
};
79 
executionBurstResultCallback(V1_0::ErrorStatus status,const sp<IBurstContext> & burstContext)80 nn::GeneralResult<sp<IBurstContext>> executionBurstResultCallback(
81         V1_0::ErrorStatus status, const sp<IBurstContext>& burstContext) {
82     HANDLE_STATUS_HIDL(status) << "IPreparedModel::configureExecutionBurst failed with status "
83                                << toString(status);
84     if (burstContext == nullptr) {
85         return NN_ERROR(nn::ErrorStatus::GENERAL_FAILURE)
86                << "IPreparedModel::configureExecutionBurst returned nullptr for burst";
87     }
88     return burstContext;
89 }
90 
getMemoriesHelper(const hidl_vec<int32_t> & slots,const std::shared_ptr<Burst::MemoryCache> & memoryCache)91 nn::GeneralResult<hidl_vec<hidl_memory>> getMemoriesHelper(
92         const hidl_vec<int32_t>& slots, const std::shared_ptr<Burst::MemoryCache>& memoryCache) {
93     hidl_vec<hidl_memory> memories(slots.size());
94     for (size_t i = 0; i < slots.size(); ++i) {
95         const int32_t slot = slots[i];
96         const auto memory = NN_TRY(memoryCache->getMemory(slot));
97         memories[i] = NN_TRY(V1_0::utils::unvalidatedConvert(memory));
98         if (!memories[i].valid()) {
99             return NN_ERROR() << "memory at slot " << slot << " is invalid";
100         }
101     }
102     return memories;
103 }
104 
105 }  // namespace
106 
107 // MemoryCache methods
108 
MemoryCache()109 Burst::MemoryCache::MemoryCache() {
110     constexpr size_t kPreallocatedCount = 1024;
111     std::vector<int32_t> freeSlotsSpace;
112     freeSlotsSpace.reserve(kPreallocatedCount);
113     mFreeSlots = std::stack<int32_t, std::vector<int32_t>>(std::move(freeSlotsSpace));
114     mMemoryCache.reserve(kPreallocatedCount);
115     mCacheCleaner.reserve(kPreallocatedCount);
116 }
117 
setBurstContext(sp<IBurstContext> burstContext)118 void Burst::MemoryCache::setBurstContext(sp<IBurstContext> burstContext) {
119     std::lock_guard guard(mMutex);
120     mBurstContext = std::move(burstContext);
121 }
122 
// Returns the slot associated with `memory`, registering a new slot if needed. The returned
// SharedCleanup keeps the slot alive; when the last holder drops it, the Cleanup task runs and
// frees the slot via freeMemory().
std::pair<int32_t, Burst::MemoryCache::SharedCleanup> Burst::MemoryCache::cacheMemory(
        const nn::SharedMemory& memory) {
    std::unique_lock lock(mMutex);
    base::ScopedLockAssertion lockAssert(mMutex);

    // Use existing cache entry if (1) the Memory object is in the cache and (2) the cache entry is
    // not currently being freed.
    auto iter = mMemoryIdToSlot.find(memory);
    while (iter != mMemoryIdToSlot.end()) {
        const int32_t slot = iter->second;
        // lock() succeeds only while some holder still owns the Cleanup; in that case share it.
        if (auto cleaner = mCacheCleaner.at(slot).lock()) {
            return std::make_pair(slot, std::move(cleaner));
        }

        // If the code reaches this point, the Memory object was in the cache, but is currently
        // being destroyed. This code waits until the cache entry has been freed, then loops to
        // ensure the cache entry has been freed or has been made present by another thread.
        mCond.wait(lock);
        iter = mMemoryIdToSlot.find(memory);
    }

    // Allocate a new cache entry.
    const int32_t slot = allocateSlotLocked();
    mMemoryIdToSlot[memory] = slot;
    mMemoryCache[slot] = memory;

    // Create reference-counted self-cleaning cache object. The cache is captured weakly so the
    // outstanding Cleanup task does not extend the MemoryCache's own lifetime.
    auto self = weak_from_this();
    Task cleanup = [memory, memoryCache = std::move(self)] {
        if (const auto lock = memoryCache.lock()) {
            lock->freeMemory(memory);
        }
    };
    auto cleaner = std::make_shared<const Cleanup>(std::move(cleanup));
    mCacheCleaner[slot] = cleaner;

    return std::make_pair(slot, std::move(cleaner));
}
161 
getMemory(int32_t slot)162 nn::GeneralResult<nn::SharedMemory> Burst::MemoryCache::getMemory(int32_t slot) {
163     std::lock_guard guard(mMutex);
164     if (slot < 0 || static_cast<size_t>(slot) >= mMemoryCache.size()) {
165         return NN_ERROR() << "Invalid slot: " << slot << " vs " << mMemoryCache.size();
166     }
167     return mMemoryCache[slot];
168 }
169 
freeMemory(const nn::SharedMemory & memory)170 void Burst::MemoryCache::freeMemory(const nn::SharedMemory& memory) {
171     {
172         std::lock_guard guard(mMutex);
173         const int32_t slot = mMemoryIdToSlot.at(memory);
174         if (mBurstContext) {
175             const auto ret = mBurstContext->freeMemory(slot);
176             if (!ret.isOk()) {
177                 LOG(ERROR) << "IBustContext::freeMemory failed: " << ret.description();
178             }
179         }
180         mMemoryIdToSlot.erase(memory);
181         mMemoryCache[slot] = {};
182         mCacheCleaner[slot].reset();
183         mFreeSlots.push(slot);
184     }
185     mCond.notify_all();
186 }
187 
allocateSlotLocked()188 int32_t Burst::MemoryCache::allocateSlotLocked() {
189     constexpr size_t kMaxNumberOfSlots = std::numeric_limits<int32_t>::max();
190 
191     // If there is a free slot, use it.
192     if (!mFreeSlots.empty()) {
193         const int32_t slot = mFreeSlots.top();
194         mFreeSlots.pop();
195         return slot;
196     }
197 
198     // Use a slot for the first time.
199     CHECK_LT(mMemoryCache.size(), kMaxNumberOfSlots) << "Exceeded maximum number of slots!";
200     const int32_t slot = static_cast<int32_t>(mMemoryCache.size());
201     mMemoryCache.emplace_back();
202     mCacheCleaner.emplace_back();
203 
204     return slot;
205 }
206 
207 // ExecutionBurstCallback methods
208 
// Callback object handed to the service so it can resolve slot numbers back to memory objects
// (see getMemories). The cache is stored as a weak reference (kMemoryCache is locked on use),
// so this callback does not extend the MemoryCache's lifetime.
Burst::ExecutionBurstCallback::ExecutionBurstCallback(
        const std::shared_ptr<MemoryCache>& memoryCache)
    : kMemoryCache(memoryCache) {
    // A null cache is a programming error, not a runtime condition.
    CHECK(memoryCache != nullptr);
}
214 
getMemories(const hidl_vec<int32_t> & slots,getMemories_cb cb)215 Return<void> Burst::ExecutionBurstCallback::getMemories(const hidl_vec<int32_t>& slots,
216                                                         getMemories_cb cb) {
217     const auto memoryCache = kMemoryCache.lock();
218     if (memoryCache == nullptr) {
219         LOG(ERROR) << "Burst::ExecutionBurstCallback::getMemories called after the MemoryCache has "
220                       "been freed";
221         cb(V1_0::ErrorStatus::GENERAL_FAILURE, {});
222         return Void();
223     }
224 
225     const auto maybeMemories = getMemoriesHelper(slots, memoryCache);
226     if (!maybeMemories.has_value()) {
227         const auto& [message, code] = maybeMemories.error();
228         LOG(ERROR) << "Burst::ExecutionBurstCallback::getMemories failed with " << code << ": "
229                    << message;
230         cb(V1_0::ErrorStatus::INVALID_ARGUMENT, {});
231         return Void();
232     }
233 
234     cb(V1_0::ErrorStatus::NONE, maybeMemories.value());
235     return Void();
236 }
237 
238 // Burst methods
239 
// Builds a burst controller for `hidlPreparedModel`: creates the request/result FMQs,
// registers an ExecutionBurstCallback for slot-to-memory resolution, calls
// IPreparedModel::configureExecutionBurst on the service, and installs a death handler that
// protects the FMQ endpoints for the lifetime of the returned object. `preparedModel` is kept
// as the fallback execution path.
nn::GeneralResult<std::shared_ptr<const Burst>> Burst::create(
        nn::SharedPreparedModel preparedModel, const sp<V1_2::IPreparedModel>& hidlPreparedModel,
        std::chrono::microseconds pollingTimeWindow) {
    // check inputs
    if (preparedModel == nullptr || hidlPreparedModel == nullptr) {
        return NN_ERROR() << "Burst::create passed a nullptr";
    }

    // create FMQ objects
    auto [requestChannelSender, requestChannelDescriptor] =
            NN_TRY(RequestChannelSender::create(kExecutionBurstChannelLength));
    auto [resultChannelReceiver, resultChannelDescriptor] =
            NN_TRY(ResultChannelReceiver::create(kExecutionBurstChannelLength, pollingTimeWindow));

    // check FMQ objects -- create() succeeding guarantees non-null results, so these are
    // internal-consistency checks, not runtime error handling.
    CHECK(requestChannelSender != nullptr);
    CHECK(requestChannelDescriptor != nullptr);
    CHECK(resultChannelReceiver != nullptr);
    CHECK(resultChannelDescriptor != nullptr);

    // create memory cache
    auto memoryCache = std::make_shared<MemoryCache>();

    // create callback object
    auto burstCallback = sp<ExecutionBurstCallback>::make(memoryCache);
    auto cb = hal::utils::CallbackValue(executionBurstResultCallback);

    // configure burst
    const Return<void> ret = hidlPreparedModel->configureExecutionBurst(
            burstCallback, *requestChannelDescriptor, *resultChannelDescriptor, cb);
    HANDLE_TRANSPORT_FAILURE(ret);

    // cb.take() blocks until the service invokes the callback, then yields the burst context.
    auto burstContext = NN_TRY(cb.take());
    memoryCache->setBurstContext(burstContext);

    // create death handler object
    auto deathHandler = NN_TRY(neuralnetworks::utils::DeathHandler::create(burstContext));
    deathHandler.protectCallbackForLifetimeOfDeathHandler(requestChannelSender.get());
    deathHandler.protectCallbackForLifetimeOfDeathHandler(resultChannelReceiver.get());

    // make and return controller
    return std::make_shared<const Burst>(
            PrivateConstructorTag{}, std::move(preparedModel), std::move(requestChannelSender),
            std::move(resultChannelReceiver), std::move(burstCallback), std::move(burstContext),
            std::move(memoryCache), std::move(deathHandler));
}
286 
// Constructor gated by PrivateConstructorTag: only Burst::create can invoke it, while the
// constructor itself stays public so std::make_shared works. All arguments have already been
// validated/configured by create().
Burst::Burst(PrivateConstructorTag /*tag*/, nn::SharedPreparedModel preparedModel,
             std::unique_ptr<RequestChannelSender> requestChannelSender,
             std::unique_ptr<ResultChannelReceiver> resultChannelReceiver,
             sp<ExecutionBurstCallback> callback, sp<IBurstContext> burstContext,
             std::shared_ptr<MemoryCache> memoryCache,
             neuralnetworks::utils::DeathHandler deathHandler)
    : kPreparedModel(std::move(preparedModel)),
      mRequestChannelSender(std::move(requestChannelSender)),
      mResultChannelReceiver(std::move(resultChannelReceiver)),
      mBurstCallback(std::move(callback)),
      mBurstContext(std::move(burstContext)),
      mMemoryCache(std::move(memoryCache)),
      kDeathHandler(std::move(deathHandler)) {}
300 
cacheMemory(const nn::SharedMemory & memory) const301 Burst::OptionalCacheHold Burst::cacheMemory(const nn::SharedMemory& memory) const {
302     auto [slot, hold] = mMemoryCache->cacheMemory(memory);
303     return hold;
304 }
305 
// See IBurst::execute. Serializes the request into an FMQ packet -- replacing memory pools
// with cached slot numbers -- and runs it over the burst channel. Falls back to
// kPreparedModel->execute() when the request is too new for the V1.2 burst path or when the
// packet cannot be sent. Hints and extension prefixes are ignored (not expressible in V1.2).
nn::ExecutionResult<std::pair<std::vector<nn::OutputShape>, nn::Timing>> Burst::execute(
        const nn::Request& request, nn::MeasureTiming measure,
        const nn::OptionalTimePoint& deadline, const nn::OptionalDuration& loopTimeoutDuration,
        const std::vector<nn::TokenValuePair>& /*hints*/,
        const std::vector<nn::ExtensionNameAndPrefix>& /*extensionNameToPrefix*/) const {
    // This is the first point when we know an execution is occurring, so begin to collect
    // systraces. Note that the first point we can begin collecting systraces in
    // ExecutionBurstServer is when the RequestChannelReceiver realizes there is data in the FMQ, so
    // ExecutionBurstServer collects systraces at different points in the code.
    NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "Burst::execute");

    // if the request is valid but of a higher version than what's supported in burst execution,
    // fall back to another execution path
    if (!compliantVersion(request).ok()) {
        // fallback to another execution path if the packet could not be sent
        return kPreparedModel->execute(request, measure, deadline, loopTimeoutDuration, {}, {});
    }

    // ensure that request is ready for IPC
    std::optional<nn::Request> maybeRequestInShared;
    hal::utils::RequestRelocation relocation;
    const nn::Request& requestInShared = NN_TRY(hal::utils::convertRequestFromPointerToShared(
            &request, nn::kDefaultRequestMemoryAlignment, nn::kMinMemoryPadding,
            &maybeRequestInShared, &relocation));

    // clear pools field of request, as they will be provided via slots
    const auto requestWithoutPools = nn::Request{
            .inputs = requestInShared.inputs, .outputs = requestInShared.outputs, .pools = {}};
    auto hidlRequest = NN_TRY(V1_0::utils::unvalidatedConvert(requestWithoutPools));
    const auto hidlMeasure = NN_TRY(convert(measure));

    // Register each memory pool with the cache; `holds` keeps the slots alive for the duration
    // of this call. The service resolves the slot numbers via ExecutionBurstCallback.
    std::vector<int32_t> slots;
    std::vector<OptionalCacheHold> holds;
    slots.reserve(requestInShared.pools.size());
    holds.reserve(requestInShared.pools.size());
    for (const auto& memoryPool : requestInShared.pools) {
        auto [slot, hold] = mMemoryCache->cacheMemory(std::get<nn::SharedMemory>(memoryPool));
        slots.push_back(slot);
        holds.push_back(std::move(hold));
    }

    // send request packet
    const auto requestPacket = serialize(hidlRequest, hidlMeasure, slots);
    const auto fallback = [this, &request, measure, &deadline, &loopTimeoutDuration] {
        return kPreparedModel->execute(request, measure, deadline, loopTimeoutDuration, {}, {});
    };
    return executeInternal(requestPacket, relocation, fallback);
}
354 
// See IBurst::createReusableExecution for information on this method. Performs the same
// request preparation as execute() -- shared-memory relocation, pool-to-slot substitution, and
// packet serialization -- but captures the result in a BurstExecution so repeated compute()
// calls reuse the packet. Falls back to the prepared model's reusable-execution path when the
// request is too new for the V1.2 burst protocol.
nn::GeneralResult<nn::SharedExecution> Burst::createReusableExecution(
        const nn::Request& request, nn::MeasureTiming measure,
        const nn::OptionalDuration& loopTimeoutDuration,
        const std::vector<nn::TokenValuePair>& /*hints*/,
        const std::vector<nn::ExtensionNameAndPrefix>& /*extensionNameToPrefix*/) const {
    NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "Burst::createReusableExecution");

    // if the request is valid but of a higher version than what's supported in burst execution,
    // fall back to another execution path
    if (!compliantVersion(request).ok()) {
        // fallback to another execution path if the packet could not be sent
        return kPreparedModel->createReusableExecution(request, measure, loopTimeoutDuration, {},
                                                       {});
    }

    // ensure that request is ready for IPC
    std::optional<nn::Request> maybeRequestInShared;
    hal::utils::RequestRelocation relocation;
    const nn::Request& requestInShared = NN_TRY(hal::utils::convertRequestFromPointerToShared(
            &request, nn::kDefaultRequestMemoryAlignment, nn::kMinMemoryPadding,
            &maybeRequestInShared, &relocation));

    // clear pools field of request, as they will be provided via slots
    const auto requestWithoutPools = nn::Request{
            .inputs = requestInShared.inputs, .outputs = requestInShared.outputs, .pools = {}};
    auto hidlRequest = NN_TRY(V1_0::utils::unvalidatedConvert(requestWithoutPools));
    const auto hidlMeasure = NN_TRY(convert(measure));

    // Register each memory pool; the holds are transferred into the BurstExecution so the slots
    // stay registered for the lifetime of the reusable execution.
    std::vector<int32_t> slots;
    std::vector<OptionalCacheHold> holds;
    slots.reserve(requestInShared.pools.size());
    holds.reserve(requestInShared.pools.size());
    for (const auto& memoryPool : requestInShared.pools) {
        auto [slot, hold] = mMemoryCache->cacheMemory(std::get<nn::SharedMemory>(memoryPool));
        slots.push_back(slot);
        holds.push_back(std::move(hold));
    }

    const auto requestPacket = serialize(hidlRequest, hidlMeasure, slots);
    return BurstExecution::create(shared_from_this(), std::move(requestPacket),
                                  std::move(relocation), std::move(holds));
}
398 
// Common execution path shared by execute() and BurstExecution::compute(). Sends an already
// serialized FMQ request packet and blocks waiting for the result packet. `fallback`, when
// non-null, is invoked if the packet cannot be sent over the FMQ.
nn::ExecutionResult<std::pair<std::vector<nn::OutputShape>, nn::Timing>> Burst::executeInternal(
        const std::vector<FmqRequestDatum>& requestPacket,
        const hal::utils::RequestRelocation& relocation, FallbackFunction fallback) const {
    NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_EXECUTION, "Burst::executeInternal");

    // Ensure that at most one execution is in flight at any given time.
    const bool alreadyInFlight = mExecutionInFlight.test_and_set();
    if (alreadyInFlight) {
        return NN_ERROR() << "IBurst already has an execution in flight";
    }
    // Scope guard clears the in-flight flag on every exit path, including early errors.
    const auto guard = base::make_scope_guard([this] { mExecutionInFlight.clear(); });

    // Flush the relocated input region before the service reads it.
    if (relocation.input) {
        relocation.input->flush();
    }

    // send request packet
    const auto sendStatus = mRequestChannelSender->sendPacket(requestPacket);
    if (!sendStatus.ok()) {
        // fallback to another execution path if the packet could not be sent
        if (fallback) {
            return fallback();
        }
        return NN_ERROR() << "Error sending FMQ packet: " << sendStatus.error();
    }

    // get result packet -- blocks until the service writes to the result FMQ
    const auto [status, outputShapes, timing] = NN_TRY(mResultChannelReceiver->getBlocking());

    // Flush the relocated output region, mirroring the input flush above.
    if (relocation.output) {
        relocation.output->flush();
    }
    return executionCallback(status, outputShapes, timing);
}
433 
create(std::shared_ptr<const Burst> controller,std::vector<FmqRequestDatum> request,hal::utils::RequestRelocation relocation,std::vector<Burst::OptionalCacheHold> cacheHolds)434 nn::GeneralResult<std::shared_ptr<const BurstExecution>> BurstExecution::create(
435         std::shared_ptr<const Burst> controller, std::vector<FmqRequestDatum> request,
436         hal::utils::RequestRelocation relocation,
437         std::vector<Burst::OptionalCacheHold> cacheHolds) {
438     if (controller == nullptr) {
439         return NN_ERROR() << "V1_2::utils::BurstExecution::create must have non-null controller";
440     }
441 
442     return std::make_shared<const BurstExecution>(PrivateConstructorTag{}, std::move(controller),
443                                                   std::move(request), std::move(relocation),
444                                                   std::move(cacheHolds));
445 }
446 
// Constructor gated by PrivateConstructorTag: only BurstExecution::create (which has already
// null-checked the controller) can invoke it, while remaining public for std::make_shared.
BurstExecution::BurstExecution(PrivateConstructorTag /*tag*/,
                               std::shared_ptr<const Burst> controller,
                               std::vector<FmqRequestDatum> request,
                               hal::utils::RequestRelocation relocation,
                               std::vector<Burst::OptionalCacheHold> cacheHolds)
    : kController(std::move(controller)),
      kRequest(std::move(request)),
      kRelocation(std::move(relocation)),
      kCacheHolds(std::move(cacheHolds)) {}
456 
// Reruns the stored, pre-serialized request packet on the owning Burst. The deadline parameter
// is unused here; no fallback is supplied, so an FMQ send failure surfaces as an error instead
// of being rerouted to IPreparedModel::execute.
nn::ExecutionResult<std::pair<std::vector<nn::OutputShape>, nn::Timing>> BurstExecution::compute(
        const nn::OptionalTimePoint& /*deadline*/) const {
    return kController->executeInternal(kRequest, kRelocation, /*fallback=*/nullptr);
}
461 
// Fenced execution is not supported on burst objects, so this unconditionally fails.
nn::GeneralResult<std::pair<nn::SyncFence, nn::ExecuteFencedInfoCallback>>
BurstExecution::computeFenced(const std::vector<nn::SyncFence>& /*waitFor*/,
                              const nn::OptionalTimePoint& /*deadline*/,
                              const nn::OptionalDuration& /*timeoutDurationAfterFence*/) const {
    return NN_ERROR(nn::ErrorStatus::GENERAL_FAILURE)
           << "IExecution::computeFenced is not supported on burst object";
}
469 
470 }  // namespace android::hardware::neuralnetworks::V1_2::utils
471