1 /*
2 * Copyright (C) 2019 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "Burst.h"
18 #include "BurstUtils.h"
19
20 #include <android-base/logging.h>
21 #include <android-base/thread_annotations.h>
22 #include <nnapi/IBurst.h>
23 #include <nnapi/IPreparedModel.h>
24 #include <nnapi/Result.h>
25 #include <nnapi/TypeUtils.h>
26 #include <nnapi/Types.h>
27 #include <nnapi/Validation.h>
28 #include <nnapi/hal/1.0/Conversions.h>
29 #include <nnapi/hal/1.0/HandleError.h>
30 #include <nnapi/hal/1.0/ProtectCallback.h>
31 #include <nnapi/hal/CommonUtils.h>
32 #include <nnapi/hal/TransferValue.h>
33
34 #include <algorithm>
35 #include <cstring>
36 #include <limits>
37 #include <memory>
38 #include <string>
39 #include <thread>
40 #include <tuple>
41 #include <utility>
42 #include <vector>
43
44 #include "Callbacks.h"
45 #include "Conversions.h"
46 #include "Tracing.h"
47 #include "Utils.h"
48
49 namespace android::hardware::neuralnetworks::V1_2::utils {
50 namespace {
51
// An IExecution that replays a single pre-serialized request on a Burst object. It owns the
// serialized FMQ request packet, the relocation object used to flush data between the caller's
// memory and the shared (IPC-visible) memory, and the cache holds that keep the request's
// memories resident in the burst's memory cache for the lifetime of this execution.
class BurstExecution final : public nn::IExecution,
                             public std::enable_shared_from_this<BurstExecution> {
    // Tag type so only create() can reach the public constructor while std::make_shared still
    // works.
    struct PrivateConstructorTag {};

  public:
    // Factory method; fails with an error if `controller` is null.
    static nn::GeneralResult<std::shared_ptr<const BurstExecution>> create(
            std::shared_ptr<const Burst> controller, std::vector<FmqRequestDatum> request,
            hal::utils::RequestRelocation relocation,
            std::vector<Burst::OptionalCacheHold> cacheHolds);

    BurstExecution(PrivateConstructorTag tag, std::shared_ptr<const Burst> controller,
                   std::vector<FmqRequestDatum> request, hal::utils::RequestRelocation relocation,
                   std::vector<Burst::OptionalCacheHold> cacheHolds);

    // Runs the stored request on the controlling Burst.
    nn::ExecutionResult<std::pair<std::vector<nn::OutputShape>, nn::Timing>> compute(
            const nn::OptionalTimePoint& deadline) const override;

    // Always returns GENERAL_FAILURE: fenced execution is not supported on burst objects.
    nn::GeneralResult<std::pair<nn::SyncFence, nn::ExecuteFencedInfoCallback>> computeFenced(
            const std::vector<nn::SyncFence>& waitFor, const nn::OptionalTimePoint& deadline,
            const nn::OptionalDuration& timeoutDurationAfterFence) const override;

  private:
    const std::shared_ptr<const Burst> kController;
    const std::vector<FmqRequestDatum> kRequest;
    const hal::utils::RequestRelocation kRelocation;
    const std::vector<Burst::OptionalCacheHold> kCacheHolds;
};
79
executionBurstResultCallback(V1_0::ErrorStatus status,const sp<IBurstContext> & burstContext)80 nn::GeneralResult<sp<IBurstContext>> executionBurstResultCallback(
81 V1_0::ErrorStatus status, const sp<IBurstContext>& burstContext) {
82 HANDLE_STATUS_HIDL(status) << "IPreparedModel::configureExecutionBurst failed with status "
83 << toString(status);
84 if (burstContext == nullptr) {
85 return NN_ERROR(nn::ErrorStatus::GENERAL_FAILURE)
86 << "IPreparedModel::configureExecutionBurst returned nullptr for burst";
87 }
88 return burstContext;
89 }
90
getMemoriesHelper(const hidl_vec<int32_t> & slots,const std::shared_ptr<Burst::MemoryCache> & memoryCache)91 nn::GeneralResult<hidl_vec<hidl_memory>> getMemoriesHelper(
92 const hidl_vec<int32_t>& slots, const std::shared_ptr<Burst::MemoryCache>& memoryCache) {
93 hidl_vec<hidl_memory> memories(slots.size());
94 for (size_t i = 0; i < slots.size(); ++i) {
95 const int32_t slot = slots[i];
96 const auto memory = NN_TRY(memoryCache->getMemory(slot));
97 memories[i] = NN_TRY(V1_0::utils::unvalidatedConvert(memory));
98 if (!memories[i].valid()) {
99 return NN_ERROR() << "memory at slot " << slot << " is invalid";
100 }
101 }
102 return memories;
103 }
104
105 } // namespace
106
107 // MemoryCache methods
108
MemoryCache()109 Burst::MemoryCache::MemoryCache() {
110 constexpr size_t kPreallocatedCount = 1024;
111 std::vector<int32_t> freeSlotsSpace;
112 freeSlotsSpace.reserve(kPreallocatedCount);
113 mFreeSlots = std::stack<int32_t, std::vector<int32_t>>(std::move(freeSlotsSpace));
114 mMemoryCache.reserve(kPreallocatedCount);
115 mCacheCleaner.reserve(kPreallocatedCount);
116 }
117
setBurstContext(sp<IBurstContext> burstContext)118 void Burst::MemoryCache::setBurstContext(sp<IBurstContext> burstContext) {
119 std::lock_guard guard(mMutex);
120 mBurstContext = std::move(burstContext);
121 }
122
// Returns the cache slot assigned to `memory` together with a shared "cleaner" object whose
// destruction releases the cache entry (via freeMemory). An existing entry is reused when
// possible; otherwise a new slot is allocated.
std::pair<int32_t, Burst::MemoryCache::SharedCleanup> Burst::MemoryCache::cacheMemory(
        const nn::SharedMemory& memory) {
    std::unique_lock lock(mMutex);
    base::ScopedLockAssertion lockAssert(mMutex);

    // Use existing cache entry if (1) the Memory object is in the cache and (2) the cache entry is
    // not currently being freed.
    auto iter = mMemoryIdToSlot.find(memory);
    while (iter != mMemoryIdToSlot.end()) {
        const int32_t slot = iter->second;
        // Locking the weak_ptr succeeds only while the entry's cleaner is still alive, i.e. the
        // entry is not mid-destruction; in that case share the existing cleaner.
        if (auto cleaner = mCacheCleaner.at(slot).lock()) {
            return std::make_pair(slot, std::move(cleaner));
        }

        // If the code reaches this point, the Memory object was in the cache, but is currently
        // being destroyed. This code waits until the cache entry has been freed, then loops to
        // ensure the cache entry has been freed or has been made present by another thread.
        // (freeMemory() calls mCond.notify_all() after removing an entry.)
        mCond.wait(lock);
        iter = mMemoryIdToSlot.find(memory);
    }

    // Allocate a new cache entry.
    const int32_t slot = allocateSlotLocked();
    mMemoryIdToSlot[memory] = slot;
    mMemoryCache[slot] = memory;

    // Create reference-counted self-cleaning cache object. The cleanup task captures this
    // MemoryCache only by weak_ptr, so a cleaner outliving the cache is a no-op rather than a
    // use-after-free.
    auto self = weak_from_this();
    Task cleanup = [memory, memoryCache = std::move(self)] {
        if (const auto lock = memoryCache.lock()) {
            lock->freeMemory(memory);
        }
    };
    auto cleaner = std::make_shared<const Cleanup>(std::move(cleanup));
    mCacheCleaner[slot] = cleaner;

    return std::make_pair(slot, std::move(cleaner));
}
161
getMemory(int32_t slot)162 nn::GeneralResult<nn::SharedMemory> Burst::MemoryCache::getMemory(int32_t slot) {
163 std::lock_guard guard(mMutex);
164 if (slot < 0 || static_cast<size_t>(slot) >= mMemoryCache.size()) {
165 return NN_ERROR() << "Invalid slot: " << slot << " vs " << mMemoryCache.size();
166 }
167 return mMemoryCache[slot];
168 }
169
freeMemory(const nn::SharedMemory & memory)170 void Burst::MemoryCache::freeMemory(const nn::SharedMemory& memory) {
171 {
172 std::lock_guard guard(mMutex);
173 const int32_t slot = mMemoryIdToSlot.at(memory);
174 if (mBurstContext) {
175 const auto ret = mBurstContext->freeMemory(slot);
176 if (!ret.isOk()) {
177 LOG(ERROR) << "IBustContext::freeMemory failed: " << ret.description();
178 }
179 }
180 mMemoryIdToSlot.erase(memory);
181 mMemoryCache[slot] = {};
182 mCacheCleaner[slot].reset();
183 mFreeSlots.push(slot);
184 }
185 mCond.notify_all();
186 }
187
allocateSlotLocked()188 int32_t Burst::MemoryCache::allocateSlotLocked() {
189 constexpr size_t kMaxNumberOfSlots = std::numeric_limits<int32_t>::max();
190
191 // If there is a free slot, use it.
192 if (!mFreeSlots.empty()) {
193 const int32_t slot = mFreeSlots.top();
194 mFreeSlots.pop();
195 return slot;
196 }
197
198 // Use a slot for the first time.
199 CHECK_LT(mMemoryCache.size(), kMaxNumberOfSlots) << "Exceeded maximum number of slots!";
200 const int32_t slot = static_cast<int32_t>(mMemoryCache.size());
201 mMemoryCache.emplace_back();
202 mCacheCleaner.emplace_back();
203
204 return slot;
205 }
206
207 // ExecutionBurstCallback methods
208
// Callback the burst service uses to resolve memory slots. The cache is held by weak_ptr
// (kMemoryCache is locked in getMemories), so the service keeping this callback alive does not
// keep the MemoryCache alive.
Burst::ExecutionBurstCallback::ExecutionBurstCallback(
        const std::shared_ptr<MemoryCache>& memoryCache)
    : kMemoryCache(memoryCache) {
    CHECK(memoryCache != nullptr);
}
214
getMemories(const hidl_vec<int32_t> & slots,getMemories_cb cb)215 Return<void> Burst::ExecutionBurstCallback::getMemories(const hidl_vec<int32_t>& slots,
216 getMemories_cb cb) {
217 const auto memoryCache = kMemoryCache.lock();
218 if (memoryCache == nullptr) {
219 LOG(ERROR) << "Burst::ExecutionBurstCallback::getMemories called after the MemoryCache has "
220 "been freed";
221 cb(V1_0::ErrorStatus::GENERAL_FAILURE, {});
222 return Void();
223 }
224
225 const auto maybeMemories = getMemoriesHelper(slots, memoryCache);
226 if (!maybeMemories.has_value()) {
227 const auto& [message, code] = maybeMemories.error();
228 LOG(ERROR) << "Burst::ExecutionBurstCallback::getMemories failed with " << code << ": "
229 << message;
230 cb(V1_0::ErrorStatus::INVALID_ARGUMENT, {});
231 return Void();
232 }
233
234 cb(V1_0::ErrorStatus::NONE, maybeMemories.value());
235 return Void();
236 }
237
238 // Burst methods
239
// Builds a Burst controller: creates the request/result FMQ channels, registers a memory-slot
// callback with the service via IPreparedModel::configureExecutionBurst, wires the returned
// burst context into the memory cache, and installs a death handler for the channel endpoints.
nn::GeneralResult<std::shared_ptr<const Burst>> Burst::create(
        nn::SharedPreparedModel preparedModel, const sp<V1_2::IPreparedModel>& hidlPreparedModel,
        std::chrono::microseconds pollingTimeWindow) {
    // check inputs
    if (preparedModel == nullptr || hidlPreparedModel == nullptr) {
        return NN_ERROR() << "Burst::create passed a nullptr";
    }

    // create FMQ objects (sender + descriptor for requests, receiver + descriptor for results)
    auto [requestChannelSender, requestChannelDescriptor] =
            NN_TRY(RequestChannelSender::create(kExecutionBurstChannelLength));
    auto [resultChannelReceiver, resultChannelDescriptor] =
            NN_TRY(ResultChannelReceiver::create(kExecutionBurstChannelLength, pollingTimeWindow));

    // check FMQ objects
    CHECK(requestChannelSender != nullptr);
    CHECK(requestChannelDescriptor != nullptr);
    CHECK(resultChannelReceiver != nullptr);
    CHECK(resultChannelDescriptor != nullptr);

    // create memory cache
    auto memoryCache = std::make_shared<MemoryCache>();

    // create callback object the service uses to resolve memory slots
    auto burstCallback = sp<ExecutionBurstCallback>::make(memoryCache);
    auto cb = hal::utils::CallbackValue(executionBurstResultCallback);

    // configure burst on the service side, handing over both FMQ descriptors
    const Return<void> ret = hidlPreparedModel->configureExecutionBurst(
            burstCallback, *requestChannelDescriptor, *resultChannelDescriptor, cb);
    HANDLE_TRANSPORT_FAILURE(ret);

    auto burstContext = NN_TRY(cb.take());
    // The cache needs the context so freed slots can be reported back to the service.
    memoryCache->setBurstContext(burstContext);

    // create death handler object; the channel endpoints are registered so they are notified
    // for the death handler's lifetime (presumably to unblock waiters on service death — see
    // ProtectCallback).
    auto deathHandler = NN_TRY(neuralnetworks::utils::DeathHandler::create(burstContext));
    deathHandler.protectCallbackForLifetimeOfDeathHandler(requestChannelSender.get());
    deathHandler.protectCallbackForLifetimeOfDeathHandler(resultChannelReceiver.get());

    // make and return controller
    return std::make_shared<const Burst>(
            PrivateConstructorTag{}, std::move(preparedModel), std::move(requestChannelSender),
            std::move(resultChannelReceiver), std::move(burstCallback), std::move(burstContext),
            std::move(memoryCache), std::move(deathHandler));
}
286
// Private constructor (tag-gated): simply stores the components assembled by Burst::create().
Burst::Burst(PrivateConstructorTag /*tag*/, nn::SharedPreparedModel preparedModel,
             std::unique_ptr<RequestChannelSender> requestChannelSender,
             std::unique_ptr<ResultChannelReceiver> resultChannelReceiver,
             sp<ExecutionBurstCallback> callback, sp<IBurstContext> burstContext,
             std::shared_ptr<MemoryCache> memoryCache,
             neuralnetworks::utils::DeathHandler deathHandler)
    : kPreparedModel(std::move(preparedModel)),
      mRequestChannelSender(std::move(requestChannelSender)),
      mResultChannelReceiver(std::move(resultChannelReceiver)),
      mBurstCallback(std::move(callback)),
      mBurstContext(std::move(burstContext)),
      mMemoryCache(std::move(memoryCache)),
      kDeathHandler(std::move(deathHandler)) {}
300
cacheMemory(const nn::SharedMemory & memory) const301 Burst::OptionalCacheHold Burst::cacheMemory(const nn::SharedMemory& memory) const {
302 auto [slot, hold] = mMemoryCache->cacheMemory(memory);
303 return hold;
304 }
305
nn::ExecutionResult<std::pair<std::vector<nn::OutputShape>, nn::Timing>> Burst::execute(
        const nn::Request& request, nn::MeasureTiming measure,
        const nn::OptionalTimePoint& deadline, const nn::OptionalDuration& loopTimeoutDuration,
        const std::vector<nn::TokenValuePair>& /*hints*/,
        const std::vector<nn::ExtensionNameAndPrefix>& /*extensionNameToPrefix*/) const {
    // This is the first point when we know an execution is occurring, so begin to collect
    // systraces. Note that the first point we can begin collecting systraces in
    // ExecutionBurstServer is when the RequestChannelReceiver realizes there is data in the FMQ, so
    // ExecutionBurstServer collects systraces at different points in the code.
    NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "Burst::execute");

    // A request that is valid but of a newer version than burst execution supports is routed to
    // the prepared model's regular execution path instead.
    if (!compliantVersion(request).ok()) {
        return kPreparedModel->execute(request, measure, deadline, loopTimeoutDuration, {}, {});
    }

    // Relocate the request's memory so it is suitable for IPC.
    std::optional<nn::Request> maybeRequestInShared;
    hal::utils::RequestRelocation relocation;
    const nn::Request& requestInShared = NN_TRY(hal::utils::convertRequestFromPointerToShared(
            &request, nn::kDefaultRequestMemoryAlignment, nn::kMinMemoryPadding,
            &maybeRequestInShared, &relocation));

    // Memory pools travel as cache slots rather than inline in the request, so serialize a copy
    // of the request with an empty pools field.
    const nn::Request strippedRequest{.inputs = requestInShared.inputs,
                                      .outputs = requestInShared.outputs,
                                      .pools = {}};
    auto hidlRequest = NN_TRY(V1_0::utils::unvalidatedConvert(strippedRequest));
    const auto hidlMeasure = NN_TRY(convert(measure));

    // Cache each memory pool, recording the slot ids (for serialization) and the holds (to keep
    // the cache entries alive for the duration of the execution).
    const size_t poolCount = requestInShared.pools.size();
    std::vector<int32_t> slots;
    std::vector<OptionalCacheHold> holds;
    slots.reserve(poolCount);
    holds.reserve(poolCount);
    for (const auto& pool : requestInShared.pools) {
        auto [slot, hold] = mMemoryCache->cacheMemory(std::get<nn::SharedMemory>(pool));
        slots.push_back(slot);
        holds.push_back(std::move(hold));
    }

    // Serialize the request into an FMQ packet and execute it, falling back to the regular
    // execution path if the packet cannot be sent.
    const auto packet = serialize(hidlRequest, hidlMeasure, slots);
    const auto fallback = [this, &request, measure, &deadline, &loopTimeoutDuration] {
        return kPreparedModel->execute(request, measure, deadline, loopTimeoutDuration, {}, {});
    };
    return executeInternal(packet, relocation, fallback);
}
354
355 // See IBurst::createReusableExecution for information on this method.
createReusableExecution(const nn::Request & request,nn::MeasureTiming measure,const nn::OptionalDuration & loopTimeoutDuration,const std::vector<nn::TokenValuePair> &,const std::vector<nn::ExtensionNameAndPrefix> &) const356 nn::GeneralResult<nn::SharedExecution> Burst::createReusableExecution(
357 const nn::Request& request, nn::MeasureTiming measure,
358 const nn::OptionalDuration& loopTimeoutDuration,
359 const std::vector<nn::TokenValuePair>& /*hints*/,
360 const std::vector<nn::ExtensionNameAndPrefix>& /*extensionNameToPrefix*/) const {
361 NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "Burst::createReusableExecution");
362
363 // if the request is valid but of a higher version than what's supported in burst execution,
364 // fall back to another execution path
365 if (!compliantVersion(request).ok()) {
366 // fallback to another execution path if the packet could not be sent
367 return kPreparedModel->createReusableExecution(request, measure, loopTimeoutDuration, {},
368 {});
369 }
370
371 // ensure that request is ready for IPC
372 std::optional<nn::Request> maybeRequestInShared;
373 hal::utils::RequestRelocation relocation;
374 const nn::Request& requestInShared = NN_TRY(hal::utils::convertRequestFromPointerToShared(
375 &request, nn::kDefaultRequestMemoryAlignment, nn::kMinMemoryPadding,
376 &maybeRequestInShared, &relocation));
377
378 // clear pools field of request, as they will be provided via slots
379 const auto requestWithoutPools = nn::Request{
380 .inputs = requestInShared.inputs, .outputs = requestInShared.outputs, .pools = {}};
381 auto hidlRequest = NN_TRY(V1_0::utils::unvalidatedConvert(requestWithoutPools));
382 const auto hidlMeasure = NN_TRY(convert(measure));
383
384 std::vector<int32_t> slots;
385 std::vector<OptionalCacheHold> holds;
386 slots.reserve(requestInShared.pools.size());
387 holds.reserve(requestInShared.pools.size());
388 for (const auto& memoryPool : requestInShared.pools) {
389 auto [slot, hold] = mMemoryCache->cacheMemory(std::get<nn::SharedMemory>(memoryPool));
390 slots.push_back(slot);
391 holds.push_back(std::move(hold));
392 }
393
394 const auto requestPacket = serialize(hidlRequest, hidlMeasure, slots);
395 return BurstExecution::create(shared_from_this(), std::move(requestPacket),
396 std::move(relocation), std::move(holds));
397 }
398
// Sends a pre-serialized request packet over the FMQ and blocks for the result. If the packet
// cannot be sent and `fallback` is non-null, the fallback is invoked instead of failing.
nn::ExecutionResult<std::pair<std::vector<nn::OutputShape>, nn::Timing>> Burst::executeInternal(
        const std::vector<FmqRequestDatum>& requestPacket,
        const hal::utils::RequestRelocation& relocation, FallbackFunction fallback) const {
    NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_EXECUTION, "Burst::executeInternal");

    // Ensure that at most one execution is in flight at any given time.
    const bool alreadyInFlight = mExecutionInFlight.test_and_set();
    if (alreadyInFlight) {
        return NN_ERROR() << "IBurst already has an execution in flight";
    }
    // Scope guard clears the in-flight flag on every exit path (success, error, or fallback).
    const auto guard = base::make_scope_guard([this] { mExecutionInFlight.clear(); });

    // Flush the relocated input memory (if any) before sending the request.
    if (relocation.input) {
        relocation.input->flush();
    }

    // send request packet
    const auto sendStatus = mRequestChannelSender->sendPacket(requestPacket);
    if (!sendStatus.ok()) {
        // fallback to another execution path if the packet could not be sent
        if (fallback) {
            return fallback();
        }
        return NN_ERROR() << "Error sending FMQ packet: " << sendStatus.error();
    }

    // get result packet (blocks until the service replies or the channel is invalidated)
    const auto [status, outputShapes, timing] = NN_TRY(mResultChannelReceiver->getBlocking());

    // Flush the relocated output memory (if any) now that results have arrived.
    if (relocation.output) {
        relocation.output->flush();
    }
    return executionCallback(status, outputShapes, timing);
}
433
create(std::shared_ptr<const Burst> controller,std::vector<FmqRequestDatum> request,hal::utils::RequestRelocation relocation,std::vector<Burst::OptionalCacheHold> cacheHolds)434 nn::GeneralResult<std::shared_ptr<const BurstExecution>> BurstExecution::create(
435 std::shared_ptr<const Burst> controller, std::vector<FmqRequestDatum> request,
436 hal::utils::RequestRelocation relocation,
437 std::vector<Burst::OptionalCacheHold> cacheHolds) {
438 if (controller == nullptr) {
439 return NN_ERROR() << "V1_2::utils::BurstExecution::create must have non-null controller";
440 }
441
442 return std::make_shared<const BurstExecution>(PrivateConstructorTag{}, std::move(controller),
443 std::move(request), std::move(relocation),
444 std::move(cacheHolds));
445 }
446
// Private constructor (tag-gated): use BurstExecution::create() instead. Stores the serialized
// request, relocation, and cache holds for later replay via compute().
BurstExecution::BurstExecution(PrivateConstructorTag /*tag*/,
                               std::shared_ptr<const Burst> controller,
                               std::vector<FmqRequestDatum> request,
                               hal::utils::RequestRelocation relocation,
                               std::vector<Burst::OptionalCacheHold> cacheHolds)
    : kController(std::move(controller)),
      kRequest(std::move(request)),
      kRelocation(std::move(relocation)),
      kCacheHolds(std::move(cacheHolds)) {}
456
// Replays the pre-serialized request on the controlling burst. The deadline is unused by this
// path, and no fallback is supplied: a failed FMQ send surfaces as an error.
nn::ExecutionResult<std::pair<std::vector<nn::OutputShape>, nn::Timing>> BurstExecution::compute(
        const nn::OptionalTimePoint& /*deadline*/) const {
    return kController->executeInternal(kRequest, kRelocation, /*fallback=*/{});
}
461
// Fenced execution is not part of the burst protocol, so this unconditionally fails.
nn::GeneralResult<std::pair<nn::SyncFence, nn::ExecuteFencedInfoCallback>>
BurstExecution::computeFenced(const std::vector<nn::SyncFence>& /*waitFor*/,
                              const nn::OptionalTimePoint& /*deadline*/,
                              const nn::OptionalDuration& /*timeoutDurationAfterFence*/) const {
    return NN_ERROR(nn::ErrorStatus::GENERAL_FAILURE)
           << "IExecution::computeFenced is not supported on burst object";
}
469
470 } // namespace android::hardware::neuralnetworks::V1_2::utils
471