1 /* 2 * Copyright (C) 2019 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef ANDROID_PACKAGES_MODULES_NEURALNETWORKS_COMMON_EXECUTION_BURST_SERVER_H 18 #define ANDROID_PACKAGES_MODULES_NEURALNETWORKS_COMMON_EXECUTION_BURST_SERVER_H 19 20 #include <android-base/macros.h> 21 #include <android/hardware/neuralnetworks/1.0/types.h> 22 #include <android/hardware/neuralnetworks/1.1/types.h> 23 #include <android/hardware/neuralnetworks/1.2/IBurstCallback.h> 24 #include <android/hardware/neuralnetworks/1.2/IPreparedModel.h> 25 #include <android/hardware/neuralnetworks/1.2/types.h> 26 #include <fmq/MessageQueue.h> 27 #include <hidl/MQDescriptor.h> 28 29 #include <atomic> 30 #include <chrono> 31 #include <memory> 32 #include <optional> 33 #include <thread> 34 #include <tuple> 35 #include <vector> 36 37 namespace android::nn { 38 39 using FmqRequestDescriptor = 40 hardware::MQDescriptorSync<hardware::neuralnetworks::V1_2::FmqRequestDatum>; 41 using FmqResultDescriptor = 42 hardware::MQDescriptorSync<hardware::neuralnetworks::V1_2::FmqResultDatum>; 43 44 /** 45 * Function to serialize results. 46 * 47 * Prefer calling ResultChannelSender::send. 48 * 49 * @param errorStatus Status of the execution. 50 * @param outputShapes Dynamic shapes of the output tensors. 51 * @param timing Timing information of the execution. 52 * @return Serialized FMQ result data. 53 */ 54 std::vector<hardware::neuralnetworks::V1_2::FmqResultDatum> serialize( 55 hardware::neuralnetworks::V1_0::ErrorStatus errorStatus, 56 const std::vector<hardware::neuralnetworks::V1_2::OutputShape>& outputShapes, 57 hardware::neuralnetworks::V1_2::Timing timing); 58 59 /** 60 * Deserialize the FMQ request data. 61 * 62 * The three resulting fields are the Request object (where Request::pools is 63 * empty), slot identifiers (which are stand-ins for Request::pools), and 64 * whether timing information must be collected for the run. 65 * 66 * @param data Serialized FMQ request data. 67 * @return Request object if successfully deserialized, std::nullopt otherwise. 68 */ 69 std::optional<std::tuple<hardware::neuralnetworks::V1_0::Request, std::vector<int32_t>, 70 hardware::neuralnetworks::V1_2::MeasureTiming>> 71 deserialize(const std::vector<hardware::neuralnetworks::V1_2::FmqRequestDatum>& data); 72 73 /** 74 * RequestChannelReceiver is responsible for waiting on the channel until the 75 * packet is available, extracting the packet from the channel, and 76 * deserializing the packet. 77 * 78 * Because the receiver can wait on a packet that may never come (e.g., because 79 * the sending side of the packet has been closed), this object can be 80 * invalidated, unblocking the receiver. 81 */ 82 class RequestChannelReceiver { 83 using FmqRequestChannel = 84 hardware::MessageQueue<hardware::neuralnetworks::V1_2::FmqRequestDatum, 85 hardware::kSynchronizedReadWrite>; 86 87 public: 88 /** 89 * Create the receiving end of a request channel. 90 * 91 * Prefer this call over the constructor. 92 * 93 * @param requestChannel Descriptor for the request channel. 94 * @param pollingTimeWindow How much time (in microseconds) the 95 * RequestChannelReceiver is allowed to poll the FMQ before waiting on 96 * the blocking futex. Polling may result in lower latencies at the 97 * potential cost of more power usage. 98 * @return RequestChannelReceiver on successful creation, nullptr otherwise. 99 */ 100 static std::unique_ptr<RequestChannelReceiver> create( 101 const FmqRequestDescriptor& requestChannel, 102 std::chrono::microseconds pollingTimeWindow); 103 104 /** 105 * Get the request from the channel. 106 * 107 * This method will block until either: 108 * 1) The packet has been retrieved, or 109 * 2) The receiver has been invalidated 110 * 111 * @return Request object if successfully received, std::nullopt if error or 112 * if the receiver object was invalidated. 113 */ 114 std::optional<std::tuple<hardware::neuralnetworks::V1_0::Request, std::vector<int32_t>, 115 hardware::neuralnetworks::V1_2::MeasureTiming>> 116 getBlocking(); 117 118 /** 119 * Method to mark the channel as invalid, unblocking any current or future 120 * calls to RequestChannelReceiver::getBlocking. 121 */ 122 void invalidate(); 123 124 RequestChannelReceiver(std::unique_ptr<FmqRequestChannel> fmqRequestChannel, 125 std::chrono::microseconds pollingTimeWindow); 126 127 private: 128 std::optional<std::vector<hardware::neuralnetworks::V1_2::FmqRequestDatum>> getPacketBlocking(); 129 130 const std::unique_ptr<FmqRequestChannel> mFmqRequestChannel; 131 std::atomic<bool> mTeardown{false}; 132 const std::chrono::microseconds kPollingTimeWindow; 133 }; 134 135 /** 136 * ResultChannelSender is responsible for serializing the result packet of 137 * information, sending it on the result channel, and signaling that the data is 138 * available. 139 */ 140 class ResultChannelSender { 141 using FmqResultChannel = hardware::MessageQueue<hardware::neuralnetworks::V1_2::FmqResultDatum, 142 hardware::kSynchronizedReadWrite>; 143 144 public: 145 /** 146 * Create the sending end of a result channel. 147 * 148 * Prefer this call over the constructor. 149 * 150 * @param resultChannel Descriptor for the result channel. 151 * @return ResultChannelSender on successful creation, nullptr otherwise. 152 */ 153 static std::unique_ptr<ResultChannelSender> create(const FmqResultDescriptor& resultChannel); 154 155 /** 156 * Send the result to the channel. 157 * 158 * @param errorStatus Status of the execution. 159 * @param outputShapes Dynamic shapes of the output tensors. 160 * @param timing Timing information of the execution. 161 * @return 'true' on successful send, 'false' otherwise. 162 */ 163 bool send(hardware::neuralnetworks::V1_0::ErrorStatus errorStatus, 164 const std::vector<hardware::neuralnetworks::V1_2::OutputShape>& outputShapes, 165 hardware::neuralnetworks::V1_2::Timing timing); 166 167 // prefer calling ResultChannelSender::send 168 bool sendPacket(const std::vector<hardware::neuralnetworks::V1_2::FmqResultDatum>& packet); 169 170 ResultChannelSender(std::unique_ptr<FmqResultChannel> fmqResultChannel); 171 172 private: 173 const std::unique_ptr<FmqResultChannel> mFmqResultChannel; 174 }; 175 176 /** 177 * The ExecutionBurstServer class is responsible for waiting for and 178 * deserializing a request object from a FMQ, performing the inference, and 179 * serializing the result back across another FMQ. 180 */ 181 class ExecutionBurstServer : public hardware::neuralnetworks::V1_2::IBurstContext { 182 DISALLOW_IMPLICIT_CONSTRUCTORS(ExecutionBurstServer); 183 184 public: 185 /** 186 * IBurstExecutorWithCache is a callback object passed to 187 * ExecutionBurstServer's factory function that is used to perform an 188 * execution. Because some memory resources are needed across multiple 189 * executions, this object also contains a local cache that can directly be 190 * used in the execution. 191 * 192 * ExecutionBurstServer will never access its IBurstExecutorWithCache object 193 * with concurrent calls. 194 */ 195 class IBurstExecutorWithCache { 196 DISALLOW_COPY_AND_ASSIGN(IBurstExecutorWithCache); 197 198 public: 199 IBurstExecutorWithCache() = default; 200 virtual ~IBurstExecutorWithCache() = default; 201 202 /** 203 * Checks if a cache entry specified by a slot is present in the cache. 204 * 205 * @param slot Identifier of the cache entry. 206 * @return 'true' if the cache entry is present in the cache, 'false' 207 * otherwise. 208 */ 209 virtual bool isCacheEntryPresent(int32_t slot) const = 0; 210 211 /** 212 * Adds an entry specified by a slot to the cache. 213 * 214 * The caller of this function must ensure that the cache entry that is 215 * being added is not already present in the cache. This can be checked 216 * via isCacheEntryPresent. 217 * 218 * @param memory Memory resource to be cached. 219 * @param slot Slot identifier corresponding to the memory resource. 220 */ 221 virtual void addCacheEntry(const hardware::hidl_memory& memory, int32_t slot) = 0; 222 223 /** 224 * Removes an entry specified by a slot from the cache. 225 * 226 * If the cache entry corresponding to the slot number does not exist, 227 * the call does nothing. 228 * 229 * @param slot Slot identifier corresponding to the memory resource. 230 */ 231 virtual void removeCacheEntry(int32_t slot) = 0; 232 233 /** 234 * Perform an execution. 235 * 236 * @param request Request object with inputs and outputs specified. 237 * Request::pools is empty, and DataLocation::poolIndex instead 238 * refers to the 'slots' argument as if it were Request::pools. 239 * @param slots Slots corresponding to the cached memory entries to be 240 * used. 241 * @param measure Whether timing information is requested for the 242 * execution. 243 * @return Result of the execution, including the status of the 244 * execution, dynamic output shapes, and any timing information. 245 */ 246 virtual std::tuple<hardware::neuralnetworks::V1_0::ErrorStatus, 247 hardware::hidl_vec<hardware::neuralnetworks::V1_2::OutputShape>, 248 hardware::neuralnetworks::V1_2::Timing> 249 execute(const hardware::neuralnetworks::V1_0::Request& request, 250 const std::vector<int32_t>& slots, 251 hardware::neuralnetworks::V1_2::MeasureTiming measure) = 0; 252 }; 253 254 /** 255 * Create automated context to manage FMQ-based executions. 256 * 257 * This function is intended to be used by a service to automatically: 258 * 1) Receive data from a provided FMQ 259 * 2) Execute a model with the given information 260 * 3) Send the result to the created FMQ 261 * 262 * @param callback Callback used to retrieve memories corresponding to 263 * unrecognized slots. 264 * @param requestChannel Input FMQ channel through which the client passes the 265 * request to the service. 266 * @param resultChannel Output FMQ channel from which the client can retrieve 267 * the result of the execution. 268 * @param executorWithCache Object which maintains a local cache of the 269 * memory pools and executes using the cached memory pools. 270 * @param pollingTimeWindow How much time (in microseconds) the 271 * ExecutionBurstServer is allowed to poll the FMQ before waiting on 272 * the blocking futex. Polling may result in lower latencies at the 273 * potential cost of more power usage. 274 * @result IBurstContext Handle to the burst context. 275 */ 276 static sp<ExecutionBurstServer> create( 277 const sp<hardware::neuralnetworks::V1_2::IBurstCallback>& callback, 278 const FmqRequestDescriptor& requestChannel, const FmqResultDescriptor& resultChannel, 279 std::shared_ptr<IBurstExecutorWithCache> executorWithCache, 280 std::chrono::microseconds pollingTimeWindow = std::chrono::microseconds{0}); 281 282 /** 283 * Create automated context to manage FMQ-based executions. 284 * 285 * This function is intended to be used by a service to automatically: 286 * 1) Receive data from a provided FMQ 287 * 2) Execute a model with the given information 288 * 3) Send the result to the created FMQ 289 * 290 * @param callback Callback used to retrieve memories corresponding to 291 * unrecognized slots. 292 * @param requestChannel Input FMQ channel through which the client passes the 293 * request to the service. 294 * @param resultChannel Output FMQ channel from which the client can retrieve 295 * the result of the execution. 296 * @param preparedModel PreparedModel that the burst object was created from. 297 * IPreparedModel::executeSynchronously will be used to perform the 298 * execution. 299 * @param pollingTimeWindow How much time (in microseconds) the 300 * ExecutionBurstServer is allowed to poll the FMQ before waiting on 301 * the blocking futex. Polling may result in lower latencies at the 302 * potential cost of more power usage. 303 * @result IBurstContext Handle to the burst context. 304 */ 305 static sp<ExecutionBurstServer> create( 306 const sp<hardware::neuralnetworks::V1_2::IBurstCallback>& callback, 307 const FmqRequestDescriptor& requestChannel, const FmqResultDescriptor& resultChannel, 308 hardware::neuralnetworks::V1_2::IPreparedModel* preparedModel, 309 std::chrono::microseconds pollingTimeWindow = std::chrono::microseconds{0}); 310 311 ExecutionBurstServer(const sp<hardware::neuralnetworks::V1_2::IBurstCallback>& callback, 312 std::unique_ptr<RequestChannelReceiver> requestChannel, 313 std::unique_ptr<ResultChannelSender> resultChannel, 314 std::shared_ptr<IBurstExecutorWithCache> cachedExecutor); 315 ~ExecutionBurstServer(); 316 317 // Used by the NN runtime to preemptively remove any stored memory. 318 hardware::Return<void> freeMemory(int32_t slot) override; 319 320 private: 321 // Ensures all cache entries contained in mExecutorWithCache are present in 322 // the cache. If they are not present, they are retrieved (via 323 // IBurstCallback::getMemories) and added to mExecutorWithCache. 324 // 325 // This method is locked via mMutex when it is called. 326 void ensureCacheEntriesArePresentLocked(const std::vector<int32_t>& slots); 327 328 // Work loop that will continue processing execution requests until the 329 // ExecutionBurstServer object is freed. 330 void task(); 331 332 std::thread mWorker; 333 std::mutex mMutex; 334 std::atomic<bool> mTeardown{false}; 335 const sp<hardware::neuralnetworks::V1_2::IBurstCallback> mCallback; 336 const std::unique_ptr<RequestChannelReceiver> mRequestChannelReceiver; 337 const std::unique_ptr<ResultChannelSender> mResultChannelSender; 338 const std::shared_ptr<IBurstExecutorWithCache> mExecutorWithCache; 339 }; 340 341 } // namespace android::nn 342 343 #endif // ANDROID_PACKAGES_MODULES_NEURALNETWORKS_COMMON_EXECUTION_BURST_SERVER_H 344