/*
 * Copyright (C) 2020 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define LOG_TAG "SampleDriverFloatXNNPACK"

#include <CpuExecutor.h>
#include <HalInterfaces.h>
#include <Utils.h>
#include <ValidateHal.h>
#include <android-base/logging.h>
#include <hidl/LegacySupport.h>
#include <hwbinder/IPCThreadState.h>
#include <xnnpack.h>

#include <algorithm>
#include <array>
#include <cstdint>
#include <limits>
#include <memory>
#include <string>
#include <thread>
#include <tuple>
#include <unordered_set>
#include <utility>
#include <vector>

#include "SampleDriverPartial.h"
#include "SampleDriverUtils.h"

namespace android {
namespace nn {
namespace sample_driver {

namespace {

#define NN_DRIVER_RETURN_IF_ERROR(expr)                   \
    do {                                                  \
        V1_3::ErrorStatus _errorCode = (expr);            \
        if (_errorCode != V1_3::ErrorStatus::NONE) {      \
            return _errorCode;                            \
        }                                                 \
    } while (0)

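// kNumOfWorkerThreads sizes the threadpool used to run XNNPACK (a single
// worker here); kNoTiming reports UINT64_MAX in both fields, the NNAPI
// convention for "execution time not measured".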
const size_t kNumOfWorkerThreads = 1;
static const V1_2::Timing kNoTiming = {.timeOnDevice = UINT64_MAX, .timeInDriver = UINT64_MAX};

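// Scalar operand types carry operator parameters (activation codes, strides,
// padding, layout flags) rather than tensor data, so operands of these types
// are never mapped to XNNPACK tensor Values.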
bool isScalarType(OperandType type) {
    switch (type) {
        case OperandType::FLOAT16:
        case OperandType::FLOAT32:
        case OperandType::INT32:
        case OperandType::UINT32:
        case OperandType::BOOL:
            return true;
        default:
            return false;
    }
}

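// Patches the per-operand runtime info for a request: overrides dimensions
// that the request specifies, and points each input/output operand at its
// buffer inside the request memory pools.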
void updateForArguments(const std::vector<uint32_t>& indexes,
                        const hardware::hidl_vec<V1_0::RequestArgument>& arguments,
                        const std::vector<RunTimePoolInfo>& requestPoolInfos,
                        RunTimeOperandInfo* operands) {
    CHECK_EQ(indexes.size(), arguments.size());
    for (size_t i = 0; i < indexes.size(); i++) {
        const uint32_t operandIndex = indexes[i];
        const V1_0::RequestArgument& from = arguments[i];
        RunTimeOperandInfo& to = operands[operandIndex];
        if (from.dimensions.size() > 0) {
            // It's the responsibility of the caller to validate that
            // from.dimensions only modifies the dimensions that were
            // unspecified in the model. That's the case in SampleDriver.cpp
            // with the call to validateRequest().
            // TODO: make sure that's the case for the default CPU path.
            to.dimensions = from.dimensions;
        }
        if (from.hasNoValue) {
            to.lifetime = Operand::LifeTime::NO_VALUE;
            CHECK(to.buffer == nullptr);
            to.length = 0;
        } else {
            auto poolIndex = from.location.poolIndex;
            CHECK_LT(poolIndex, requestPoolInfos.size());
            auto& r = requestPoolInfos[poolIndex];
            to.buffer = r.getBuffer() + from.location.offset;
            if (from.location.offset == 0 && from.location.length == 0) {
                // Use the entire memory region.
                to.length = r.getSize();
            } else {
                to.length = from.location.length;
            }
        }
    }
}

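// Builds the RunTimeOperandInfo table for a subgraph, copying type and shape
// metadata from each V1_3::Operand and resolving constant operand buffers from
// either the inline operand values or the model memory pools.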
std::vector<RunTimeOperandInfo> initializeRunTimeInfo(
        const V1_3::Subgraph& subgraph, const std::vector<RunTimePoolInfo>& modelPoolInfos,
        const hardware::hidl_vec<uint8_t>* mModelOperandValues) {
    const size_t count = subgraph.operands.size();
    std::vector<RunTimeOperandInfo> operands(count);
    for (size_t i = 0; i < count; i++) {
        const V1_3::Operand& from = subgraph.operands[i];
        RunTimeOperandInfo& to = operands[i];
        to.type = uncheckedConvert(from.type);
        to.dimensions = from.dimensions;
        to.scale = from.scale;
        to.zeroPoint = from.zeroPoint;
        to.length = from.location.length;
        to.lifetime = uncheckedConvert(from.lifetime);
        to.extraParams = uncheckedConvert(from.extraParams);
        switch (from.lifetime) {
            case V1_3::OperandLifeTime::TEMPORARY_VARIABLE:
                to.buffer = nullptr;
                to.numberOfUsesLeft = from.numberOfConsumers;
                break;
            case V1_3::OperandLifeTime::CONSTANT_COPY:
                to.buffer = const_cast<uint8_t*>(&(*mModelOperandValues)[from.location.offset]);
                to.numberOfUsesLeft = 0;
                break;
            case V1_3::OperandLifeTime::CONSTANT_REFERENCE: {
                auto poolIndex = from.location.poolIndex;
                CHECK_LT(poolIndex, modelPoolInfos.size());
                auto& r = modelPoolInfos[poolIndex];
                to.buffer = r.getBuffer() + from.location.offset;
                to.numberOfUsesLeft = 0;
                break;
            }
            case V1_3::OperandLifeTime::SUBGRAPH:
            case V1_3::OperandLifeTime::SUBGRAPH_INPUT:
            case V1_3::OperandLifeTime::SUBGRAPH_OUTPUT:
            case V1_3::OperandLifeTime::NO_VALUE:
                to.buffer = nullptr;
                to.numberOfUsesLeft = 0;
                break;
        }
    }
    return operands;
}

}  // namespace

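// Wraps an XNNPACK runtime compiled from one NNAPI subgraph. Create() maps
// non-scalar NNAPI operands to XNNPACK Values and NNAPI operations to XNNPACK
// Nodes, then compiles the result with xnn_create_runtime_v2. The IDs of the
// external Values (subgraph inputs and outputs) are retained so that their
// buffers can be bound at execution time in Invoke().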
class Subgraph {
   public:
    static Subgraph* Create(const hardware::hidl_vec<V1_3::Operation>& operations,
                            std::vector<RunTimeOperandInfo>& operands,
                            const std::vector<uint32_t>& inputIndexes,
                            const std::vector<uint32_t>& outputIndexes, pthreadpool_t threadpool,
                            bool useStaticBuffer = false) {
        // Convert subgraph inputs and outputs to hash sets for faster lookup.
        const std::unordered_set<uint32_t> inputs(inputIndexes.begin(), inputIndexes.end());
        const std::unordered_set<uint32_t> outputs(outputIndexes.begin(), outputIndexes.end());
        std::unordered_set<uint32_t> externals(outputs);

        xnn_subgraph_t subgraphPtr = nullptr;
        xnn_status status = xnn_create_subgraph(
                /*external_value_ids=*/operands.size(), /*flags=*/0, &subgraphPtr);
        if (status != xnn_status_success) {
            LOG(ERROR) << "XNNPACK xnn_create_subgraph FAILED";
            return nullptr;
        }

        // Smart pointer to automatically release subgraph on exit.
        std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> subgraph(
                subgraphPtr, &xnn_delete_subgraph);

        // Detect which tensors are used as inputs or outputs of any subgraph nodes.
        // -1 denotes tensor not used in the subgraph.
        std::vector<int> tensors(operands.size(), -1);

        for (const auto& operation : operations) {
            const std::vector<uint32_t>& ins = operation.inputs;
            const std::vector<uint32_t>& outs = operation.outputs;
            switch (operation.type) {
                case V1_3::OperationType::MEAN:
                case V1_3::OperationType::PAD:
                case V1_3::OperationType::RESHAPE:
                case V1_3::OperationType::RESIZE_BILINEAR:
                    // Ignore the second input (axes, static padding, or new shape),
                    // because it is represented as parameters of the XNNPACK operator
                    // rather than as an extra input.
                    {
                        const int t = ins[0];
                        tensors[t] = t;
                    }
                    break;
                default:
                    // All other operators: process all inputs.
                    for (size_t k = 0; k < ins.size(); k++) {
                        if (isScalarType(operands[ins[k]].type)) continue;
                        const int t = ins[k];
                        tensors[t] = t;
                    }
            }
            for (size_t k = 0; k < outs.size(); k++) {
                if (isScalarType(operands[outs[k]].type)) continue;
                const int t = outs[k];
                tensors[t] = t;
            }
        }

        // XNNPACK Value IDs for NNAPI Operands.
        std::vector<uint32_t> xnnpackTensors(operands.size());
        for (int t : tensors) {
            if (t < 0) continue;
            if (operands[tensors[t]].type != OperandType::TENSOR_FLOAT32) {
                LOG(ERROR) << "XNNPACK only supports FLOAT32 tensors";
                return nullptr;
            }

            uint32_t flags = 0;
            const void* data = nullptr;
            if (operands[tensors[t]].lifetime == Operand::LifeTime::CONSTANT_COPY ||
                operands[tensors[t]].lifetime == Operand::LifeTime::CONSTANT_REFERENCE ||
                operands[tensors[t]].lifetime == Operand::LifeTime::POINTER) {
                data = operands[tensors[t]].buffer;
            }
            if (inputs.count(t) != 0) {
                flags |= XNN_VALUE_FLAG_EXTERNAL_INPUT;
                CHECK(data == nullptr);
                VLOG(DRIVER) << "found input tensor, add to external";
                externals.insert(static_cast<uint32_t>(t));
            }
            if (outputs.count(t) != 0) {
                flags |= XNN_VALUE_FLAG_EXTERNAL_OUTPUT;
            }

            std::vector<size_t> dims(operands[tensors[t]].dimensions.size());
            for (size_t i = 0; i < dims.size(); i++) {
                dims[i] = operands[tensors[t]].dimensions[i];
            }

            const xnn_status status = xnn_define_tensor_value(
                    subgraph.get(), xnn_datatype_fp32, dims.size(), dims.data(), data,
                    static_cast<uint32_t>(t), flags, &xnnpackTensors[t]);
            if (status != xnn_status_success) {
                LOG(ERROR) << "XNNPACK xnn_define_tensor_value failed";
                return nullptr;
            }
        }

        // Create XNNPACK nodes for NNAPI Operations.
        for (const auto& operation : operations) {
            if (VisitNode(subgraph.get(), operation, operands.data(), xnnpackTensors) !=
                V1_3::ErrorStatus::NONE) {
                LOG(ERROR) << "XNNPACK add op failed";
                return nullptr;
            }
        }

        xnn_runtime_t runtimePtr = nullptr;
        status = xnn_create_runtime_v2(subgraph.get(), threadpool, /*flags=*/0, &runtimePtr);
        if (status != xnn_status_success) {
            LOG(ERROR) << "XNNPACK xnn_create_runtime_v2 FAILED";
            return nullptr;
        }
        return new Subgraph(runtimePtr, std::move(externals), useStaticBuffer);
    }

    V1_3::ErrorStatus Prepare() { return V1_3::ErrorStatus::NONE; }

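    // Binds the external input/output buffers and runs the compiled runtime.
    // With useStaticBuffer=true the bindings are assumed stable across runs,
    // so xnn_setup_runtime is only called on the first invocation.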
    V1_3::ErrorStatus Invoke(RunTimeOperandInfo* operands) {
        VLOG(DRIVER) << "Subgraph::Invoke() start";
        if (!mUseStaticBuffer || mFirstRun) {
            VLOG(DRIVER) << "Setup buffer for Subgraph";
            std::vector<xnn_external_value> externalValues;

            for (uint32_t t : mExternals) {
                xnn_external_value value = {.id = 0, .data = nullptr};
                value.id = t;
                value.data = operands[t].buffer;
                externalValues.push_back(value);
            }

            const xnn_status status =
                    xnn_setup_runtime(mRuntime.get(), externalValues.size(), externalValues.data());
            if (status != xnn_status_success) {
                LOG(ERROR) << "XNNPACK xnn_setup_runtime FAILED";
                return V1_3::ErrorStatus::GENERAL_FAILURE;
            }
            mFirstRun = false;
        }
        VLOG(DRIVER) << "Subgraph::Invoke() finished xnn_setup_runtime";
        const xnn_status status = xnn_invoke_runtime(mRuntime.get());
        if (status != xnn_status_success) {
            LOG(ERROR) << "XNNPACK xnn_invoke_runtime FAILED";
            return V1_3::ErrorStatus::GENERAL_FAILURE;
        }

        return V1_3::ErrorStatus::NONE;
    }

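    // Maps the NNAPI implicit padding scheme to XNNPACK flags: PADDING_SAME
    // becomes XNN_FLAG_TENSORFLOW_SAME_PADDING; PADDING_VALID needs no flag.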
    static V1_3::ErrorStatus CalculatePadding(int padding, uint32_t* flags) {
        switch (padding) {
            case ANEURALNETWORKS_PADDING_SAME:
                *flags = XNN_FLAG_TENSORFLOW_SAME_PADDING;
                return V1_3::ErrorStatus::NONE;
            case ANEURALNETWORKS_PADDING_VALID:
                *flags = 0;
                return V1_3::ErrorStatus::NONE;
            default:
                LOG(ERROR) << "invalid padding mode";
                return V1_3::ErrorStatus::INVALID_ARGUMENT;
        }
    }

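    // Translates an NNAPI fused activation code into the [outputMin, outputMax]
    // clamp range that XNNPACK applies to an operator's output.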
    static V1_3::ErrorStatus ConvertActivationToOutputRange(int activation, float* outputMin,
                                                            float* outputMax) {
        switch (activation) {
            case ANEURALNETWORKS_FUSED_NONE:
                *outputMin = -std::numeric_limits<float>::infinity();
                *outputMax = +std::numeric_limits<float>::infinity();
                return V1_3::ErrorStatus::NONE;
            case ANEURALNETWORKS_FUSED_RELU:
                *outputMin = 0.0f;
                *outputMax = +std::numeric_limits<float>::infinity();
                return V1_3::ErrorStatus::NONE;
            case ANEURALNETWORKS_FUSED_RELU1:
                *outputMin = -1.0f;
                *outputMax = +1.0f;
                return V1_3::ErrorStatus::NONE;
            case ANEURALNETWORKS_FUSED_RELU6:
                *outputMin = 0.0f;
                *outputMax = 6.0f;
                return V1_3::ErrorStatus::NONE;
            default:
                return V1_3::ErrorStatus::INVALID_ARGUMENT;
        }
    }

    static V1_3::ErrorStatus CheckConvolutionParams(int32_t stride_width, int32_t stride_height,
                                                    int32_t dilation_width_factor,
                                                    int32_t dilation_height_factor) {
        if (stride_width <= 0) {
            return V1_3::ErrorStatus::INVALID_ARGUMENT;
        }
        if (stride_height <= 0) {
            return V1_3::ErrorStatus::INVALID_ARGUMENT;
        }

        if (dilation_width_factor <= 0) {
            return V1_3::ErrorStatus::INVALID_ARGUMENT;
        }
        if (dilation_height_factor <= 0) {
            return V1_3::ErrorStatus::INVALID_ARGUMENT;
        }
        return V1_3::ErrorStatus::NONE;
    }

    static V1_3::ErrorStatus CheckDepthwiseConvolutionParams(
            int32_t stride_width, int32_t stride_height, int32_t dilation_width_factor,
            int32_t dilation_height_factor, int32_t depth_multiplier, uint32_t output_channels) {
        if (stride_width <= 0) {
            return V1_3::ErrorStatus::INVALID_ARGUMENT;
        }
        if (stride_height <= 0) {
            return V1_3::ErrorStatus::INVALID_ARGUMENT;
        }

        if (depth_multiplier <= 0) {
            return V1_3::ErrorStatus::INVALID_ARGUMENT;
        }
        if (output_channels % depth_multiplier != 0) {
            return V1_3::ErrorStatus::INVALID_ARGUMENT;
        }

        if (dilation_width_factor <= 0) {
            return V1_3::ErrorStatus::INVALID_ARGUMENT;
        }
        if (dilation_height_factor <= 0) {
            return V1_3::ErrorStatus::INVALID_ARGUMENT;
        }

        return V1_3::ErrorStatus::NONE;
    }

    static V1_3::ErrorStatus CheckPoolingParams(int32_t stride_width, int32_t stride_height,
                                                int32_t filter_width, int32_t filter_height) {
        if (stride_width <= 0) {
            return V1_3::ErrorStatus::INVALID_ARGUMENT;
        }
        if (stride_height <= 0) {
            return V1_3::ErrorStatus::INVALID_ARGUMENT;
        }

        if (filter_width <= 0) {
            return V1_3::ErrorStatus::INVALID_ARGUMENT;
        }
        if (filter_height <= 0) {
            return V1_3::ErrorStatus::INVALID_ARGUMENT;
        }
        if (filter_width == 1 && filter_height == 1 && std::max(stride_width, stride_height) > 1) {
            return V1_3::ErrorStatus::INVALID_ARGUMENT;
        }
        return V1_3::ErrorStatus::NONE;
    }

    static V1_3::ErrorStatus CheckNumInputsAndOutputs(const V1_3::Operation& operation,
                                                      uint32_t expected_num_inputs,
                                                      uint32_t expected_num_outputs) {
        if (operation.inputs.size() != expected_num_inputs) {
            return V1_3::ErrorStatus::INVALID_ARGUMENT;
        }
        if (operation.outputs.size() != expected_num_outputs) {
            return V1_3::ErrorStatus::INVALID_ARGUMENT;
        }
        return V1_3::ErrorStatus::NONE;
    }

    static V1_3::ErrorStatus CheckTensorType(OperandType tensor_type, OperandType expected_type) {
        if (tensor_type != expected_type) {
            return V1_3::ErrorStatus::INVALID_ARGUMENT;
        }
        return V1_3::ErrorStatus::NONE;
    }

    static V1_3::ErrorStatus CheckTensorFloatType(OperandType tensor_type) {
        if (tensor_type != OperandType::TENSOR_FLOAT32) {
            return V1_3::ErrorStatus::INVALID_ARGUMENT;
        }
        return V1_3::ErrorStatus::NONE;
    }

    static V1_3::ErrorStatus CheckTensorShape(std::vector<uint32_t>& dimensions,
                                              uint32_t min_num_dims, uint32_t max_num_dims) {
        if (min_num_dims == max_num_dims) {
            if (dimensions.size() != min_num_dims) {
                return V1_3::ErrorStatus::INVALID_ARGUMENT;
            }
        } else {
            if (dimensions.size() < min_num_dims || dimensions.size() > max_num_dims) {
                return V1_3::ErrorStatus::INVALID_ARGUMENT;
            }
        }
        for (size_t i = 0; i < dimensions.size(); i++) {
            if (dimensions[i] <= 0) {
                return V1_3::ErrorStatus::INVALID_ARGUMENT;
            }
        }
        return V1_3::ErrorStatus::NONE;
    }

    static V1_3::ErrorStatus CheckTensorShape(std::vector<uint32_t>& dimensions,
                                              int expected_num_dims) {
        return CheckTensorShape(dimensions, expected_num_dims, expected_num_dims);
    }

    static V1_3::ErrorStatus CheckSlopeTensorShape(std::vector<uint32_t>& dimensions) {
        if (dimensions.size() < 1) {
            return V1_3::ErrorStatus::INVALID_ARGUMENT;
        }
        // Validate that all non-channel dimensions (if any) are exactly 1.
        for (size_t i = 0; i < dimensions.size() - 1; i++) {
            if (dimensions[i] != 1) {
                return V1_3::ErrorStatus::INVALID_ARGUMENT;
            }
        }
        return V1_3::ErrorStatus::NONE;
    }

    static V1_3::ErrorStatus CheckAxesTensorShape(std::vector<uint32_t>& dimensions) {
        if (dimensions.size() != 1) {
            return V1_3::ErrorStatus::INVALID_ARGUMENT;
        }
        return V1_3::ErrorStatus::NONE;
    }

    static V1_3::ErrorStatus CheckShapeTensorShape(std::vector<uint32_t>& dimensions) {
        if (dimensions.size() != 1) {
            return V1_3::ErrorStatus::INVALID_ARGUMENT;
        }
        return V1_3::ErrorStatus::NONE;
    }

    static V1_3::ErrorStatus CheckTensorStaticAllocation(Operand::LifeTime lifetime) {
        if (lifetime != Operand::LifeTime::CONSTANT_COPY &&
            lifetime != Operand::LifeTime::CONSTANT_REFERENCE &&
            lifetime != Operand::LifeTime::POINTER) {
            VLOG(DRIVER) << "CheckTensorStaticAllocation: " << toString(convertToV1_3(lifetime));
            return V1_3::ErrorStatus::INVALID_ARGUMENT;
        }
        return V1_3::ErrorStatus::NONE;
    }

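    // Dispatches one NNAPI operation to its Visit*Node translator. Every
    // translator validates first and only emits an XNNPACK node when subgraph
    // is non-null, so calling VisitNode with a null subgraph doubles as the
    // "is this operation supported?" check.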
    static V1_3::ErrorStatus VisitNode(xnn_subgraph_t subgraph, const V1_3::Operation& operation,
                                       RunTimeOperandInfo* operands,
                                       const std::vector<uint32_t>& xnnpackTensors) {
        switch (operation.type) {
            case V1_3::OperationType::ABS:
                return VisitAbsNode(subgraph, operation, operands, xnnpackTensors);
            case V1_3::OperationType::ADD:
                return VisitAddNode(subgraph, operation, operands, xnnpackTensors);
            case V1_3::OperationType::AVERAGE_POOL_2D:
                return VisitAveragePool2DNode(subgraph, operation, operands, xnnpackTensors);
            case V1_3::OperationType::CONV_2D:
                return VisitConv2DNode(subgraph, operation, operands, xnnpackTensors);
            case V1_3::OperationType::DEPTHWISE_CONV_2D:
                return VisitDepthwiseConv2DNode(subgraph, operation, operands, xnnpackTensors);
            case V1_3::OperationType::DIV:
                return VisitDivNode(subgraph, operation, operands, xnnpackTensors);
            case V1_3::OperationType::FLOOR:
                return VisitFloorNode(subgraph, operation, operands, xnnpackTensors);
            case V1_3::OperationType::FULLY_CONNECTED:
                return VisitFullyConnectedNode(subgraph, operation, operands, xnnpackTensors);
            case V1_3::OperationType::HARD_SWISH:
                return VisitHardSwishNode(subgraph, operation, operands, xnnpackTensors);
            case V1_3::OperationType::LOGISTIC:
                return VisitLogisticNode(subgraph, operation, operands, xnnpackTensors);
            case V1_3::OperationType::MAX_POOL_2D:
                return VisitMaxPool2DNode(subgraph, operation, operands, xnnpackTensors);
            case V1_3::OperationType::MAXIMUM:
                return VisitMaximumNode(subgraph, operation, operands, xnnpackTensors);
            case V1_3::OperationType::MEAN:
                return VisitMeanNode(subgraph, operation, operands, xnnpackTensors);
            case V1_3::OperationType::MINIMUM:
                return VisitMinimumNode(subgraph, operation, operands, xnnpackTensors);
            case V1_3::OperationType::MUL:
                return VisitMulNode(subgraph, operation, operands, xnnpackTensors);
            case V1_3::OperationType::NEG:
                return VisitNegNode(subgraph, operation, operands, xnnpackTensors);
            case V1_3::OperationType::PAD:
                return VisitPadNode(subgraph, operation, operands, 0.0f, xnnpackTensors);
            case V1_3::OperationType::PAD_V2:
                return VisitPadV2Node(subgraph, operation, operands, xnnpackTensors);
            case V1_3::OperationType::RESHAPE:
                return VisitReshapeNode(subgraph, operation, operands, xnnpackTensors);
            case V1_3::OperationType::RESIZE_BILINEAR:
                return VisitResizeBilinearNode(subgraph, operation, operands, xnnpackTensors);
            case V1_3::OperationType::PRELU:
                return VisitPreluNode(subgraph, operation, operands, xnnpackTensors);
            case V1_3::OperationType::RELU:
                return VisitReluNode(subgraph, operation, operands, 0.0f,
                                     std::numeric_limits<float>::infinity(), xnnpackTensors);
            case V1_3::OperationType::RELU1:
                return VisitReluNode(subgraph, operation, operands, -1.0f, 1.0f, xnnpackTensors);
            case V1_3::OperationType::RELU6:
                return VisitReluNode(subgraph, operation, operands, 0.0f, 6.0f, xnnpackTensors);
            case V1_3::OperationType::SQRT:
                return VisitSqrtNode(subgraph, operation, operands, xnnpackTensors);
            case V1_3::OperationType::SUB:
                return VisitSubNode(subgraph, operation, operands, xnnpackTensors);
            case V1_3::OperationType::SOFTMAX:
                return VisitSoftmaxNode(subgraph, operation, operands, xnnpackTensors);
            default:
                return V1_3::ErrorStatus::INVALID_ARGUMENT;
        }
    }

    static V1_3::ErrorStatus VisitAbsNode(xnn_subgraph_t subgraph, const V1_3::Operation& operation,
                                          RunTimeOperandInfo* operands,
                                          const std::vector<uint32_t>& xnnpackTensors) {
        const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
        const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));

        if (subgraph != nullptr) {
            const xnn_status status =
                    xnn_define_abs(subgraph, /*input_id=*/xnnpackTensors[ins[0]],
                                   /*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
            if (status != xnn_status_success) {
                LOG(ERROR) << "XNNPACK xnn_define_abs FAILED";
                return V1_3::ErrorStatus::GENERAL_FAILURE;
            }
        }
        return V1_3::ErrorStatus::NONE;
    }

    static V1_3::ErrorStatus VisitAddNode(xnn_subgraph_t subgraph, const V1_3::Operation& operation,
                                          RunTimeOperandInfo* operands,
                                          const std::vector<uint32_t>& xnnpackTensors) {
        const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
        const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[1]].type));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[2]].lifetime));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));

        float outputMin = -std::numeric_limits<float>::infinity();
        float outputMax = +std::numeric_limits<float>::infinity();
        int activation = getScalarData<int32_t>(operands[ins[2]]);
        NN_DRIVER_RETURN_IF_ERROR(
                ConvertActivationToOutputRange(activation, &outputMin, &outputMax));

        if (subgraph != nullptr) {
            const xnn_status status =
                    xnn_define_add2(subgraph, outputMin, outputMax,
                                    /*input1_id=*/xnnpackTensors[ins[0]],
                                    /*input2_id=*/xnnpackTensors[ins[1]],
                                    /*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
            if (status != xnn_status_success) {
                LOG(ERROR) << "XNNPACK xnn_define_add2 FAILED";
                return V1_3::ErrorStatus::GENERAL_FAILURE;
            }
        }
        return V1_3::ErrorStatus::NONE;
    }

    static V1_3::ErrorStatus VisitAveragePool2DNode(xnn_subgraph_t subgraph,
                                                    const V1_3::Operation& operation,
                                                    RunTimeOperandInfo* operands,
                                                    const std::vector<uint32_t>& xnnpackTensors) {
        const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
        const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
        // Make sure all scalar params are constant.
        for (uint32_t i = 1; i < ins.size(); i++) {
            NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[i]].lifetime));
        }

        bool use_nchw = false;
        if (ins.size() == 8) {
            use_nchw = getScalarData<bool>(operands[ins[7]]);
        }
        if (ins.size() == 11) {
            use_nchw = getScalarData<bool>(operands[ins[10]]);
        }
        if (use_nchw) {
            VLOG(DRIVER) << "XNNPACK VisitAveragePool2DNode FAILED: only NHWC layout is supported";
            return V1_3::ErrorStatus::INVALID_ARGUMENT;
        }

        int32_t stride_width, stride_height, filter_width, filter_height, activation;
        uint32_t input_padding_top = 0;
        uint32_t input_padding_right = 0;
        uint32_t input_padding_bottom = 0;
        uint32_t input_padding_left = 0;
        uint32_t flags = 0;
        if (ins.size() >= 10) {
            // Explicit padding
            input_padding_left = static_cast<uint32_t>(getScalarData<int32_t>(operands[ins[1]]));
            input_padding_right = static_cast<uint32_t>(getScalarData<int32_t>(operands[ins[2]]));
            input_padding_top = static_cast<uint32_t>(getScalarData<int32_t>(operands[ins[3]]));
            input_padding_bottom = static_cast<uint32_t>(getScalarData<int32_t>(operands[ins[4]]));
            stride_width = getScalarData<int32_t>(operands[ins[5]]);
            stride_height = getScalarData<int32_t>(operands[ins[6]]);
            filter_width = getScalarData<int32_t>(operands[ins[7]]);
            filter_height = getScalarData<int32_t>(operands[ins[8]]);
            activation = getScalarData<int32_t>(operands[ins[9]]);
        } else {
            // Implicit padding
            int padding_implicit = getScalarData<int32_t>(operands[ins[1]]);
            NN_DRIVER_RETURN_IF_ERROR(CalculatePadding(padding_implicit, &flags));
            stride_width = getScalarData<int32_t>(operands[ins[2]]);
            stride_height = getScalarData<int32_t>(operands[ins[3]]);
            filter_width = getScalarData<int32_t>(operands[ins[4]]);
            filter_height = getScalarData<int32_t>(operands[ins[5]]);
            activation = getScalarData<int32_t>(operands[ins[6]]);
        }
        NN_DRIVER_RETURN_IF_ERROR(
                CheckPoolingParams(stride_width, stride_height, filter_width, filter_height));

        float outputMin = -std::numeric_limits<float>::infinity();
        float outputMax = +std::numeric_limits<float>::infinity();
        NN_DRIVER_RETURN_IF_ERROR(
                ConvertActivationToOutputRange(activation, &outputMin, &outputMax));

        if (subgraph != nullptr) {
            xnn_status status = xnn_status_success;
            if (filter_width == 1 && filter_height == 1) {
                status = xnn_define_clamp(subgraph, outputMin, outputMax,
                                          /*input_id=*/xnnpackTensors[ins[0]],
                                          /*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
            } else {
                status = xnn_define_average_pooling_2d(
                        subgraph, input_padding_top, input_padding_right, input_padding_bottom,
                        input_padding_left, static_cast<uint32_t>(filter_height),
                        static_cast<uint32_t>(filter_width), static_cast<uint32_t>(stride_height),
                        static_cast<uint32_t>(stride_width), outputMin, outputMax,
                        /*input_id=*/xnnpackTensors[ins[0]],
                        /*output_id=*/xnnpackTensors[outs[0]], flags);
            }
            if (status != xnn_status_success) {
                LOG(ERROR) << "XNNPACK xnn_define_average_pooling_2d FAILED";
                return V1_3::ErrorStatus::GENERAL_FAILURE;
            }
        }
        return V1_3::ErrorStatus::NONE;
    }

    static V1_3::ErrorStatus VisitConv2DNode(xnn_subgraph_t subgraph,
                                             const V1_3::Operation& operation,
                                             RunTimeOperandInfo* operands,
                                             const std::vector<uint32_t>& xnnpackTensors) {
        const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
        const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[1]].type));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[1]].lifetime));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[2]].type));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[2]].lifetime));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
        // Make sure all scalar params are constant.
        for (uint32_t i = 3; i < ins.size(); i++) {
            NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[i]].lifetime));
        }

        bool use_nchw = false;
        if (ins.size() >= 8 && operands[ins[7]].type == OperandType::BOOL) {
            use_nchw = getScalarData<bool>(operands[ins[7]]);
        }
        if (ins.size() >= 11) {
            use_nchw = getScalarData<bool>(operands[ins[10]]);
        }
        if (use_nchw) {
            VLOG(DRIVER) << "XNNPACK VisitConv2DNode FAILED: only NHWC layout is supported";
            return V1_3::ErrorStatus::INVALID_ARGUMENT;
        }

        int32_t stride_width, stride_height, activation;
        int32_t dilation_width_factor = 1;
        int32_t dilation_height_factor = 1;
        uint32_t input_padding_top = 0;
        uint32_t input_padding_right = 0;
        uint32_t input_padding_bottom = 0;
        uint32_t input_padding_left = 0;
        uint32_t flags = 0;
        if (ins.size() >= 10 && operands[ins[7]].type != OperandType::BOOL) {
            // Explicit padding
            input_padding_left = static_cast<uint32_t>(getScalarData<int32_t>(operands[ins[3]]));
            input_padding_right = static_cast<uint32_t>(getScalarData<int32_t>(operands[ins[4]]));
            input_padding_top = static_cast<uint32_t>(getScalarData<int32_t>(operands[ins[5]]));
            input_padding_bottom = static_cast<uint32_t>(getScalarData<int32_t>(operands[ins[6]]));
            stride_width = getScalarData<int32_t>(operands[ins[7]]);
            stride_height = getScalarData<int32_t>(operands[ins[8]]);
            activation = getScalarData<int32_t>(operands[ins[9]]);
            if (ins.size() == 13) {
                dilation_width_factor = getScalarData<int32_t>(operands[ins[11]]);
                dilation_height_factor = getScalarData<int32_t>(operands[ins[12]]);
            }
        } else {
            // Implicit padding
            int padding_implicit = getScalarData<int32_t>(operands[ins[3]]);
            NN_DRIVER_RETURN_IF_ERROR(CalculatePadding(padding_implicit, &flags));
            stride_width = getScalarData<int32_t>(operands[ins[4]]);
            stride_height = getScalarData<int32_t>(operands[ins[5]]);
            activation = getScalarData<int32_t>(operands[ins[6]]);
            if (ins.size() == 10) {
                dilation_width_factor = getScalarData<int32_t>(operands[ins[8]]);
                dilation_height_factor = getScalarData<int32_t>(operands[ins[9]]);
            }
        }
        NN_DRIVER_RETURN_IF_ERROR(CheckConvolutionParams(
                stride_width, stride_height, dilation_width_factor, dilation_height_factor));

        float outputMin = -std::numeric_limits<float>::infinity();
        float outputMax = +std::numeric_limits<float>::infinity();
        NN_DRIVER_RETURN_IF_ERROR(
                ConvertActivationToOutputRange(activation, &outputMin, &outputMax));

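        // NNAPI CONV_2D filter layout is [output_channels, filter_height,
        // filter_width, input_channels].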
        const RunTimeOperandInfo& filter = operands[ins[1]];
        const uint32_t output_channels = filter.dimensions[0];
        const uint32_t kernel_height = filter.dimensions[1];
        const uint32_t kernel_width = filter.dimensions[2];
        const uint32_t input_channels = filter.dimensions[3];

        if (subgraph != nullptr) {
            const xnn_status status = xnn_define_convolution_2d(
                    subgraph, input_padding_top, input_padding_right, input_padding_bottom,
                    input_padding_left, static_cast<uint32_t>(kernel_height),
                    static_cast<uint32_t>(kernel_width), static_cast<uint32_t>(stride_height),
                    static_cast<uint32_t>(stride_width),
                    static_cast<uint32_t>(dilation_height_factor),
                    static_cast<uint32_t>(dilation_width_factor),
                    /*groups=*/1, static_cast<size_t>(input_channels),
                    static_cast<size_t>(output_channels), outputMin, outputMax,
                    /*input_id=*/xnnpackTensors[ins[0]],
                    /*filter_id=*/xnnpackTensors[ins[1]],
                    /*bias_id=*/xnnpackTensors[ins[2]],
                    /*output_id=*/xnnpackTensors[outs[0]], flags);
            if (status != xnn_status_success) {
                LOG(ERROR) << "XNNPACK xnn_define_convolution_2d FAILED";
                return V1_3::ErrorStatus::GENERAL_FAILURE;
            }
        }

        return V1_3::ErrorStatus::NONE;
    }

    static V1_3::ErrorStatus VisitDepthwiseConv2DNode(xnn_subgraph_t subgraph,
                                                      const V1_3::Operation& operation,
                                                      RunTimeOperandInfo* operands,
                                                      const std::vector<uint32_t>& xnnpackTensors) {
        const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
        const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[1]].type));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[1]].lifetime));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[2]].type));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[2]].lifetime));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
        // Make sure all scalar params are constant.
        for (uint32_t i = 3; i < ins.size(); i++) {
            NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[i]].lifetime));
        }

        bool use_nchw = false;
        if (ins.size() >= 9 && operands[ins[8]].type == OperandType::BOOL) {
            use_nchw = getScalarData<bool>(operands[ins[8]]);
        }
        if (ins.size() >= 12) {
            use_nchw = getScalarData<bool>(operands[ins[11]]);
        }
        if (use_nchw) {
            VLOG(DRIVER)
                    << "XNNPACK VisitDepthwiseConv2DNode FAILED: only NHWC layout is supported";
            return V1_3::ErrorStatus::INVALID_ARGUMENT;
        }

        int32_t stride_width, stride_height, depth_multiplier, activation;
        int32_t dilation_width_factor = 1;
        int32_t dilation_height_factor = 1;
        uint32_t input_padding_top = 0;
        uint32_t input_padding_right = 0;
        uint32_t input_padding_bottom = 0;
        uint32_t input_padding_left = 0;
        uint32_t flags = 0;
        if (ins.size() >= 11 && operands[ins[8]].type != OperandType::BOOL) {
            // Explicit padding
            input_padding_left = static_cast<uint32_t>(getScalarData<int32_t>(operands[ins[3]]));
            input_padding_right = static_cast<uint32_t>(getScalarData<int32_t>(operands[ins[4]]));
            input_padding_top = static_cast<uint32_t>(getScalarData<int32_t>(operands[ins[5]]));
            input_padding_bottom = static_cast<uint32_t>(getScalarData<int32_t>(operands[ins[6]]));
            stride_width = getScalarData<int32_t>(operands[ins[7]]);
            stride_height = getScalarData<int32_t>(operands[ins[8]]);
            depth_multiplier = getScalarData<int32_t>(operands[ins[9]]);
            activation = getScalarData<int32_t>(operands[ins[10]]);
            if (ins.size() == 14) {
                dilation_width_factor = getScalarData<int32_t>(operands[ins[12]]);
                dilation_height_factor = getScalarData<int32_t>(operands[ins[13]]);
            }
        } else {
            // Implicit padding
            int padding_implicit = getScalarData<int32_t>(operands[ins[3]]);
            NN_DRIVER_RETURN_IF_ERROR(CalculatePadding(padding_implicit, &flags));
            stride_width = getScalarData<int32_t>(operands[ins[4]]);
            stride_height = getScalarData<int32_t>(operands[ins[5]]);
            depth_multiplier = getScalarData<int32_t>(operands[ins[6]]);
            activation = getScalarData<int32_t>(operands[ins[7]]);
            if (ins.size() == 11) {
                dilation_width_factor = getScalarData<int32_t>(operands[ins[9]]);
                dilation_height_factor = getScalarData<int32_t>(operands[ins[10]]);
            }
        }
        float outputMin = -std::numeric_limits<float>::infinity();
        float outputMax = +std::numeric_limits<float>::infinity();
        NN_DRIVER_RETURN_IF_ERROR(
                ConvertActivationToOutputRange(activation, &outputMin, &outputMax));

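        // NNAPI DEPTHWISE_CONV_2D filter layout is [1, filter_height,
        // filter_width, output_channels], with output_channels ==
        // input_channels * depth_multiplier.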
        const RunTimeOperandInfo& filter = operands[ins[1]];
        const uint32_t output_channels = filter.dimensions[3];
        const uint32_t kernel_height = filter.dimensions[1];
        const uint32_t kernel_width = filter.dimensions[2];
        NN_DRIVER_RETURN_IF_ERROR(CheckDepthwiseConvolutionParams(
                stride_width, stride_height, dilation_width_factor, dilation_height_factor,
                depth_multiplier, output_channels));

        if (subgraph != nullptr) {
            const xnn_status status = xnn_define_depthwise_convolution_2d(
                    subgraph, input_padding_top, input_padding_right, input_padding_bottom,
                    input_padding_left, static_cast<uint32_t>(kernel_height),
                    static_cast<uint32_t>(kernel_width), static_cast<uint32_t>(stride_height),
                    static_cast<uint32_t>(stride_width),
                    static_cast<uint32_t>(dilation_height_factor),
                    static_cast<uint32_t>(dilation_width_factor),
                    static_cast<uint32_t>(depth_multiplier),
                    /*input_channels=*/
                    static_cast<uint32_t>(output_channels / depth_multiplier), outputMin, outputMax,
                    /*input_id=*/xnnpackTensors[ins[0]],
                    /*filter_id=*/xnnpackTensors[ins[1]],
                    /*bias_id=*/xnnpackTensors[ins[2]],
                    /*output_id=*/xnnpackTensors[outs[0]], flags);
            if (status != xnn_status_success) {
                LOG(ERROR) << "XNNPACK xnn_define_depthwise_convolution_2d FAILED";
                return V1_3::ErrorStatus::GENERAL_FAILURE;
            }
        }
        return V1_3::ErrorStatus::NONE;
    }

    static V1_3::ErrorStatus VisitDivNode(xnn_subgraph_t subgraph, const V1_3::Operation& operation,
                                          RunTimeOperandInfo* operands,
                                          const std::vector<uint32_t>& xnnpackTensors) {
        const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
        const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[1]].type));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[2]].lifetime));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));

        float outputMin = -std::numeric_limits<float>::infinity();
        float outputMax = +std::numeric_limits<float>::infinity();
        int activation = getScalarData<int32_t>(operands[ins[2]]);
        NN_DRIVER_RETURN_IF_ERROR(
                ConvertActivationToOutputRange(activation, &outputMin, &outputMax));

        if (subgraph != nullptr) {
            const xnn_status status =
                    xnn_define_divide(subgraph, outputMin, outputMax,
                                      /*input1_id=*/xnnpackTensors[ins[0]],
                                      /*input2_id=*/xnnpackTensors[ins[1]],
                                      /*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
            if (status != xnn_status_success) {
                LOG(ERROR) << "XNNPACK xnn_define_divide FAILED";
                return V1_3::ErrorStatus::GENERAL_FAILURE;
            }
        }
        return V1_3::ErrorStatus::NONE;
    }

    static V1_3::ErrorStatus VisitFullyConnectedNode(xnn_subgraph_t subgraph,
                                                     const V1_3::Operation& operation,
                                                     RunTimeOperandInfo* operands,
                                                     const std::vector<uint32_t>& xnnpackTensors) {
        const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
        const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[1]].type));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[1]].lifetime));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[2]].type));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[2]].lifetime));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[3]].lifetime));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));

        float outputMin = -std::numeric_limits<float>::infinity();
        float outputMax = +std::numeric_limits<float>::infinity();
        int activation = getScalarData<int32_t>(operands[ins[3]]);
        NN_DRIVER_RETURN_IF_ERROR(
                ConvertActivationToOutputRange(activation, &outputMin, &outputMax));

        if (subgraph != nullptr) {
            const xnn_status status =
                    xnn_define_fully_connected(subgraph, outputMin, outputMax,
                                               /*input_id=*/xnnpackTensors[ins[0]],
                                               /*filter_id=*/xnnpackTensors[ins[1]],
                                               /*bias_id=*/xnnpackTensors[ins[2]],
                                               /*output_id=*/xnnpackTensors[outs[0]],
                                               /*flags=*/XNN_FLAG_TENSORFLOW_RESHAPE_2D);
            if (status != xnn_status_success) {
                LOG(ERROR) << "XNNPACK xnn_define_fully_connected FAILED";
                return V1_3::ErrorStatus::GENERAL_FAILURE;
            }
        }
        return V1_3::ErrorStatus::NONE;
    }

    static V1_3::ErrorStatus VisitFloorNode(xnn_subgraph_t subgraph,
                                            const V1_3::Operation& operation,
                                            RunTimeOperandInfo* operands,
                                            const std::vector<uint32_t>& xnnpackTensors) {
        const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
        const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));

        if (subgraph != nullptr) {
            const xnn_status status =
                    xnn_define_floor(subgraph,
                                     /*input1_id=*/xnnpackTensors[ins[0]],
                                     /*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
            if (status != xnn_status_success) {
                LOG(ERROR) << "XNNPACK xnn_define_floor FAILED";
                return V1_3::ErrorStatus::GENERAL_FAILURE;
            }
        }
        return V1_3::ErrorStatus::NONE;
    }

    static V1_3::ErrorStatus VisitHardSwishNode(xnn_subgraph_t subgraph,
                                                const V1_3::Operation& operation,
                                                RunTimeOperandInfo* operands,
                                                const std::vector<uint32_t>& xnnpackTensors) {
        const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
        const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));

        if (subgraph != nullptr) {
            const xnn_status status =
                    xnn_define_hardswish(subgraph, /*input_id=*/xnnpackTensors[ins[0]],
                                         /*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
            if (status != xnn_status_success) {
                LOG(ERROR) << "XNNPACK xnn_define_hardswish FAILED";
                return V1_3::ErrorStatus::GENERAL_FAILURE;
            }
        }
        return V1_3::ErrorStatus::NONE;
    }

    static V1_3::ErrorStatus VisitLogisticNode(xnn_subgraph_t subgraph,
                                               const V1_3::Operation& operation,
                                               RunTimeOperandInfo* operands,
                                               const std::vector<uint32_t>& xnnpackTensors) {
        const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
        const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));

        if (subgraph != nullptr) {
            const xnn_status status =
                    xnn_define_sigmoid(subgraph, /*input_id=*/xnnpackTensors[ins[0]],
                                       /*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
            if (status != xnn_status_success) {
                LOG(ERROR) << "XNNPACK xnn_define_sigmoid FAILED";
                return V1_3::ErrorStatus::GENERAL_FAILURE;
            }
        }
        return V1_3::ErrorStatus::NONE;
    }

    static V1_3::ErrorStatus VisitMaxPool2DNode(xnn_subgraph_t subgraph,
                                                const V1_3::Operation& operation,
                                                RunTimeOperandInfo* operands,
                                                const std::vector<uint32_t>& xnnpackTensors) {
        const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
        const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
        // Make sure all scalar params are constant.
        for (uint32_t i = 1; i < ins.size(); i++) {
            NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[i]].lifetime));
        }

        bool use_nchw = false;
        if (ins.size() == 8) {
            use_nchw = getScalarData<bool>(operands[ins[7]]);
        }
        if (ins.size() == 11) {
            use_nchw = getScalarData<bool>(operands[ins[10]]);
        }
        if (use_nchw) {
            VLOG(DRIVER) << "XNNPACK VisitMaxPool2DNode FAILED: only NHWC layout is supported";
            return V1_3::ErrorStatus::INVALID_ARGUMENT;
        }

        int32_t stride_width, stride_height, filter_width, filter_height, activation;
        uint32_t input_padding_top = 0;
        uint32_t input_padding_right = 0;
        uint32_t input_padding_bottom = 0;
        uint32_t input_padding_left = 0;
        uint32_t flags = 0;
        if (ins.size() >= 10) {
            // Explicit padding
            input_padding_left = static_cast<uint32_t>(getScalarData<int32_t>(operands[ins[1]]));
            input_padding_right = static_cast<uint32_t>(getScalarData<int32_t>(operands[ins[2]]));
            input_padding_top = static_cast<uint32_t>(getScalarData<int32_t>(operands[ins[3]]));
            input_padding_bottom = static_cast<uint32_t>(getScalarData<int32_t>(operands[ins[4]]));
            stride_width = getScalarData<int32_t>(operands[ins[5]]);
            stride_height = getScalarData<int32_t>(operands[ins[6]]);
            filter_width = getScalarData<int32_t>(operands[ins[7]]);
            filter_height = getScalarData<int32_t>(operands[ins[8]]);
            activation = getScalarData<int32_t>(operands[ins[9]]);
        } else {
            // Implicit padding
            int padding_implicit = getScalarData<int32_t>(operands[ins[1]]);
            NN_DRIVER_RETURN_IF_ERROR(CalculatePadding(padding_implicit, &flags));
            stride_width = getScalarData<int32_t>(operands[ins[2]]);
            stride_height = getScalarData<int32_t>(operands[ins[3]]);
            filter_width = getScalarData<int32_t>(operands[ins[4]]);
            filter_height = getScalarData<int32_t>(operands[ins[5]]);
            activation = getScalarData<int32_t>(operands[ins[6]]);
        }
        NN_DRIVER_RETURN_IF_ERROR(
                CheckPoolingParams(stride_width, stride_height, filter_width, filter_height));

        float outputMin = -std::numeric_limits<float>::infinity();
        float outputMax = +std::numeric_limits<float>::infinity();
        NN_DRIVER_RETURN_IF_ERROR(
                ConvertActivationToOutputRange(activation, &outputMin, &outputMax));

        if (subgraph != nullptr) {
            xnn_status status = xnn_status_success;
            if (filter_width == 1 && filter_height == 1) {
                status = xnn_define_clamp(subgraph, outputMin, outputMax,
                                          /*input_id=*/xnnpackTensors[ins[0]],
                                          /*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
            } else {
                status = xnn_define_max_pooling_2d(
                        subgraph, input_padding_top, input_padding_right, input_padding_bottom,
                        input_padding_left, static_cast<uint32_t>(filter_height),
                        static_cast<uint32_t>(filter_width), static_cast<uint32_t>(stride_height),
                        static_cast<uint32_t>(stride_width), /*dilation_height=*/1,
                        /*dilation_width=*/1, outputMin, outputMax,
                        /*input_id=*/xnnpackTensors[ins[0]],
                        /*output_id=*/xnnpackTensors[outs[0]], flags);
            }
            if (status != xnn_status_success) {
                LOG(ERROR) << "XNNPACK xnn_define_max_pooling_2d FAILED";
                return V1_3::ErrorStatus::GENERAL_FAILURE;
            }
        }
        return V1_3::ErrorStatus::NONE;
    }

    static V1_3::ErrorStatus VisitMaximumNode(xnn_subgraph_t subgraph,
                                              const V1_3::Operation& operation,
                                              RunTimeOperandInfo* operands,
                                              const std::vector<uint32_t>& xnnpackTensors) {
        const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
        const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[1]].type));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[2]].lifetime));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));

        float outputMin = -std::numeric_limits<float>::infinity();
        float outputMax = +std::numeric_limits<float>::infinity();
        int activation = getScalarData<int32_t>(operands[ins[2]]);
        NN_DRIVER_RETURN_IF_ERROR(
                ConvertActivationToOutputRange(activation, &outputMin, &outputMax));

        if (subgraph != nullptr) {
            const xnn_status status =
                    xnn_define_maximum2(subgraph,
                                        /*input1_id=*/xnnpackTensors[ins[0]],
                                        /*input2_id=*/xnnpackTensors[ins[1]],
                                        /*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
            if (status != xnn_status_success) {
                LOG(ERROR) << "XNNPACK xnn_define_maximum2 FAILED";
                return V1_3::ErrorStatus::GENERAL_FAILURE;
            }
        }
        return V1_3::ErrorStatus::NONE;
    }

    static V1_3::ErrorStatus VisitMeanNode(xnn_subgraph_t subgraph,
                                           const V1_3::Operation& operation,
                                           RunTimeOperandInfo* operands,
                                           const std::vector<uint32_t>& xnnpackTensors) {
        const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
        const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorShape(operands[ins[0]].dimensions, 4));
        NN_DRIVER_RETURN_IF_ERROR(CheckAxesTensorShape(operands[ins[1]].dimensions));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[1]].lifetime));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[2]].lifetime));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorShape(operands[outs[0]].dimensions, 4));

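        // Only the MEAN configuration that matches a global average pooling is
        // supported: reduction over the spatial axes {1, 2} of a 4-D NHWC
        // tensor with keep_dims enabled.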
        int keep_dims = getScalarData<int32_t>(operands[ins[2]]);
        if (keep_dims <= 0) {
            LOG(ERROR) << "XNNPACK VisitMeanNode FAILED: only supports keep_dims";
1155 return V1_3::ErrorStatus::INVALID_ARGUMENT;
1156 }
1157 const int32_t* axes_buffer = reinterpret_cast<const int32_t*>(operands[ins[1]].buffer);
1158 if (operands[ins[1]].dimensions[0] != 2) {
1159 LOG(ERROR) << "XNNPACK VisitMeanNode FAILED: unsupported axes";
1160 return V1_3::ErrorStatus::INVALID_ARGUMENT;
1161 }
1162 if (std::min(axes_buffer[0], axes_buffer[1]) != 1 ||
1163 std::max(axes_buffer[0], axes_buffer[1]) != 2) {
1164 LOG(ERROR) << "XNNPACK VisitMeanNode FAILED: unsupported axes";
1165 return V1_3::ErrorStatus::INVALID_ARGUMENT;
1166 }
1167 if (subgraph != nullptr) {
1168 const xnn_status status = xnn_define_global_average_pooling_2d(
1169 subgraph,
1170 /*outputMin=*/-std::numeric_limits<float>::infinity(),
1171 /*outputMax=*/+std::numeric_limits<float>::infinity(),
1172 /*input_id=*/xnnpackTensors[ins[0]],
1173 /*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
1174 if (status != xnn_status_success) {
1175 LOG(ERROR) << "XNNPACK xnn_define_global_average_pooling_2d FAILED";
1176 return V1_3::ErrorStatus::GENERAL_FAILURE;
1177 }
1178 }
1179 return V1_3::ErrorStatus::NONE;
1180 }
1181
VisitMinimumNode(xnn_subgraph_t subgraph,const V1_3::Operation & operation,RunTimeOperandInfo * operands,const std::vector<uint32_t> & xnnpackTensors)1182 static V1_3::ErrorStatus VisitMinimumNode(xnn_subgraph_t subgraph,
1183 const V1_3::Operation& operation,
1184 RunTimeOperandInfo* operands,
1185 const std::vector<uint32_t>& xnnpackTensors) {
1186 const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
1187 const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
1188 NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
1189 NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[1]].type));
1190 NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[2]].lifetime));
1191 NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
1192
1193 float outputMin = -std::numeric_limits<float>::infinity();
1194 float outputMax = +std::numeric_limits<float>::infinity();
1195 int activation = getScalarData<int32_t>(operands[ins[2]]);
1196 NN_DRIVER_RETURN_IF_ERROR(
1197 ConvertActivationToOutputRange(activation, &outputMin, &outputMax));
1198
1199 if (subgraph != nullptr) {
1200 const xnn_status status =
1201 xnn_define_minimum2(subgraph,
1202 /*input1_id=*/xnnpackTensors[ins[0]],
1203 /*input2_id=*/xnnpackTensors[ins[1]],
1204 /*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
1205 if (status != xnn_status_success) {
1206 LOG(ERROR) << "XNNPACK xnn_define_minimum2 FAILED";
1207 return V1_3::ErrorStatus::GENERAL_FAILURE;
1208 }
1209 }
1210 return V1_3::ErrorStatus::NONE;
1211 }
1212
static V1_3::ErrorStatus VisitMulNode(xnn_subgraph_t subgraph, const V1_3::Operation& operation,
                                      RunTimeOperandInfo* operands,
                                      const std::vector<uint32_t>& xnnpackTensors) {
    const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
    const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
    NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
    NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[1]].type));
    NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[2]].lifetime));
    NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));

    int activation = getScalarData<int32_t>(operands[ins[2]]);
    float outputMin = -std::numeric_limits<float>::infinity();
    float outputMax = +std::numeric_limits<float>::infinity();
    NN_DRIVER_RETURN_IF_ERROR(
            ConvertActivationToOutputRange(activation, &outputMin, &outputMax));

    if (subgraph != nullptr) {
        const xnn_status status =
                xnn_define_multiply2(subgraph, outputMin, outputMax,
                                     /*input1_id=*/xnnpackTensors[ins[0]],
                                     /*input2_id=*/xnnpackTensors[ins[1]],
                                     /*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
        if (status != xnn_status_success) {
            LOG(ERROR) << "XNNPACK xnn_define_multiply2 FAILED";
            return V1_3::ErrorStatus::GENERAL_FAILURE;
        }
    }
    return V1_3::ErrorStatus::NONE;
}

static V1_3::ErrorStatus VisitNegNode(xnn_subgraph_t subgraph, const V1_3::Operation& operation,
                                      RunTimeOperandInfo* operands,
                                      const std::vector<uint32_t>& xnnpackTensors) {
    const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
    const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
    NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
    NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));

    if (subgraph != nullptr) {
        const xnn_status status =
                xnn_define_negate(subgraph,
                                  /*input_id=*/xnnpackTensors[ins[0]],
                                  /*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
        if (status != xnn_status_success) {
            LOG(ERROR) << "XNNPACK xnn_define_negate FAILED";
            return V1_3::ErrorStatus::GENERAL_FAILURE;
        }
    }
    return V1_3::ErrorStatus::NONE;
}

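// Lowers NNAPI PRELU to xnn_define_prelu. The input can have any rank that
// XNNPACK supports; the slope tensor layout is validated separately by
// CheckSlopeTensorShape.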
static V1_3::ErrorStatus VisitPreluNode(xnn_subgraph_t subgraph,
                                        const V1_3::Operation& operation,
                                        RunTimeOperandInfo* operands,
                                        const std::vector<uint32_t>& xnnpackTensors) {
    const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
    const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
    NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
    NN_DRIVER_RETURN_IF_ERROR(
            CheckTensorShape(operands[ins[0]].dimensions, 1, XNN_MAX_TENSOR_DIMS));
    NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[1]].type));
    NN_DRIVER_RETURN_IF_ERROR(CheckSlopeTensorShape(operands[ins[1]].dimensions));
    NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
    NN_DRIVER_RETURN_IF_ERROR(
            CheckTensorShape(operands[outs[0]].dimensions, 1, XNN_MAX_TENSOR_DIMS));

    if (subgraph != nullptr) {
        const xnn_status status =
                xnn_define_prelu(subgraph, /*input_id=*/xnnpackTensors[ins[0]],
                                 /*slope_id=*/xnnpackTensors[ins[1]],
                                 /*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
        if (status != xnn_status_success) {
            LOG(ERROR) << "XNNPACK xnn_define_prelu FAILED";
            return V1_3::ErrorStatus::GENERAL_FAILURE;
        }
    }
    return V1_3::ErrorStatus::NONE;
}

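// Shared implementation for PAD and PAD_V2. The paddings operand (ins[1]) is
// a static int32 tensor of shape [rank(input), 2] holding {before, after}
// pairs per dimension; negative paddings are rejected because
// xnn_define_static_constant_pad takes unsigned padding counts.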
static V1_3::ErrorStatus VisitPadNode(xnn_subgraph_t subgraph, const V1_3::Operation& operation,
                                      RunTimeOperandInfo* operands, float padding_value,
                                      const std::vector<uint32_t>& xnnpackTensors) {
    const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
    const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
    NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
    NN_DRIVER_RETURN_IF_ERROR(
            CheckTensorShape(operands[ins[0]].dimensions, 1, XNN_MAX_TENSOR_DIMS));
    NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[1]].lifetime));
    NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
    NN_DRIVER_RETURN_IF_ERROR(
            CheckTensorShape(operands[outs[0]].dimensions, 1, XNN_MAX_TENSOR_DIMS));

    // Iterate over the input rank: the paddings tensor itself is always 2-D
    // ([rank(input), 2]), so its own rank must not be used as the bound.
    const size_t inputRank = operands[ins[0]].dimensions.size();
    const int32_t* paddings_data = reinterpret_cast<const int32_t*>(operands[ins[1]].buffer);
    for (size_t i = 0; i < inputRank * 2; i++) {
        if (paddings_data[i] < 0) return V1_3::ErrorStatus::INVALID_ARGUMENT;
    }
    if (subgraph != nullptr) {
        std::array<size_t, XNN_MAX_TENSOR_DIMS> pre_paddings{};
        std::array<size_t, XNN_MAX_TENSOR_DIMS> post_paddings{};
        for (size_t i = 0; i < inputRank; i++) {
            pre_paddings[i] = static_cast<size_t>(paddings_data[i * 2 + 0]);
            post_paddings[i] = static_cast<size_t>(paddings_data[i * 2 + 1]);
        }
        const xnn_status status = xnn_define_static_constant_pad(
                subgraph, pre_paddings.data(), post_paddings.data(), padding_value,
                /*input_id=*/xnnpackTensors[ins[0]],
                /*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
        if (status != xnn_status_success) {
            LOG(ERROR) << "XNNPACK xnn_define_static_constant_pad FAILED";
            return V1_3::ErrorStatus::GENERAL_FAILURE;
        }
    }
    return V1_3::ErrorStatus::NONE;
}

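// PAD_V2 additionally carries an explicit padding value in ins[2]; only
// FLOAT32 padding values are accepted before delegating to VisitPadNode.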
static V1_3::ErrorStatus VisitPadV2Node(xnn_subgraph_t subgraph,
                                        const V1_3::Operation& operation,
                                        RunTimeOperandInfo* operands,
                                        const std::vector<uint32_t>& xnnpackTensors) {
    const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
    if (operands[ins[2]].type != OperandType::FLOAT32) {
        return V1_3::ErrorStatus::INVALID_ARGUMENT;
    }
    float padding_value = getScalarData<float>(operands[ins[2]]);
    return VisitPadNode(subgraph, operation, operands, padding_value, xnnpackTensors);
}

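// Lowers NNAPI RESHAPE to xnn_define_static_reshape. The target shape is read
// from the already-resolved output operand dimensions, so the shape operand
// (ins[1]) merely has to be a compile-time constant.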
static V1_3::ErrorStatus VisitReshapeNode(xnn_subgraph_t subgraph,
                                          const V1_3::Operation& operation,
                                          RunTimeOperandInfo* operands,
                                          const std::vector<uint32_t>& xnnpackTensors) {
    const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
    const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
    NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
    NN_DRIVER_RETURN_IF_ERROR(
            CheckTensorShape(operands[ins[0]].dimensions, 0, XNN_MAX_TENSOR_DIMS));
    NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[1]].lifetime));
    NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
    NN_DRIVER_RETURN_IF_ERROR(
            CheckTensorShape(operands[outs[0]].dimensions, 0, XNN_MAX_TENSOR_DIMS));

    if (subgraph != nullptr) {
        std::array<size_t, XNN_MAX_TENSOR_DIMS> new_shape;
        for (uint32_t i = 0; i < operands[outs[0]].dimensions.size(); i++) {
            new_shape[i] = static_cast<size_t>(operands[outs[0]].dimensions[i]);
        }
        const xnn_status status = xnn_define_static_reshape(
                subgraph, static_cast<size_t>(operands[outs[0]].dimensions.size()),
                new_shape.data(),
                /*input_id=*/xnnpackTensors[ins[0]],
                /*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
        if (status != xnn_status_success) {
            LOG(ERROR) << "XNNPACK xnn_define_static_reshape FAILED";
            return V1_3::ErrorStatus::GENERAL_FAILURE;
        }
    }
    return V1_3::ErrorStatus::NONE;
}

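// Lowers NNAPI RESIZE_BILINEAR to xnn_define_static_resize_bilinear_2d. The
// output size is given either explicitly (INT32 width/height) or as FLOAT32
// scale factors applied to the NHWC input; only NHWC layout is supported.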
static V1_3::ErrorStatus VisitResizeBilinearNode(xnn_subgraph_t subgraph,
                                                 const V1_3::Operation& operation,
                                                 RunTimeOperandInfo* operands,
                                                 const std::vector<uint32_t>& xnnpackTensors) {
    const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
    const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
    NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
    NN_DRIVER_RETURN_IF_ERROR(CheckTensorShape(operands[ins[0]].dimensions, 4));
    NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
    NN_DRIVER_RETURN_IF_ERROR(CheckTensorShape(operands[outs[0]].dimensions, 4));
    // Make sure all scalar params are constant.
    for (uint32_t i = 1; i < ins.size(); i++) {
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[i]].lifetime));
    }

    if (ins.size() >= 4) {
        bool use_nchw = getScalarData<bool>(operands[ins[3]]);
        if (use_nchw) {
            VLOG(DRIVER)
                    << "XNNPACK VisitResizeBilinearNode FAILED: only NHWC layout is supported";
            return V1_3::ErrorStatus::INVALID_ARGUMENT;
        }
    }

    size_t new_height, new_width;
    if (operands[ins[1]].type == OperandType::INT32) {
        // The output dimensions are specified explicitly.
        new_width = static_cast<size_t>(getScalarData<int32_t>(operands[ins[1]]));
        new_height = static_cast<size_t>(getScalarData<int32_t>(operands[ins[2]]));
    } else if (operands[ins[1]].type == OperandType::FLOAT32) {
        // The output dimensions are specified as scaling factors of the input.
        float width_scale = getScalarData<float>(operands[ins[1]]);
        float height_scale = getScalarData<float>(operands[ins[2]]);
        if (width_scale <= 0 || height_scale <= 0) {
            return V1_3::ErrorStatus::INVALID_ARGUMENT;
        }
        new_height = static_cast<size_t>(operands[ins[0]].dimensions[1] * height_scale);
        new_width = static_cast<size_t>(operands[ins[0]].dimensions[2] * width_scale);
    } else {
        return V1_3::ErrorStatus::INVALID_ARGUMENT;
    }

    bool align_corners = false;
    bool half_pixel_centers = false;
    if (ins.size() == 6) {
        align_corners = getScalarData<bool>(operands[ins[4]]);
        half_pixel_centers = getScalarData<bool>(operands[ins[5]]);
    }
    // align_corners and half_pixel_centers are mutually exclusive.
    if (align_corners && half_pixel_centers) {
        return V1_3::ErrorStatus::INVALID_ARGUMENT;
    }
    if (subgraph != nullptr) {
        uint32_t flags = 0;
        if (align_corners) {
            flags |= XNN_FLAG_ALIGN_CORNERS;
        } else if (!half_pixel_centers) {
            flags |= XNN_FLAG_TENSORFLOW_LEGACY_MODE;
        }
        const xnn_status status = xnn_define_static_resize_bilinear_2d(
                subgraph, new_height, new_width,
                /*input_id=*/xnnpackTensors[ins[0]],
                /*output_id=*/xnnpackTensors[outs[0]], flags);
        if (status != xnn_status_success) {
            LOG(ERROR) << "XNNPACK xnn_define_static_resize_bilinear_2d FAILED";
            return V1_3::ErrorStatus::GENERAL_FAILURE;
        }
    }
    return V1_3::ErrorStatus::NONE;
}

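// Lowers the RELU family to xnn_define_clamp; the caller supplies the clamp
// range (e.g. [0, +inf) for RELU, [0, 6] for RELU6).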
static V1_3::ErrorStatus VisitReluNode(xnn_subgraph_t subgraph,
                                       const V1_3::Operation& operation,
                                       RunTimeOperandInfo* operands, float outputMin,
                                       float outputMax,
                                       const std::vector<uint32_t>& xnnpackTensors) {
    const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
    const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
    NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
    NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));

    if (subgraph != nullptr) {
        const xnn_status status =
                xnn_define_clamp(subgraph, outputMin, outputMax,
                                 /*input_id=*/xnnpackTensors[ins[0]],
                                 /*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
        if (status != xnn_status_success) {
            LOG(ERROR) << "XNNPACK xnn_define_clamp FAILED";
            return V1_3::ErrorStatus::GENERAL_FAILURE;
        }
    }
    return V1_3::ErrorStatus::NONE;
}

static V1_3::ErrorStatus VisitSqrtNode(xnn_subgraph_t subgraph,
                                       const V1_3::Operation& operation,
                                       RunTimeOperandInfo* operands,
                                       const std::vector<uint32_t>& xnnpackTensors) {
    const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
    const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
    NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
    NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));

    if (subgraph != nullptr) {
        const xnn_status status =
                xnn_define_square_root(subgraph,
                                       /*input_id=*/xnnpackTensors[ins[0]],
                                       /*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
        if (status != xnn_status_success) {
            LOG(ERROR) << "XNNPACK xnn_define_square_root FAILED";
            return V1_3::ErrorStatus::GENERAL_FAILURE;
        }
    }
    return V1_3::ErrorStatus::NONE;
}

static V1_3::ErrorStatus VisitSubNode(xnn_subgraph_t subgraph, const V1_3::Operation& operation,
                                      RunTimeOperandInfo* operands,
                                      const std::vector<uint32_t>& xnnpackTensors) {
    const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
    const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
    NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
    NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[1]].type));
    NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[2]].lifetime));
    NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));

    float outputMin = -std::numeric_limits<float>::infinity();
    float outputMax = +std::numeric_limits<float>::infinity();
    int activation = getScalarData<int32_t>(operands[ins[2]]);
    NN_DRIVER_RETURN_IF_ERROR(
            ConvertActivationToOutputRange(activation, &outputMin, &outputMax));

    if (subgraph != nullptr) {
        const xnn_status status =
                xnn_define_subtract(subgraph, outputMin, outputMax,
                                    /*input1_id=*/xnnpackTensors[ins[0]],
                                    /*input2_id=*/xnnpackTensors[ins[1]],
                                    /*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
        if (status != xnn_status_success) {
            LOG(ERROR) << "XNNPACK xnn_define_subtract FAILED";
            return V1_3::ErrorStatus::GENERAL_FAILURE;
        }
    }
    return V1_3::ErrorStatus::NONE;
}

static V1_3::ErrorStatus VisitSoftmaxNode(xnn_subgraph_t subgraph,
                                          const V1_3::Operation& operation,
                                          RunTimeOperandInfo* operands,
                                          const std::vector<uint32_t>& xnnpackTensors) {
    const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
    const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
    NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
    NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[1]].lifetime));
    NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));

    float beta = getScalarData<float>(operands[ins[1]]);
    if (beta != 1.0f) {
        LOG(ERROR) << "XNNPACK VisitSoftmaxNode FAILED, unsupported beta value: " << beta;
        return V1_3::ErrorStatus::INVALID_ARGUMENT;
    }
    if (ins.size() >= 3) {
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[2]].lifetime));
        int axis = getScalarData<int32_t>(operands[ins[2]]);
        if (axis != -1) {
            LOG(ERROR) << "XNNPACK VisitSoftmaxNode FAILED, unsupported axis value: " << axis;
            return V1_3::ErrorStatus::INVALID_ARGUMENT;
        }
    }
    if (subgraph != nullptr) {
        const xnn_status status =
                xnn_define_softmax(subgraph, /*input_id=*/xnnpackTensors[ins[0]],
                                   /*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
        if (status != xnn_status_success) {
            LOG(ERROR) << "XNNPACK xnn_define_softmax FAILED";
            return V1_3::ErrorStatus::GENERAL_FAILURE;
        }
    }

    return V1_3::ErrorStatus::NONE;
}

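    // Instances are built through Subgraph::Create; the constructor merely
    // adopts an already-compiled XNNPACK runtime together with the external
    // value IDs (model inputs and outputs) that are rebound to request
    // buffers on each invocation.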
  private:
    Subgraph(xnn_runtime_t runtime, std::unordered_set<uint32_t>&& externals,
             bool useStaticBuffer = false)
        : mRuntime(runtime, &xnn_delete_runtime),
          mExternals(std::move(externals)),
          mUseStaticBuffer(useStaticBuffer) {}

    // XNNPACK Runtime (subgraph + workspace) with smart-pointer for lifetime
    // management.
    std::unique_ptr<xnn_runtime, decltype(&xnn_delete_runtime)> mRuntime{nullptr,
                                                                         &xnn_delete_runtime};
    std::unordered_set<uint32_t> mExternals;
    bool mFirstRun = true;
    bool mUseStaticBuffer;
};

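// A prepared model that runs the compiled model through XNNPACK instead of
// the generic CpuExecutor path. It owns the compiled Subgraph and the
// pthreadpool used for intra-op parallelism.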
class SamplePreparedModelXNNPACK : public SamplePreparedModel {
  public:
    SamplePreparedModelXNNPACK(const V1_3::Model& model, const SampleDriver* driver,
                               V1_1::ExecutionPreference preference, uid_t userId,
                               V1_3::Priority priority)
        : SamplePreparedModel(model, driver, preference, userId, priority),
          mSubgraph(nullptr),
          mThreadpool(nullptr) {}
    ~SamplePreparedModelXNNPACK() {
        delete mSubgraph;
        pthreadpool_destroy(mThreadpool);
    }
    bool initialize();
    hardware::Return<V1_0::ErrorStatus> execute(
            const V1_0::Request& request, const sp<V1_0::IExecutionCallback>& callback) override;
    hardware::Return<V1_0::ErrorStatus> execute_1_2(
            const V1_0::Request& request, V1_2::MeasureTiming measure,
            const sp<V1_2::IExecutionCallback>& callback) override;
    hardware::Return<V1_3::ErrorStatus> execute_1_3(
            const V1_3::Request& request, V1_2::MeasureTiming measure,
            const V1_3::OptionalTimePoint& deadline,
            const V1_3::OptionalTimeoutDuration& loopTimeoutDuration,
            const sp<V1_3::IExecutionCallback>& callback) override;
    hardware::Return<void> executeSynchronously(const V1_0::Request& request,
                                                V1_2::MeasureTiming measure,
                                                executeSynchronously_cb cb) override;
    hardware::Return<void> executeSynchronously_1_3(
            const V1_3::Request& request, V1_2::MeasureTiming measure,
            const V1_3::OptionalTimePoint& deadline,
            const V1_3::OptionalTimeoutDuration& loopTimeoutDuration,
            executeSynchronously_1_3_cb cb) override;
    hardware::Return<void> configureExecutionBurst(
            const sp<V1_2::IBurstCallback>& callback,
            const MQDescriptorSync<V1_2::FmqRequestDatum>& requestChannel,
            const MQDescriptorSync<V1_2::FmqResultDatum>& resultChannel,
            configureExecutionBurst_cb cb) override;
    hardware::Return<void> executeFenced(const V1_3::Request& request,
                                         const hardware::hidl_vec<hardware::hidl_handle>& wait_for,
                                         V1_2::MeasureTiming measure,
                                         const V1_3::OptionalTimePoint& deadline,
                                         const V1_3::OptionalTimeoutDuration& loopTimeoutDuration,
                                         const V1_3::OptionalTimeoutDuration& duration,
                                         executeFenced_cb callback) override;

  private:
    Subgraph* mSubgraph;
    std::vector<RunTimeOperandInfo> mOperands;
    pthreadpool* mThreadpool;
};

hardware::Return<void> SamplePreparedModelXNNPACK::configureExecutionBurst(
        const sp<V1_2::IBurstCallback>& callback,
        const MQDescriptorSync<V1_2::FmqRequestDatum>& requestChannel,
        const MQDescriptorSync<V1_2::FmqResultDatum>& resultChannel,
        configureExecutionBurst_cb cb) {
    VLOG(DRIVER) << "SamplePreparedModelXNNPACK::configureExecutionBurst not supported";
    cb(V1_0::ErrorStatus::GENERAL_FAILURE, {});
    return hardware::Void();
}

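// Builds the runtime operand table and compiles the XNNPACK subgraph once at
// preparation time, so each request only has to rebind I/O buffers and invoke
// the runtime.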
bool SamplePreparedModelXNNPACK::initialize() {
    auto status = SamplePreparedModel::initialize();
    mThreadpool = pthreadpool_create(kNumOfWorkerThreads);
    if (mThreadpool == nullptr) {
        VLOG(DRIVER) << "SamplePreparedModelXNNPACK::initialize failed to create pthreadpool, "
                        "fallback to single threaded execution";
    }
    const V1_3::Model* model = getModel();
    mOperands = initializeRunTimeInfo(model->main, mPoolInfos, &model->operandValues);
    mSubgraph = Subgraph::Create(model->main.operations, mOperands, model->main.inputIndexes,
                                 model->main.outputIndexes, mThreadpool);
    // Treat a null subgraph as a preparation failure so that execution never
    // dereferences it.
    return status && mSubgraph != nullptr;
}

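// Worker-thread body for the asynchronous execution paths: maps the request
// pools, points the operand table at the request buffers, invokes the
// compiled subgraph, and reports the result through the HIDL callback.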
template <typename T_IExecutionCallback>
void asyncExecuteXNNPACK(Subgraph* subgraph, RunTimeOperandInfo* operands,
                         const V1_3::Request& request, V1_2::MeasureTiming measure,
                         const V1_3::Model& model, const LegacyOptionalTimePoint& deadline,
                         const V1_3::OptionalTimeoutDuration& loopTimeoutDuration,
                         const sp<T_IExecutionCallback>& callback) {
    std::vector<RunTimePoolInfo> requestPoolInfos;
    if (!setRunTimePoolInfosFromMemoryPools(&requestPoolInfos, uncheckedConvert(request.pools))) {
        notify(callback, V1_3::ErrorStatus::GENERAL_FAILURE, {}, kNoTiming);
        // Do not fall through: the operand table must not be rebound to
        // unmapped pools, and the callback must only be notified once.
        return;
    }
    updateForArguments(model.main.inputIndexes, request.inputs, requestPoolInfos, operands);
    updateForArguments(model.main.outputIndexes, request.outputs, requestPoolInfos, operands);
    auto status = subgraph->Invoke(operands);
    VLOG(DRIVER) << "XNNPACK subgraph invoke returned " << toString(status);
    if (status == V1_3::ErrorStatus::NONE) {
        VLOG(DRIVER) << "Completed run normally";
        for (auto& runtimeInfo : requestPoolInfos) {
            runtimeInfo.flush();
        }
    }
    notify(callback, status, {}, kNoTiming);
}

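// Common front end for execute/execute_1_2/execute_1_3: validates the request
// and deadline on the binder thread, then hands the actual run to a detached
// worker thread.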
template <typename T_IExecutionCallback>
V1_3::ErrorStatus executeXNNPACKBase(Subgraph* subgraph, RunTimeOperandInfo* operands,
                                     const V1_3::Request& request, V1_2::MeasureTiming measure,
                                     const V1_3::Model& model,
                                     const V1_3::OptionalTimePoint& halDeadline,
                                     const V1_3::OptionalTimeoutDuration& loopTimeoutDuration,
                                     const sp<T_IExecutionCallback>& callback) {
    VLOG(DRIVER) << "executeXNNPACKBase(" << SHOW_IF_DEBUG(toString(request)) << ")";

    if (callback.get() == nullptr) {
        LOG(ERROR) << "invalid callback passed to executeXNNPACKBase";
        return V1_3::ErrorStatus::INVALID_ARGUMENT;
    }
    if (!validateRequest(request, model, /*allowUnspecifiedOutput=*/false)) {
        notify(callback, V1_3::ErrorStatus::INVALID_ARGUMENT, {}, kNoTiming);
        return V1_3::ErrorStatus::INVALID_ARGUMENT;
    }
    const auto deadline = makeDeadline(halDeadline);
    if (hasDeadlinePassed(deadline)) {
        notify(callback, V1_3::ErrorStatus::MISSED_DEADLINE_PERSISTENT, {}, kNoTiming);
        return V1_3::ErrorStatus::NONE;
    }

    // This thread is intentionally detached because the sample driver service
    // is expected to live forever. The subgraph and operand pointers are
    // captured by value: the enclosing stack frame is gone by the time the
    // detached thread runs.
    std::thread([subgraph, operands, &model, request, measure, deadline, loopTimeoutDuration,
                 callback] {
        asyncExecuteXNNPACK(subgraph, operands, request, measure, model, deadline,
                            loopTimeoutDuration, callback);
    }).detach();

    return V1_3::ErrorStatus::NONE;
}

hardware::Return<V1_0::ErrorStatus> SamplePreparedModelXNNPACK::execute(
        const V1_0::Request& request, const sp<V1_0::IExecutionCallback>& callback) {
    const V1_3::Model* model = getModel();
    const V1_3::ErrorStatus status =
            executeXNNPACKBase(mSubgraph, mOperands.data(), convertToV1_3(request),
                               V1_2::MeasureTiming::NO, *model, {}, {}, callback);
    return convertToV1_0(status);
}

hardware::Return<V1_0::ErrorStatus> SamplePreparedModelXNNPACK::execute_1_2(
        const V1_0::Request& request, V1_2::MeasureTiming measure,
        const sp<V1_2::IExecutionCallback>& callback) {
    const V1_3::Model* model = getModel();
    const V1_3::ErrorStatus status = executeXNNPACKBase(
            mSubgraph, mOperands.data(), convertToV1_3(request), measure, *model, {}, {}, callback);
    return convertToV1_0(status);
}

hardware::Return<V1_3::ErrorStatus> SamplePreparedModelXNNPACK::execute_1_3(
        const V1_3::Request& request, V1_2::MeasureTiming measure,
        const V1_3::OptionalTimePoint& deadline,
        const V1_3::OptionalTimeoutDuration& loopTimeoutDuration,
        const sp<V1_3::IExecutionCallback>& callback) {
    const V1_3::Model* model = getModel();
    return executeXNNPACKBase(mSubgraph, mOperands.data(), request, measure, *model, deadline,
                              loopTimeoutDuration, callback);
}

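// Synchronous counterpart of asyncExecuteXNNPACK: runs on the calling binder
// thread and returns the status, (empty) output shapes, and timing directly.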
static std::tuple<V1_3::ErrorStatus, hardware::hidl_vec<V1_2::OutputShape>, V1_2::Timing>
executeSynchronouslyXNNPACKBase(Subgraph* subgraph, RunTimeOperandInfo* operands,
                                const V1_3::Request& request, V1_2::MeasureTiming measure,
                                const V1_3::Model& model,
                                const V1_3::OptionalTimePoint& halDeadline,
                                const V1_3::OptionalTimeoutDuration& loopTimeoutDuration) {
    VLOG(DRIVER) << "executeSynchronouslyXNNPACKBase(" << SHOW_IF_DEBUG(toString(request)) << ")";

    if (!validateRequest(request, model, /*allowUnspecifiedOutput=*/false)) {
        return {V1_3::ErrorStatus::INVALID_ARGUMENT, {}, kNoTiming};
    }
    const auto deadline = makeDeadline(halDeadline);
    if (hasDeadlinePassed(deadline)) {
        return {V1_3::ErrorStatus::MISSED_DEADLINE_PERSISTENT, {}, kNoTiming};
    }

    std::vector<RunTimePoolInfo> requestPoolInfos;
    if (!setRunTimePoolInfosFromMemoryPools(&requestPoolInfos, uncheckedConvert(request.pools))) {
        return {V1_3::ErrorStatus::GENERAL_FAILURE, {}, kNoTiming};
    }
    updateForArguments(model.main.inputIndexes, request.inputs, requestPoolInfos, operands);
    updateForArguments(model.main.outputIndexes, request.outputs, requestPoolInfos, operands);
    VLOG(DRIVER) << "XNNPACK subgraph invoke started";
    auto status = subgraph->Invoke(operands);
    VLOG(DRIVER) << "XNNPACK subgraph invoke returned " << toString(status);
    if (status == V1_3::ErrorStatus::NONE) {
        VLOG(DRIVER) << "Completed run normally";
        for (auto& runtimeInfo : requestPoolInfos) {
            runtimeInfo.flush();
        }
    }
    return {status, {}, kNoTiming};
}

hardware::Return<void> SamplePreparedModelXNNPACK::executeSynchronously(
        const V1_0::Request& request, V1_2::MeasureTiming measure, executeSynchronously_cb cb) {
    const V1_3::Model* model = getModel();
    auto [status, outputShapes, timing] = executeSynchronouslyXNNPACKBase(
            mSubgraph, mOperands.data(), convertToV1_3(request), measure, *model, {}, {});
    cb(convertToV1_0(status), std::move(outputShapes), timing);
    return hardware::Void();
}

hardware::Return<void> SamplePreparedModelXNNPACK::executeSynchronously_1_3(
        const V1_3::Request& request, V1_2::MeasureTiming measure,
        const V1_3::OptionalTimePoint& deadline,
        const V1_3::OptionalTimeoutDuration& loopTimeoutDuration, executeSynchronously_1_3_cb cb) {
    const V1_3::Model* model = getModel();
    auto [status, outputShapes, timing] = executeSynchronouslyXNNPACKBase(
            mSubgraph, mOperands.data(), request, measure, *model, deadline, loopTimeoutDuration);
    cb(status, std::move(outputShapes), timing);
    return hardware::Void();
}

// The sample driver will finish the execution and then return.
hardware::Return<void> SamplePreparedModelXNNPACK::executeFenced(
        const V1_3::Request& request, const hardware::hidl_vec<hardware::hidl_handle>& waitFor,
        V1_2::MeasureTiming measure, const V1_3::OptionalTimePoint& halDeadline,
        const V1_3::OptionalTimeoutDuration& loopTimeoutDuration,
        const V1_3::OptionalTimeoutDuration& duration, executeFenced_cb cb) {
    VLOG(DRIVER) << "executeFenced(" << SHOW_IF_DEBUG(toString(request)) << ")";
    const V1_3::Model* model = getModel();
    if (!validateRequest(request, *model, /*allowUnspecifiedOutput=*/false)) {
        cb(V1_3::ErrorStatus::INVALID_ARGUMENT, hardware::hidl_handle(nullptr), nullptr);
        return hardware::Void();
    }
    const auto deadline = makeDeadline(halDeadline);
    if (hasDeadlinePassed(deadline)) {
        cb(V1_3::ErrorStatus::MISSED_DEADLINE_PERSISTENT, hardware::hidl_handle(nullptr), nullptr);
        return hardware::Void();
    }

    // Wait for the dependent events to signal
    for (const auto& fenceHandle : waitFor) {
        if (!fenceHandle.getNativeHandle()) {
            cb(V1_3::ErrorStatus::INVALID_ARGUMENT, hardware::hidl_handle(nullptr), nullptr);
            return hardware::Void();
        }
        int syncFenceFd = fenceHandle.getNativeHandle()->data[0];
        if (syncWait(syncFenceFd, -1) != FenceState::SIGNALED) {
            LOG(ERROR) << "syncWait failed";
            cb(V1_3::ErrorStatus::GENERAL_FAILURE, hardware::hidl_handle(nullptr), nullptr);
            return hardware::Void();
        }
    }
    std::vector<RunTimePoolInfo> requestPoolInfos;
    if (!setRunTimePoolInfosFromMemoryPools(&requestPoolInfos, uncheckedConvert(request.pools))) {
        cb(V1_3::ErrorStatus::GENERAL_FAILURE, hardware::hidl_handle(nullptr), nullptr);
        // The callback must only be invoked once, so bail out here.
        return hardware::Void();
    }
    updateForArguments(model->main.inputIndexes, request.inputs, requestPoolInfos,
                       mOperands.data());
    updateForArguments(model->main.outputIndexes, request.outputs, requestPoolInfos,
                       mOperands.data());
    auto status = mSubgraph->Invoke(mOperands.data());
    VLOG(DRIVER) << "XNNPACK subgraph invoke returned " << toString(status);
    if (status == V1_3::ErrorStatus::NONE) {
        VLOG(DRIVER) << "Completed run normally";
        for (auto& runtimeInfo : requestPoolInfos) {
            runtimeInfo.flush();
        }
    }

    sp<SampleFencedExecutionCallback> fencedExecutionCallback =
            new SampleFencedExecutionCallback(kNoTiming, kNoTiming, status);
    cb(status, hardware::hidl_handle(nullptr), fencedExecutionCallback);
    return hardware::Void();
}

class SampleDriverFloatXNNPACK : public SampleDriverPartial {
  public:
    SampleDriverFloatXNNPACK(const std::string& name) : SampleDriverPartial(name.c_str()) {}
    hardware::Return<void> getCapabilities_1_3(getCapabilities_1_3_cb cb) override;
    hardware::Return<V1_0::ErrorStatus> prepareModel(
            const V1_0::Model& model, const sp<V1_0::IPreparedModelCallback>& callback) override;
    hardware::Return<V1_0::ErrorStatus> prepareModel_1_1(
            const V1_1::Model& model, V1_1::ExecutionPreference preference,
            const sp<V1_0::IPreparedModelCallback>& callback) override;
    hardware::Return<V1_0::ErrorStatus> prepareModel_1_2(
            const V1_2::Model& model, V1_1::ExecutionPreference preference,
            const hardware::hidl_vec<hardware::hidl_handle>& modelCache,
            const hardware::hidl_vec<hardware::hidl_handle>& dataCache, const HalCacheToken& token,
            const sp<V1_2::IPreparedModelCallback>& callback) override;
    hardware::Return<V1_3::ErrorStatus> prepareModel_1_3(
            const V1_3::Model& model, V1_1::ExecutionPreference preference, V1_3::Priority priority,
            const V1_3::OptionalTimePoint& deadline,
            const hardware::hidl_vec<hardware::hidl_handle>& modelCache,
            const hardware::hidl_vec<hardware::hidl_handle>& dataCache, const HalCacheToken& token,
            const sp<V1_3::IPreparedModelCallback>& callback) override;
    hardware::Return<void> allocate(
            const V1_3::BufferDesc& desc,
            const hardware::hidl_vec<sp<V1_3::IPreparedModel>>& preparedModels,
            const hardware::hidl_vec<V1_3::BufferRole>& inputRoles,
            const hardware::hidl_vec<V1_3::BufferRole>& outputRoles, allocate_cb cb) override;

  private:
    std::vector<bool> getSupportedOperationsImpl(const V1_3::Model& model) const override;
};

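// Validates the model, then finishes preparation on a detached thread; this
// mirrors the generic prepareModelBase flow but constructs a
// SamplePreparedModelXNNPACK.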
template <typename T_Model, typename T_IPreparedModelCallback>
V1_3::ErrorStatus prepareModelXNNPACK(const T_Model& model, const SampleDriver* driver,
                                      V1_1::ExecutionPreference preference, V1_3::Priority priority,
                                      const V1_3::OptionalTimePoint& deadline,
                                      const sp<T_IPreparedModelCallback>& callback) {
    const uid_t userId = hardware::IPCThreadState::self()->getCallingUid();
    if (callback.get() == nullptr) {
        LOG(ERROR) << "invalid callback passed to prepareModelXNNPACK";
        return V1_3::ErrorStatus::INVALID_ARGUMENT;
    }
    if (VLOG_IS_ON(DRIVER)) {
        VLOG(DRIVER) << "prepareModelXNNPACK";
        logModelToInfo(model);
    }
    if (!validateModel(model) || !validateExecutionPreference(preference) ||
        !validatePriority(priority)) {
        notify(callback, V1_3::ErrorStatus::INVALID_ARGUMENT, nullptr);
        return V1_3::ErrorStatus::INVALID_ARGUMENT;
    }

    // asynchronously prepare the model from a new, detached thread
    std::thread([model, driver, preference, userId, priority, callback] {
        sp<SamplePreparedModelXNNPACK> preparedModel = new SamplePreparedModelXNNPACK(
                convertToV1_3(model), driver, preference, userId, priority);
        if (!preparedModel->initialize()) {
            notify(callback, V1_3::ErrorStatus::INVALID_ARGUMENT, nullptr);
            return;
        }
        notify(callback, V1_3::ErrorStatus::NONE, preparedModel);
    }).detach();

    return V1_3::ErrorStatus::NONE;
}

hardware::Return<V1_0::ErrorStatus> SampleDriverFloatXNNPACK::prepareModel(
        const V1_0::Model& model, const sp<V1_0::IPreparedModelCallback>& callback) {
    const V1_3::ErrorStatus status =
            prepareModelXNNPACK(model, this, V1_1::ExecutionPreference::FAST_SINGLE_ANSWER,
                                kDefaultPriority13, {}, callback);
    return convertToV1_0(status);
}

hardware::Return<V1_0::ErrorStatus> SampleDriverFloatXNNPACK::prepareModel_1_1(
        const V1_1::Model& model, V1_1::ExecutionPreference preference,
        const sp<V1_0::IPreparedModelCallback>& callback) {
    const V1_3::ErrorStatus status =
            prepareModelXNNPACK(model, this, preference, kDefaultPriority13, {}, callback);
    return convertToV1_0(status);
}

hardware::Return<V1_0::ErrorStatus> SampleDriverFloatXNNPACK::prepareModel_1_2(
        const V1_2::Model& model, V1_1::ExecutionPreference preference,
        const hardware::hidl_vec<hardware::hidl_handle>&,
        const hardware::hidl_vec<hardware::hidl_handle>&, const HalCacheToken&,
        const sp<V1_2::IPreparedModelCallback>& callback) {
    const V1_3::ErrorStatus status =
            prepareModelXNNPACK(model, this, preference, kDefaultPriority13, {}, callback);
    return convertToV1_0(status);
}

hardware::Return<V1_3::ErrorStatus> SampleDriverFloatXNNPACK::prepareModel_1_3(
        const V1_3::Model& model, V1_1::ExecutionPreference preference, V1_3::Priority priority,
        const V1_3::OptionalTimePoint& deadline,
        const hardware::hidl_vec<hardware::hidl_handle>& modelCache,
        const hardware::hidl_vec<hardware::hidl_handle>& dataCache, const HalCacheToken& token,
        const sp<V1_3::IPreparedModelCallback>& callback) {
    return prepareModelXNNPACK(model, this, preference, priority, deadline, callback);
}

hardware::Return<void> SampleDriverFloatXNNPACK::getCapabilities_1_3(getCapabilities_1_3_cb cb) {
    android::nn::initVLogMask();
    VLOG(DRIVER) << "SampleDriverFloatXNNPACK::getCapabilities()";

    V1_3::Capabilities capabilities = {
            .relaxedFloat32toFloat16PerformanceScalar = {.execTime = 0.7f, .powerUsage = 1.1f},
            .relaxedFloat32toFloat16PerformanceTensor = {.execTime = 0.7f, .powerUsage = 1.1f},
            .operandPerformance = nonExtensionOperandPerformance<HalVersion::V1_3>({1.0f, 1.0f}),
            .ifPerformance = {.execTime = 1.0f, .powerUsage = 1.0f},
            .whilePerformance = {.execTime = 1.0f, .powerUsage = 1.0f}};
    update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_FLOAT32,
           {.execTime = 0.8f, .powerUsage = 1.2f});
    update(&capabilities.operandPerformance, V1_3::OperandType::FLOAT32,
           {.execTime = 0.8f, .powerUsage = 1.2f});

    cb(V1_3::ErrorStatus::NONE, capabilities);
    return hardware::Void();
}

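// An operation is reported as supported when its Visit*Node handler accepts
// it in a dry run: passing a null subgraph makes VisitNode perform all of its
// checks without defining any XNNPACK nodes.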
std::vector<bool> SampleDriverFloatXNNPACK::getSupportedOperationsImpl(
        const V1_3::Model& model) const {
    std::vector<RunTimePoolInfo> poolInfos;
    setRunTimePoolInfosFromCanonicalMemories(&poolInfos, uncheckedConvert(model.pools));
    auto operands = initializeRunTimeInfo(model.main, poolInfos, &model.operandValues);
    const size_t count = model.main.operations.size();
    std::vector<bool> supported(count);
    for (size_t i = 0; i < count; i++) {
        bool isSupportedOp = false;
        const V1_3::Operation& operation = model.main.operations[i];
        if (Subgraph::VisitNode(/*subgraph=*/nullptr, operation, operands.data(), {}) ==
            V1_3::ErrorStatus::NONE) {
            isSupportedOp = true;
        }
        supported[i] = isSupportedOp;
    }
    return supported;
}

hardware::Return<void> SampleDriverFloatXNNPACK::allocate(
        const V1_3::BufferDesc& desc,
        const hardware::hidl_vec<sp<V1_3::IPreparedModel>>& preparedModels,
        const hardware::hidl_vec<V1_3::BufferRole>& inputRoles,
        const hardware::hidl_vec<V1_3::BufferRole>& outputRoles, allocate_cb cb) {
    VLOG(DRIVER) << "SampleDriverFloatXNNPACK::allocate not supported";
    constexpr uint32_t kInvalidBufferToken = 0;
    cb(V1_3::ErrorStatus::INVALID_ARGUMENT, nullptr, kInvalidBufferToken);
    return hardware::Void();
}

}  // namespace sample_driver
}  // namespace nn
}  // namespace android

using android::sp;
using android::nn::sample_driver::SampleDriverFloatXNNPACK;

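// XNNPACK must be initialized once per process before any subgraph can be
// defined; without it the driver has nothing to serve.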
int main() {
    const std::string name = "nnapi-sample_float_xnnpack";
    const auto driver = sp<SampleDriverFloatXNNPACK>::make(name);
    xnn_status status = xnn_initialize(/*allocator=*/nullptr);
    if (status != xnn_status_success) {
        LOG(ERROR) << "xnn_initialize FAILED";
        return 1;
    }
    return run(driver, name);
}