1 /*
2  * Copyright (C) 2020 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "SampleDriverFloatXNNPACK"
18 
19 #include <CpuExecutor.h>
20 #include <HalInterfaces.h>
21 #include <Utils.h>
22 #include <ValidateHal.h>
23 #include <android-base/logging.h>
24 #include <hidl/LegacySupport.h>
25 #include <hwbinder/IPCThreadState.h>
26 #include <xnnpack.h>
27 
28 #include <algorithm>
29 #include <cstdint>
30 #include <limits>
31 #include <memory>
32 #include <string>
33 #include <thread>
34 #include <tuple>
35 #include <unordered_set>
36 #include <utility>
37 #include <vector>
38 
39 #include "SampleDriverPartial.h"
40 #include "SampleDriverUtils.h"
41 
42 namespace android {
43 namespace nn {
44 namespace sample_driver {
45 
46 namespace {
47 
// Evaluates `expr` (which must yield a V1_3::ErrorStatus) and early-returns it
// from the enclosing function when it is not NONE. The do/while(0) wrapper
// makes the macro behave as a single statement after if/else.
#define NN_DRIVER_RETURN_IF_ERROR(expr)              \
    do {                                             \
        V1_3::ErrorStatus _errorCode = (expr);       \
        if (_errorCode != V1_3::ErrorStatus::NONE) { \
            return _errorCode;                       \
        }                                            \
    } while (0)

// Size of the XNNPACK worker threadpool (single-threaded by default).
const size_t kNumOfWorkerThreads = 1;
// Timing reported when the driver does not measure execution time
// (UINT64_MAX is the HAL convention for "no timing information").
static const V1_2::Timing kNoTiming = {.timeOnDevice = UINT64_MAX, .timeInDriver = UINT64_MAX};
58 
isScalarType(OperandType type)59 bool isScalarType(OperandType type) {
60     switch (type) {
61         case OperandType::FLOAT16:
62         case OperandType::FLOAT32:
63         case OperandType::INT32:
64         case OperandType::UINT32:
65         case OperandType::BOOL:
66             return true;
67         default:
68             return false;
69     }
70 }
71 
// Patches the runtime operand table (`operands`) with per-request argument
// information: runtime-specified dimensions, and the buffer/length of each
// request input/output resolved from the request memory pools.
//
// `indexes` are the operand indices corresponding (positionally) to
// `arguments`; `requestPoolInfos` are the mapped request memories.
void updateForArguments(const std::vector<uint32_t>& indexes,
                        const hardware::hidl_vec<V1_0::RequestArgument>& arguments,
                        const std::vector<RunTimePoolInfo>& requestPoolInfos,
                        RunTimeOperandInfo* operands) {
    CHECK_EQ(indexes.size(), arguments.size());
    for (size_t i = 0; i < indexes.size(); i++) {
        const uint32_t operandIndex = indexes[i];
        const V1_0::RequestArgument& from = arguments[i];
        RunTimeOperandInfo& to = operands[operandIndex];
        if (from.dimensions.size() > 0) {
            // It's the responsibility of the caller to validate that
            // from.dimensions only modifies the dimensions that were
            // unspecified in the model.  That's the case in SampleDriver.cpp
            // with the call to validateRequest().
            // TODO make sure that's the case for the default CPU path.
            to.dimensions = from.dimensions;
        }
        if (from.hasNoValue) {
            // Optional operand omitted by the caller: no backing buffer.
            to.lifetime = Operand::LifeTime::NO_VALUE;
            CHECK(to.buffer == nullptr);
            to.length = 0;
        } else {
            auto poolIndex = from.location.poolIndex;
            CHECK_LT(poolIndex, requestPoolInfos.size());
            auto& r = requestPoolInfos[poolIndex];
            to.buffer = r.getBuffer() + from.location.offset;
            if (from.location.offset == 0 && from.location.length == 0) {
                // Use the entire memory region.
                to.length = r.getSize();
            } else {
                to.length = from.location.length;
            }
        }
    }
}
107 
// Builds the RunTimeOperandInfo table for every operand of `subgraph`,
// resolving constant operand buffers from either the inline model operand
// values (`mModelOperandValues`, for CONSTANT_COPY) or the mapped model
// memory pools (`modelPoolInfos`, for CONSTANT_REFERENCE). Operands whose
// buffers are only known at execution time (temporaries, subgraph I/O,
// NO_VALUE) get a null buffer here.
std::vector<RunTimeOperandInfo> initializeRunTimeInfo(
        const V1_3::Subgraph& subgraph, const std::vector<RunTimePoolInfo>& modelPoolInfos,
        const hardware::hidl_vec<uint8_t>* mModelOperandValues) {
    const size_t count = subgraph.operands.size();
    std::vector<RunTimeOperandInfo> operands(count);
    for (size_t i = 0; i < count; i++) {
        const V1_3::Operand& from = subgraph.operands[i];
        RunTimeOperandInfo& to = operands[i];
        to.type = uncheckedConvert(from.type);
        to.dimensions = from.dimensions;
        to.scale = from.scale;
        to.zeroPoint = from.zeroPoint;
        to.length = from.location.length;
        to.lifetime = uncheckedConvert(from.lifetime);
        to.extraParams = uncheckedConvert(from.extraParams);
        switch (from.lifetime) {
            case V1_3::OperandLifeTime::TEMPORARY_VARIABLE:
                // Buffer is allocated during execution; track remaining uses
                // so the executor can free it once all consumers have run.
                to.buffer = nullptr;
                to.numberOfUsesLeft = from.numberOfConsumers;
                break;
            case V1_3::OperandLifeTime::CONSTANT_COPY:
                // Constant stored inline in the model's operand-value blob.
                to.buffer = const_cast<uint8_t*>(&(*mModelOperandValues)[from.location.offset]);
                to.numberOfUsesLeft = 0;
                break;
            case V1_3::OperandLifeTime::CONSTANT_REFERENCE: {
                // Constant stored in a shared memory pool of the model.
                auto poolIndex = from.location.poolIndex;
                CHECK_LT(poolIndex, modelPoolInfos.size());
                auto& r = modelPoolInfos[poolIndex];
                to.buffer = r.getBuffer() + from.location.offset;
                to.numberOfUsesLeft = 0;
                break;
            }
            case V1_3::OperandLifeTime::SUBGRAPH:
            case V1_3::OperandLifeTime::SUBGRAPH_INPUT:
            case V1_3::OperandLifeTime::SUBGRAPH_OUTPUT:
            case V1_3::OperandLifeTime::NO_VALUE:
                // Bound later (request arguments) or intentionally absent.
                to.buffer = nullptr;
                to.numberOfUsesLeft = 0;
                break;
        }
    }
    return operands;
}
151 
152 }  // namespace
153 
154 class Subgraph {
155    public:
    // Translates the NNAPI operations/operands into an XNNPACK subgraph,
    // compiles it into an XNNPACK runtime, and wraps the runtime in a new
    // Subgraph. Returns nullptr on any failure (unsupported tensor type,
    // unsupported operation, or an XNNPACK API error). The caller takes
    // ownership of the returned pointer.
    //
    // `inputIndexes`/`outputIndexes` are the model-level I/O operands; they
    // become XNNPACK external values whose buffers are supplied per Invoke().
    static Subgraph* Create(const hardware::hidl_vec<V1_3::Operation>& operations,
                            std::vector<RunTimeOperandInfo>& operands,
                            const std::vector<uint32_t>& inputIndexes,
                            const std::vector<uint32_t>& outputIndexes, pthreadpool_t threadpool,
                            bool useStaticBuffer = false) {
        // Convert subgraph inputs and outputs to hash sets for faster lookup.
        const std::unordered_set<uint32_t> inputs(inputIndexes.begin(), inputIndexes.end());
        const std::unordered_set<uint32_t> outputs(outputIndexes.begin(), outputIndexes.end());
        std::unordered_set<uint32_t> externals(outputs);

        xnn_subgraph_t subgraphPtr = nullptr;
        xnn_status status = xnn_create_subgraph(
                /*external_value_ids=*/operands.size(), /*flags=*/0, &subgraphPtr);
        if (status != xnn_status_success) {
            LOG(ERROR) << "XNNPACK xnn_create_subgraph FAILED";
            return nullptr;
        }

        // Smart pointer to automatically release subgraph on exit.
        std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> subgraph(
                subgraphPtr, &xnn_delete_subgraph);

        // Detect which tensors are used as inputs or outputs of any subgraph nodes.
        // -1 denotes tensor not used in the subgraph.
        std::vector<int> tensors(operands.size(), -1);

        for (const auto& operation : operations) {
            const std::vector<uint32_t>& ins = operation.inputs;
            const std::vector<uint32_t>& outs = operation.outputs;
            switch (operation.type) {
                case V1_3::OperationType::MEAN:
                case V1_3::OperationType::PAD:
                case V1_3::OperationType::RESHAPE:
                case V1_3::OperationType::RESIZE_BILINEAR:
                    // Ignore the second input (axes, static padding, or new shape),
                    // because it is represented as parameters of the XNNPACK operator
                    // rather than extra input.
                    {
                        const int t = ins[0];
                        tensors[t] = t;
                    }
                    break;
                default:
                    // All other operators: process all inputs
                    for (size_t k = 0; k < ins.size(); k++) {
                        if (isScalarType(operands[ins[k]].type)) continue;
                        const int t = ins[k];
                        tensors[t] = t;
                    }
            }
            for (size_t k = 0; k < outs.size(); k++) {
                if (isScalarType(operands[outs[k]].type)) continue;
                const int t = outs[k];
                tensors[t] = t;
            }
        }

        // XNNPACK Value IDs for NNAPI Operands
        std::vector<uint32_t> xnnpackTensors(operands.size());
        for (int t : tensors) {
            if (t < 0) continue;
            if (operands[tensors[t]].type != OperandType::TENSOR_FLOAT32) {
                LOG(ERROR) << "XNNPACK only support FLOAT32 tensors";
                return nullptr;
            }

            uint32_t flags = 0;
            const void* data = nullptr;
            // Constant operands are handed to XNNPACK as static data.
            if (operands[tensors[t]].lifetime == Operand::LifeTime::CONSTANT_COPY ||
                operands[tensors[t]].lifetime == Operand::LifeTime::CONSTANT_REFERENCE ||
                operands[tensors[t]].lifetime == Operand::LifeTime::POINTER) {
                data = operands[tensors[t]].buffer;
            }
            if (inputs.count(t) != 0) {
                flags |= XNN_VALUE_FLAG_EXTERNAL_INPUT;
                // A model input must not simultaneously be a constant.
                CHECK(data == nullptr);
                VLOG(DRIVER) << "found input tensor, add to external";
                externals.insert(static_cast<uint32_t>(t));
            }
            if (outputs.count(t) != 0) {
                flags |= XNN_VALUE_FLAG_EXTERNAL_OUTPUT;
            }

            std::vector<size_t> dims(operands[tensors[t]].dimensions.size());
            for (size_t i = 0; i < dims.size(); i++) {
                dims[i] = operands[tensors[t]].dimensions[i];
            }

            // The NNAPI operand index `t` doubles as the external value ID.
            const xnn_status status = xnn_define_tensor_value(
                    subgraph.get(), xnn_datatype_fp32, dims.size(), dims.data(), data,
                    static_cast<uint32_t>(t), flags, &xnnpackTensors[t]);
            if (status != xnn_status_success) {
                LOG(ERROR) << "XNNPACK xnn_define_tensor_value failed";
                return nullptr;
            }
        }

        // Create XNNPACK nodes for NNAPI Operations
        for (const auto& operation : operations) {
            if (VisitNode(subgraph.get(), operation, operands.data(), xnnpackTensors) !=
                V1_3::ErrorStatus::NONE) {
                LOG(ERROR) << "XNNPACK add op failed";
                return nullptr;
            }
        }

        xnn_runtime_t runtimePtr = nullptr;
        status = xnn_create_runtime_v2(subgraph.get(), threadpool, /*flags=*/0, &runtimePtr);
        if (status != xnn_status_success) {
            LOG(ERROR) << "XNNPACK xnn_create_runtime_v2 FAILED";
            return nullptr;
        }
        return new Subgraph(runtimePtr, std::move(externals), useStaticBuffer);
    }
270 
Prepare()271     V1_3::ErrorStatus Prepare() { return V1_3::ErrorStatus::NONE; }
272 
Invoke(RunTimeOperandInfo * operands)273     V1_3::ErrorStatus Invoke(RunTimeOperandInfo* operands) {
274         VLOG(DRIVER) << "Subgraph::Invoke() start";
275         if (!mUseStaticBuffer || mFirstRun) {
276             VLOG(DRIVER) << "Setup buffer for Subgraph";
277             std::vector<xnn_external_value> externalValues;
278 
279             for (uint32_t t : mExternals) {
280                 xnn_external_value value = {.id = 0, .data = nullptr};
281                 value.id = t;
282                 value.data = operands[t].buffer;
283                 externalValues.push_back(value);
284             }
285 
286             const xnn_status status =
287                     xnn_setup_runtime(mRuntime.get(), externalValues.size(), externalValues.data());
288             if (status != xnn_status_success) {
289                 LOG(ERROR) << "XNNPACK xnn_setup_runtime FAILED";
290                 return V1_3::ErrorStatus::GENERAL_FAILURE;
291             }
292             mFirstRun = false;
293         }
294         VLOG(DRIVER) << "Subgraph::Invoke() finished xnn_setup_runtime";
295         const xnn_status status = xnn_invoke_runtime(mRuntime.get());
296         if (status != xnn_status_success) {
297             LOG(ERROR) << "XNNPACK xnn_invoke_runtime FAILED";
298             return V1_3::ErrorStatus::GENERAL_FAILURE;
299         }
300 
301         return V1_3::ErrorStatus::NONE;
302     }
303 
CalculatePadding(int padding,uint32_t * flags)304     static V1_3::ErrorStatus CalculatePadding(int padding, uint32_t* flags) {
305         switch (padding) {
306             case ANEURALNETWORKS_PADDING_SAME:
307                 *flags = XNN_FLAG_TENSORFLOW_SAME_PADDING;
308                 return V1_3::ErrorStatus::NONE;
309             case ANEURALNETWORKS_PADDING_VALID:
310                 *flags = 0;
311                 return V1_3::ErrorStatus::NONE;
312             default:
313                 LOG(ERROR) << "invalid padding mode";
314                 return V1_3::ErrorStatus::INVALID_ARGUMENT;
315         }
316     }
317 
ConvertActivationToOutputRange(int activation,float * outputMin,float * outputMax)318     static V1_3::ErrorStatus ConvertActivationToOutputRange(int activation, float* outputMin,
319                                                             float* outputMax) {
320         switch (activation) {
321             case ANEURALNETWORKS_FUSED_NONE:
322                 *outputMin = -std::numeric_limits<float>::infinity();
323                 *outputMax = +std::numeric_limits<float>::infinity();
324                 return V1_3::ErrorStatus::NONE;
325             case ANEURALNETWORKS_FUSED_RELU:
326                 *outputMin = 0.0f;
327                 *outputMax = +std::numeric_limits<float>::infinity();
328                 return V1_3::ErrorStatus::NONE;
329             case ANEURALNETWORKS_FUSED_RELU1:
330                 *outputMin = -1.0f;
331                 *outputMax = +1.0f;
332                 return V1_3::ErrorStatus::NONE;
333             case ANEURALNETWORKS_FUSED_RELU6:
334                 *outputMin = 0.0f;
335                 *outputMax = 6.0f;
336                 return V1_3::ErrorStatus::NONE;
337             default:
338                 return V1_3::ErrorStatus::INVALID_ARGUMENT;
339         }
340     }
341 
CheckConvolutionParams(int32_t stride_width,int32_t stride_height,int32_t dilation_width_factor,int32_t dilation_height_factor)342     static V1_3::ErrorStatus CheckConvolutionParams(int32_t stride_width, int32_t stride_height,
343                                                     int32_t dilation_width_factor,
344                                                     int32_t dilation_height_factor) {
345         if (stride_width <= 0) {
346             return V1_3::ErrorStatus::INVALID_ARGUMENT;
347         }
348         if (stride_height <= 0) {
349             return V1_3::ErrorStatus::INVALID_ARGUMENT;
350         }
351 
352         if (dilation_width_factor <= 0) {
353             return V1_3::ErrorStatus::INVALID_ARGUMENT;
354         }
355         if (dilation_height_factor <= 0) {
356             return V1_3::ErrorStatus::INVALID_ARGUMENT;
357         }
358         return V1_3::ErrorStatus::NONE;
359     }
360 
CheckDepthwiseConvolutionParams(int32_t stride_width,int32_t stride_height,int32_t dilation_width_factor,int32_t dilation_height_factor,int32_t depth_multiplier,uint32_t output_channels)361     static V1_3::ErrorStatus CheckDepthwiseConvolutionParams(
362             int32_t stride_width, int32_t stride_height, int32_t dilation_width_factor,
363             int32_t dilation_height_factor, int32_t depth_multiplier, uint32_t output_channels) {
364         if (stride_width <= 0) {
365             return V1_3::ErrorStatus::INVALID_ARGUMENT;
366         }
367         if (stride_height <= 0) {
368             return V1_3::ErrorStatus::INVALID_ARGUMENT;
369         }
370 
371         if (depth_multiplier <= 0) {
372             return V1_3::ErrorStatus::INVALID_ARGUMENT;
373         }
374         if (output_channels % depth_multiplier != 0) {
375             return V1_3::ErrorStatus::INVALID_ARGUMENT;
376         }
377 
378         if (dilation_width_factor <= 0) {
379             return V1_3::ErrorStatus::INVALID_ARGUMENT;
380         }
381         if (dilation_height_factor <= 0) {
382             return V1_3::ErrorStatus::INVALID_ARGUMENT;
383         }
384 
385         return V1_3::ErrorStatus::NONE;
386     }
387 
CheckPoolingParams(int32_t stride_width,int32_t stride_height,int32_t filter_width,int32_t filter_height)388     static V1_3::ErrorStatus CheckPoolingParams(int32_t stride_width, int32_t stride_height,
389                                                 int32_t filter_width, int32_t filter_height) {
390         if (stride_width <= 0) {
391             return V1_3::ErrorStatus::INVALID_ARGUMENT;
392         }
393         if (stride_height <= 0) {
394             return V1_3::ErrorStatus::INVALID_ARGUMENT;
395         }
396 
397         if (filter_width <= 0) {
398             return V1_3::ErrorStatus::INVALID_ARGUMENT;
399         }
400         if (filter_height <= 0) {
401             return V1_3::ErrorStatus::INVALID_ARGUMENT;
402         }
403         if (filter_width == 1 && filter_height == 1 && std::max(stride_width, stride_height) > 1) {
404             return V1_3::ErrorStatus::INVALID_ARGUMENT;
405         }
406         return V1_3::ErrorStatus::NONE;
407     }
408 
CheckNumInputsAndOutputs(const V1_3::Operation & operation,uint32_t expected_num_inputs,uint32_t expected_num_outputs)409     static V1_3::ErrorStatus CheckNumInputsAndOutputs(const V1_3::Operation& operation,
410                                                       uint32_t expected_num_inputs,
411                                                       uint32_t expected_num_outputs) {
412         if (operation.inputs.size() != expected_num_inputs) {
413             return V1_3::ErrorStatus::INVALID_ARGUMENT;
414         }
415         if (operation.outputs.size() != expected_num_outputs) {
416             return V1_3::ErrorStatus::INVALID_ARGUMENT;
417         }
418         return V1_3::ErrorStatus::NONE;
419     }
420 
CheckTensorType(OperandType tensor_type,OperandType expected_type)421     static V1_3::ErrorStatus CheckTensorType(OperandType tensor_type, OperandType expected_type) {
422         if (tensor_type != expected_type) {
423             return V1_3::ErrorStatus::INVALID_ARGUMENT;
424         }
425         return V1_3::ErrorStatus::NONE;
426     }
427 
CheckTensorFloatType(OperandType tensor_type)428     static V1_3::ErrorStatus CheckTensorFloatType(OperandType tensor_type) {
429         if (tensor_type != OperandType::TENSOR_FLOAT32) {
430             return V1_3::ErrorStatus::INVALID_ARGUMENT;
431         }
432         return V1_3::ErrorStatus::NONE;
433     }
434 
CheckTensorShape(std::vector<uint32_t> & dimensions,uint32_t min_num_dims,uint32_t max_num_dims)435     static V1_3::ErrorStatus CheckTensorShape(std::vector<uint32_t>& dimensions,
436                                               uint32_t min_num_dims, uint32_t max_num_dims) {
437         if (min_num_dims == max_num_dims) {
438             if (dimensions.size() != min_num_dims) {
439                 return V1_3::ErrorStatus::INVALID_ARGUMENT;
440             }
441         } else {
442             if (dimensions.size() < min_num_dims || dimensions.size() > max_num_dims) {
443                 return V1_3::ErrorStatus::INVALID_ARGUMENT;
444             }
445         }
446         for (size_t i = 0; i < dimensions.size(); i++) {
447             if (dimensions[i] <= 0) {
448                 return V1_3::ErrorStatus::INVALID_ARGUMENT;
449             }
450         }
451         return V1_3::ErrorStatus::NONE;
452     }
453 
CheckTensorShape(std::vector<uint32_t> & dimensions,int expected_num_dims)454     static V1_3::ErrorStatus CheckTensorShape(std::vector<uint32_t>& dimensions,
455                                               int expected_num_dims) {
456         return CheckTensorShape(dimensions, expected_num_dims, expected_num_dims);
457     }
458 
CheckSlopeTensorShape(std::vector<uint32_t> & dimensions)459     static V1_3::ErrorStatus CheckSlopeTensorShape(std::vector<uint32_t>& dimensions) {
460         if (dimensions.size() < 1) {
461             return V1_3::ErrorStatus::INVALID_ARGUMENT;
462         }
463         // Validate that all non-channel dimensions (if any) are exactly 1.
464         for (size_t i = 0; i < dimensions.size() - 1; i++) {
465             if (dimensions[i] != 1) {
466                 return V1_3::ErrorStatus::INVALID_ARGUMENT;
467             }
468         }
469         return V1_3::ErrorStatus::NONE;
470     }
471 
CheckAxesTensorShape(std::vector<uint32_t> & dimensions)472     static V1_3::ErrorStatus CheckAxesTensorShape(std::vector<uint32_t>& dimensions) {
473         if (dimensions.size() != 1) {
474             return V1_3::ErrorStatus::INVALID_ARGUMENT;
475         }
476         return V1_3::ErrorStatus::NONE;
477     }
478 
CheckShapeTensorShape(std::vector<uint32_t> & dimensions)479     static V1_3::ErrorStatus CheckShapeTensorShape(std::vector<uint32_t>& dimensions) {
480         if (dimensions.size() != 1) {
481             return V1_3::ErrorStatus::INVALID_ARGUMENT;
482         }
483         return V1_3::ErrorStatus::NONE;
484     }
485 
CheckTensorStaticAllocation(Operand::LifeTime lifetime)486     static V1_3::ErrorStatus CheckTensorStaticAllocation(Operand::LifeTime lifetime) {
487         if (lifetime != Operand::LifeTime::CONSTANT_COPY &&
488             lifetime != Operand::LifeTime::CONSTANT_REFERENCE &&
489             lifetime != Operand::LifeTime::POINTER) {
490             VLOG(DRIVER) << "CheckTensorStaticAllocation: " << toString(convertToV1_3(lifetime));
491             return V1_3::ErrorStatus::INVALID_ARGUMENT;
492         }
493         return V1_3::ErrorStatus::NONE;
494     }
495 
    // Dispatches one NNAPI operation to its Visit*Node translator. Each
    // translator validates the operation and, when `subgraph` is non-null,
    // appends the corresponding XNNPACK node; with a null `subgraph` only
    // validation runs (used to probe operation support). Unsupported
    // operation types return INVALID_ARGUMENT.
    static V1_3::ErrorStatus VisitNode(xnn_subgraph_t subgraph, const V1_3::Operation& operation,
                                       RunTimeOperandInfo* operands,
                                       const std::vector<uint32_t>& xnnpackTensors) {
        switch (operation.type) {
            case V1_3::OperationType::ABS:
                return VisitAbsNode(subgraph, operation, operands, xnnpackTensors);
            case V1_3::OperationType::ADD:
                return VisitAddNode(subgraph, operation, operands, xnnpackTensors);
            case V1_3::OperationType::AVERAGE_POOL_2D:
                return VisitAveragePool2DNode(subgraph, operation, operands, xnnpackTensors);
            case V1_3::OperationType::CONV_2D:
                return VisitConv2DNode(subgraph, operation, operands, xnnpackTensors);
            case V1_3::OperationType::DEPTHWISE_CONV_2D:
                return VisitDepthwiseConv2DNode(subgraph, operation, operands, xnnpackTensors);
            case V1_3::OperationType::DIV:
                return VisitDivNode(subgraph, operation, operands, xnnpackTensors);
            case V1_3::OperationType::FLOOR:
                return VisitFloorNode(subgraph, operation, operands, xnnpackTensors);
            case V1_3::OperationType::FULLY_CONNECTED:
                return VisitFullyConnectedNode(subgraph, operation, operands, xnnpackTensors);
            case V1_3::OperationType::HARD_SWISH:
                return VisitHardSwishNode(subgraph, operation, operands, xnnpackTensors);
            case V1_3::OperationType::LOGISTIC:
                return VisitLogisticNode(subgraph, operation, operands, xnnpackTensors);
            case V1_3::OperationType::MAX_POOL_2D:
                return VisitMaxPool2DNode(subgraph, operation, operands, xnnpackTensors);
            case V1_3::OperationType::MAXIMUM:
                return VisitMaximumNode(subgraph, operation, operands, xnnpackTensors);
            case V1_3::OperationType::MEAN:
                return VisitMeanNode(subgraph, operation, operands, xnnpackTensors);
            case V1_3::OperationType::MINIMUM:
                return VisitMinimumNode(subgraph, operation, operands, xnnpackTensors);
            case V1_3::OperationType::MUL:
                return VisitMulNode(subgraph, operation, operands, xnnpackTensors);
            case V1_3::OperationType::NEG:
                return VisitNegNode(subgraph, operation, operands, xnnpackTensors);
            case V1_3::OperationType::PAD:
                // PAD is PAD_V2 with an implicit zero padding value.
                return VisitPadNode(subgraph, operation, operands, 0.0f, xnnpackTensors);
            case V1_3::OperationType::PAD_V2:
                return VisitPadV2Node(subgraph, operation, operands, xnnpackTensors);
            case V1_3::OperationType::RESHAPE:
                return VisitReshapeNode(subgraph, operation, operands, xnnpackTensors);
            case V1_3::OperationType::RESIZE_BILINEAR:
                return VisitResizeBilinearNode(subgraph, operation, operands, xnnpackTensors);
            case V1_3::OperationType::PRELU:
                return VisitPreluNode(subgraph, operation, operands, xnnpackTensors);
            case V1_3::OperationType::RELU:
                // The RELU variants share one translator parameterized by the
                // output clamp range.
                return VisitReluNode(subgraph, operation, operands, 0.0f,
                                     std::numeric_limits<float>::infinity(), xnnpackTensors);
            case V1_3::OperationType::RELU1:
                return VisitReluNode(subgraph, operation, operands, -1.0f, 1.0f, xnnpackTensors);
            case V1_3::OperationType::RELU6:
                return VisitReluNode(subgraph, operation, operands, 0.0f, 6.0f, xnnpackTensors);
            case V1_3::OperationType::SQRT:
                return VisitSqrtNode(subgraph, operation, operands, xnnpackTensors);
            case V1_3::OperationType::SUB:
                return VisitSubNode(subgraph, operation, operands, xnnpackTensors);
            case V1_3::OperationType::SOFTMAX:
                return VisitSoftmaxNode(subgraph, operation, operands, xnnpackTensors);
            default:
                return V1_3::ErrorStatus::INVALID_ARGUMENT;
        }
    }
559 
VisitAbsNode(xnn_subgraph_t subgraph,const V1_3::Operation & operation,RunTimeOperandInfo * operands,const std::vector<uint32_t> & xnnpackTensors)560     static V1_3::ErrorStatus VisitAbsNode(xnn_subgraph_t subgraph, const V1_3::Operation& operation,
561                                           RunTimeOperandInfo* operands,
562                                           const std::vector<uint32_t>& xnnpackTensors) {
563         const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
564         const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
565         NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
566         NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
567 
568         if (subgraph != nullptr) {
569             const xnn_status status =
570                     xnn_define_abs(subgraph, /*input_id=*/xnnpackTensors[ins[0]],
571                                    /*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
572             if (status != xnn_status_success) {
573                 LOG(ERROR) << "XNNPACK xnn_define_abs FAILED";
574                 return V1_3::ErrorStatus::GENERAL_FAILURE;
575             }
576         }
577         return V1_3::ErrorStatus::NONE;
578     }
579 
VisitAddNode(xnn_subgraph_t subgraph,const V1_3::Operation & operation,RunTimeOperandInfo * operands,const std::vector<uint32_t> & xnnpackTensors)580     static V1_3::ErrorStatus VisitAddNode(xnn_subgraph_t subgraph, const V1_3::Operation& operation,
581                                           RunTimeOperandInfo* operands,
582                                           const std::vector<uint32_t>& xnnpackTensors) {
583         const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
584         const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
585         NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
586         NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[1]].type));
587         NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[2]].lifetime));
588         NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
589 
590         float outputMin = -std::numeric_limits<float>::infinity();
591         float outputMax = +std::numeric_limits<float>::infinity();
592         int activation = getScalarData<int32_t>(operands[ins[2]]);
593         NN_DRIVER_RETURN_IF_ERROR(
594                 ConvertActivationToOutputRange(activation, &outputMin, &outputMax));
595 
596         if (subgraph != nullptr) {
597             const xnn_status status =
598                     xnn_define_add2(subgraph, outputMin, outputMax,
599                                     /*input1_id=*/xnnpackTensors[ins[0]],
600                                     /*input2_id=*/xnnpackTensors[ins[1]],
601                                     /*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
602             if (status != xnn_status_success) {
603                 LOG(ERROR) << "XNNPACK xnn_define_add2 FAILED";
604                 return V1_3::ErrorStatus::GENERAL_FAILURE;
605             }
606         }
607         return V1_3::ErrorStatus::NONE;
608     }
609 
    // Translates an NNAPI AVERAGE_POOL_2D operation into an XNNPACK subgraph node.
    //
    // When |subgraph| is null, only validates that the operation is expressible in
    // XNNPACK (float tensors, constant scalar params, NHWC layout, sane pooling
    // params); when non-null, additionally defines the node in |subgraph|.
    // |xnnpackTensors| maps NNAPI operand indices to XNNPACK value IDs.
    // Returns NONE on success, INVALID_ARGUMENT for unsupported configurations,
    // GENERAL_FAILURE if XNNPACK rejects the node definition.
    static V1_3::ErrorStatus VisitAveragePool2DNode(xnn_subgraph_t subgraph,
                                                    const V1_3::Operation& operation,
                                                    RunTimeOperandInfo* operands,
                                                    const std::vector<uint32_t>& xnnpackTensors) {
        const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
        const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
        // Make sure all scalar params are constant.
        for (uint32_t i = 1; i < ins.size(); i++) {
            NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[i]].lifetime));
        }

        // The optional data-layout flag is the last input: index 7 for the
        // implicit-padding signature (8 inputs), index 10 for the explicit one
        // (11 inputs). XNNPACK only supports NHWC, so NCHW is rejected.
        bool use_nchw = false;
        if (ins.size() == 8) {
            use_nchw = getScalarData<bool>(operands[ins[7]]);
        }
        if (ins.size() == 11) {
            use_nchw = getScalarData<bool>(operands[ins[10]]);
        }
        if (use_nchw) {
            VLOG(DRIVER) << "XNNPACK VisitAveragePool2DNode FAILED: only NHWC layout is supported";
            return V1_3::ErrorStatus::INVALID_ARGUMENT;
        }

        int32_t stride_width, stride_height, filter_width, filter_height, activation;
        uint32_t input_padding_top = 0;
        uint32_t input_padding_right = 0;
        uint32_t input_padding_bottom = 0;
        uint32_t input_padding_left = 0;
        uint32_t flags = 0;
        if (ins.size() >= 10) {
            // Explicit padding: ins[1..4] are left/right/top/bottom pad amounts,
            // followed by strides, filter size, and the fused activation.
            input_padding_left = static_cast<uint32_t>(getScalarData<int32_t>(operands[ins[1]]));
            input_padding_right = static_cast<uint32_t>(getScalarData<int32_t>(operands[ins[2]]));
            input_padding_top = static_cast<uint32_t>(getScalarData<int32_t>(operands[ins[3]]));
            input_padding_bottom = static_cast<uint32_t>(getScalarData<int32_t>(operands[ins[4]]));
            stride_width = getScalarData<int32_t>(operands[ins[5]]);
            stride_height = getScalarData<int32_t>(operands[ins[6]]);
            filter_width = getScalarData<int32_t>(operands[ins[7]]);
            filter_height = getScalarData<int32_t>(operands[ins[8]]);
            activation = getScalarData<int32_t>(operands[ins[9]]);
        } else {
            // Implicit padding: ins[1] is the padding scheme, translated into
            // XNNPACK flags (e.g. TENSORFLOW_SAME_PADDING) by CalculatePadding.
            int padding_implicit = getScalarData<int32_t>(operands[ins[1]]);
            NN_DRIVER_RETURN_IF_ERROR(CalculatePadding(padding_implicit, &flags));
            stride_width = getScalarData<int32_t>(operands[ins[2]]);
            stride_height = getScalarData<int32_t>(operands[ins[3]]);
            filter_width = getScalarData<int32_t>(operands[ins[4]]);
            filter_height = getScalarData<int32_t>(operands[ins[5]]);
            activation = getScalarData<int32_t>(operands[ins[6]]);
        }
        NN_DRIVER_RETURN_IF_ERROR(
                CheckPoolingParams(stride_width, stride_height, filter_width, filter_height));

        // The fused activation becomes a clamp range applied by the XNNPACK node.
        float outputMin = -std::numeric_limits<float>::infinity();
        float outputMax = +std::numeric_limits<float>::infinity();
        NN_DRIVER_RETURN_IF_ERROR(
                ConvertActivationToOutputRange(activation, &outputMin, &outputMax));

        if (subgraph != nullptr) {
            xnn_status status = xnn_status_success;
            if (filter_width == 1 && filter_height == 1) {
                // A 1x1 average pool is an identity apart from the activation,
                // so emit a cheaper clamp node instead.
                status = xnn_define_clamp(subgraph, outputMin, outputMax,
                                          /*input_id=*/xnnpackTensors[ins[0]],
                                          /*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
            } else {
                status = xnn_define_average_pooling_2d(
                        subgraph, input_padding_top, input_padding_right, input_padding_bottom,
                        input_padding_left, static_cast<uint32_t>(filter_height),
                        static_cast<uint32_t>(filter_width), static_cast<uint32_t>(stride_height),
                        static_cast<uint32_t>(stride_width), outputMin, outputMax,
                        /*input_id=*/xnnpackTensors[ins[0]],
                        /*output_id=*/xnnpackTensors[outs[0]], flags);
            }
            if (status != xnn_status_success) {
                LOG(ERROR) << "XNNPACK xnn_define_average_pooling_2d FAILED";
                return V1_3::ErrorStatus::GENERAL_FAILURE;
            }
        }
        return V1_3::ErrorStatus::NONE;
    }
692 
    // Translates an NNAPI CONV_2D operation into an XNNPACK convolution node.
    //
    // When |subgraph| is null, only validates supportability (float input/filter/
    // bias, constant filter+bias+scalars, NHWC layout); when non-null, also
    // defines the node. CONV_2D has two signatures that are disambiguated below
    // by inspecting the type of ins[7]: in the implicit-padding form ins[7] is
    // the BOOL layout flag, in the explicit-padding form it is an INT32 stride.
    static V1_3::ErrorStatus VisitConv2DNode(xnn_subgraph_t subgraph,
                                             const V1_3::Operation& operation,
                                             RunTimeOperandInfo* operands,
                                             const std::vector<uint32_t>& xnnpackTensors) {
        const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
        const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[1]].type));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[1]].lifetime));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[2]].type));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[2]].lifetime));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
        // Make sure all scalar params are constant.
        for (uint32_t i = 3; i < ins.size(); i++) {
            NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[i]].lifetime));
        }

        // Locate the optional NCHW layout flag: ins[7] (implicit padding) or
        // ins[10] (explicit padding). XNNPACK only supports NHWC.
        bool use_nchw = false;
        if (ins.size() >= 8 && operands[ins[7]].type == OperandType::BOOL) {
            use_nchw = getScalarData<bool>(operands[ins[7]]);
        }
        if (ins.size() >= 11) {
            use_nchw = getScalarData<bool>(operands[ins[10]]);
        }
        if (use_nchw) {
            VLOG(DRIVER) << "XNNPACK VisitConv2DNode FAILED: only NHWC layout is supported";
            return V1_3::ErrorStatus::INVALID_ARGUMENT;
        }

        int32_t stride_width, stride_height, activation;
        int32_t dilation_width_factor = 1;
        int32_t dilation_height_factor = 1;
        uint32_t input_padding_top = 0;
        uint32_t input_padding_right = 0;
        uint32_t input_padding_bottom = 0;
        uint32_t input_padding_left = 0;
        uint32_t flags = 0;
        if (ins.size() >= 10 && operands[ins[7]].type != OperandType::BOOL) {
            // Explicit padding: ins[3..6] are left/right/top/bottom pad amounts;
            // the 13-input form appends layout + dilation factors.
            input_padding_left = static_cast<uint32_t>(getScalarData<int32_t>(operands[ins[3]]));
            input_padding_right = static_cast<uint32_t>(getScalarData<int32_t>(operands[ins[4]]));
            input_padding_top = static_cast<uint32_t>(getScalarData<int32_t>(operands[ins[5]]));
            input_padding_bottom = static_cast<uint32_t>(getScalarData<int32_t>(operands[ins[6]]));
            stride_width = getScalarData<int32_t>(operands[ins[7]]);
            stride_height = getScalarData<int32_t>(operands[ins[8]]);
            activation = getScalarData<int32_t>(operands[ins[9]]);
            if (ins.size() == 13) {
                dilation_width_factor = getScalarData<int32_t>(operands[ins[11]]);
                dilation_height_factor = getScalarData<int32_t>(operands[ins[12]]);
            }
        } else {
            // Implicit padding: ins[3] is the padding scheme, converted into
            // XNNPACK flags; the 10-input form appends layout + dilation factors.
            int padding_implicit = getScalarData<int32_t>(operands[ins[3]]);
            NN_DRIVER_RETURN_IF_ERROR(CalculatePadding(padding_implicit, &flags));
            stride_width = getScalarData<int32_t>(operands[ins[4]]);
            stride_height = getScalarData<int32_t>(operands[ins[5]]);
            activation = getScalarData<int32_t>(operands[ins[6]]);
            if (ins.size() == 10) {
                dilation_width_factor = getScalarData<int32_t>(operands[ins[8]]);
                dilation_height_factor = getScalarData<int32_t>(operands[ins[9]]);
            }
        }
        NN_DRIVER_RETURN_IF_ERROR(CheckConvolutionParams(
                stride_width, stride_height, dilation_width_factor, dilation_height_factor));

        // The fused activation becomes a clamp range applied by the XNNPACK node.
        float outputMin = -std::numeric_limits<float>::infinity();
        float outputMax = +std::numeric_limits<float>::infinity();
        NN_DRIVER_RETURN_IF_ERROR(
                ConvertActivationToOutputRange(activation, &outputMin, &outputMax));

        // Filter layout here is [output_channels, height, width, input_channels].
        const RunTimeOperandInfo& filter = operands[ins[1]];
        const uint32_t output_channels = filter.dimensions[0];
        const uint32_t kernel_height = filter.dimensions[1];
        const uint32_t kernel_width = filter.dimensions[2];
        const uint32_t input_channels = filter.dimensions[3];

        if (subgraph != nullptr) {
            const xnn_status status = xnn_define_convolution_2d(
                    subgraph, input_padding_top, input_padding_right, input_padding_bottom,
                    input_padding_left, static_cast<uint32_t>(kernel_height),
                    static_cast<uint32_t>(kernel_width), static_cast<uint32_t>(stride_height),
                    static_cast<uint32_t>(stride_width),
                    static_cast<uint32_t>(dilation_height_factor),
                    static_cast<uint32_t>(dilation_width_factor),
                    /*groups=*/1, static_cast<size_t>(input_channels),
                    static_cast<size_t>(output_channels), outputMin, outputMax,
                    /*input_id=*/xnnpackTensors[ins[0]],
                    /*filter_id=*/xnnpackTensors[ins[1]],
                    /*bias_id=*/xnnpackTensors[ins[2]],
                    /*output_id=*/xnnpackTensors[outs[0]], flags);
            if (status != xnn_status_success) {
                LOG(ERROR) << "XNNPACK xnn_define_convolution_2d FAILED";
                return V1_3::ErrorStatus::GENERAL_FAILURE;
            }
        }

        return V1_3::ErrorStatus::NONE;
    }
791 
    // Translates an NNAPI DEPTHWISE_CONV_2D operation into an XNNPACK node.
    //
    // When |subgraph| is null, only validates supportability; when non-null,
    // also defines the node. Like CONV_2D, the implicit- vs explicit-padding
    // signatures are disambiguated by the operand type at a pivot index: in the
    // implicit form ins[8] is the optional BOOL layout flag, in the explicit
    // form ins[8] is an INT32 stride.
    static V1_3::ErrorStatus VisitDepthwiseConv2DNode(xnn_subgraph_t subgraph,
                                                      const V1_3::Operation& operation,
                                                      RunTimeOperandInfo* operands,
                                                      const std::vector<uint32_t>& xnnpackTensors) {
        const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
        const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[1]].type));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[1]].lifetime));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[2]].type));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[2]].lifetime));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
        // Make sure all scalar params are constant.
        for (uint32_t i = 3; i < ins.size(); i++) {
            NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[i]].lifetime));
        }

        // Locate the optional NCHW layout flag: ins[8] (implicit padding) or
        // ins[11] (explicit padding). XNNPACK only supports NHWC.
        bool use_nchw = false;
        if (ins.size() >= 9 && operands[ins[8]].type == OperandType::BOOL) {
            use_nchw = getScalarData<bool>(operands[ins[8]]);
        }
        if (ins.size() >= 12) {
            use_nchw = getScalarData<bool>(operands[ins[11]]);
        }
        if (use_nchw) {
            VLOG(DRIVER)
                    << "XNNPACK VisitDepthwiseConv2DNode FAILED: only NHWC layout is supported";
            return V1_3::ErrorStatus::INVALID_ARGUMENT;
        }

        int32_t stride_width, stride_height, depth_multiplier, activation;
        int32_t dilation_width_factor = 1;
        int32_t dilation_height_factor = 1;
        uint32_t input_padding_top = 0;
        uint32_t input_padding_right = 0;
        uint32_t input_padding_bottom = 0;
        uint32_t input_padding_left = 0;
        uint32_t flags = 0;
        if (ins.size() >= 11 && operands[ins[8]].type != OperandType::BOOL) {
            // Explicit padding: ins[3..6] are left/right/top/bottom pad amounts;
            // the 14-input form appends layout + dilation factors.
            input_padding_left = static_cast<uint32_t>(getScalarData<int32_t>(operands[ins[3]]));
            input_padding_right = static_cast<uint32_t>(getScalarData<int32_t>(operands[ins[4]]));
            input_padding_top = static_cast<uint32_t>(getScalarData<int32_t>(operands[ins[5]]));
            input_padding_bottom = static_cast<uint32_t>(getScalarData<int32_t>(operands[ins[6]]));
            stride_width = getScalarData<int32_t>(operands[ins[7]]);
            stride_height = getScalarData<int32_t>(operands[ins[8]]);
            depth_multiplier = getScalarData<int32_t>(operands[ins[9]]);
            activation = getScalarData<int32_t>(operands[ins[10]]);
            if (ins.size() == 14) {
                dilation_width_factor = getScalarData<int32_t>(operands[ins[12]]);
                dilation_height_factor = getScalarData<int32_t>(operands[ins[13]]);
            }
        } else {
            // Implicit padding: ins[3] is the padding scheme, converted into
            // XNNPACK flags; the 11-input form appends layout + dilation factors.
            int padding_implicit = getScalarData<int32_t>(operands[ins[3]]);
            NN_DRIVER_RETURN_IF_ERROR(CalculatePadding(padding_implicit, &flags));
            stride_width = getScalarData<int32_t>(operands[ins[4]]);
            stride_height = getScalarData<int32_t>(operands[ins[5]]);
            depth_multiplier = getScalarData<int32_t>(operands[ins[6]]);
            activation = getScalarData<int32_t>(operands[ins[7]]);
            if (ins.size() == 11) {
                dilation_width_factor = getScalarData<int32_t>(operands[ins[9]]);
                dilation_height_factor = getScalarData<int32_t>(operands[ins[10]]);
            }
        }
        // The fused activation becomes a clamp range applied by the XNNPACK node.
        float outputMin = -std::numeric_limits<float>::infinity();
        float outputMax = +std::numeric_limits<float>::infinity();
        NN_DRIVER_RETURN_IF_ERROR(
                ConvertActivationToOutputRange(activation, &outputMin, &outputMax));

        // Depthwise filter layout here is [1, height, width, output_channels].
        const RunTimeOperandInfo& filter = operands[ins[1]];
        const uint32_t output_channels = filter.dimensions[3];
        const uint32_t kernel_height = filter.dimensions[1];
        const uint32_t kernel_width = filter.dimensions[2];
        NN_DRIVER_RETURN_IF_ERROR(CheckDepthwiseConvolutionParams(
                stride_width, stride_height, dilation_width_factor, dilation_height_factor,
                depth_multiplier, output_channels));

        if (subgraph != nullptr) {
            const xnn_status status = xnn_define_depthwise_convolution_2d(
                    subgraph, input_padding_top, input_padding_right, input_padding_bottom,
                    input_padding_left, static_cast<uint32_t>(kernel_height),
                    static_cast<uint32_t>(kernel_width), static_cast<uint32_t>(stride_height),
                    static_cast<uint32_t>(stride_width),
                    static_cast<uint32_t>(dilation_height_factor),
                    static_cast<uint32_t>(dilation_width_factor),
                    static_cast<uint32_t>(depth_multiplier),
                    /*input_channels=*/
                    // input channels = output channels / depth multiplier
                    // (CheckDepthwiseConvolutionParams has validated the pair).
                    static_cast<uint32_t>(output_channels / depth_multiplier), outputMin, outputMax,
                    /*input_id=*/xnnpackTensors[ins[0]],
                    /*filter_id=*/xnnpackTensors[ins[1]],
                    /*bias_id=*/xnnpackTensors[ins[2]],
                    /*output_id=*/xnnpackTensors[outs[0]], flags);
            if (status != xnn_status_success) {
                LOG(ERROR) << "XNNPACK xnn_define_depthwise_convolution_2d FAILED";
                return V1_3::ErrorStatus::GENERAL_FAILURE;
            }
        }
        return V1_3::ErrorStatus::NONE;
    }
892 
VisitDivNode(xnn_subgraph_t subgraph,const V1_3::Operation & operation,RunTimeOperandInfo * operands,const std::vector<uint32_t> & xnnpackTensors)893     static V1_3::ErrorStatus VisitDivNode(xnn_subgraph_t subgraph, const V1_3::Operation& operation,
894                                           RunTimeOperandInfo* operands,
895                                           const std::vector<uint32_t>& xnnpackTensors) {
896         const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
897         const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
898         NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
899         NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[1]].type));
900         NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[2]].lifetime));
901         NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
902 
903         float outputMin = -std::numeric_limits<float>::infinity();
904         float outputMax = +std::numeric_limits<float>::infinity();
905         int activation = getScalarData<int32_t>(operands[ins[2]]);
906         NN_DRIVER_RETURN_IF_ERROR(
907                 ConvertActivationToOutputRange(activation, &outputMin, &outputMax));
908 
909         if (subgraph != nullptr) {
910             const xnn_status status =
911                     xnn_define_divide(subgraph, outputMin, outputMax,
912                                       /*input1_id=*/xnnpackTensors[ins[0]],
913                                       /*input2_id=*/xnnpackTensors[ins[1]],
914                                       /*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
915             if (status != xnn_status_success) {
916                 LOG(ERROR) << "XNNPACK xnn_define_divide FAILED";
917                 return V1_3::ErrorStatus::GENERAL_FAILURE;
918             }
919         }
920         return V1_3::ErrorStatus::NONE;
921     }
922 
VisitFullyConnectedNode(xnn_subgraph_t subgraph,const V1_3::Operation & operation,RunTimeOperandInfo * operands,const std::vector<uint32_t> & xnnpackTensors)923     static V1_3::ErrorStatus VisitFullyConnectedNode(xnn_subgraph_t subgraph,
924                                                      const V1_3::Operation& operation,
925                                                      RunTimeOperandInfo* operands,
926                                                      const std::vector<uint32_t>& xnnpackTensors) {
927         const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
928         const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
929         NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
930         NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[1]].type));
931         NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[1]].lifetime));
932         NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[2]].type));
933         NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[2]].lifetime));
934         NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[3]].lifetime));
935         NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
936 
937         float outputMin = -std::numeric_limits<float>::infinity();
938         float outputMax = +std::numeric_limits<float>::infinity();
939         int activation = getScalarData<int32_t>(operands[ins[3]]);
940         NN_DRIVER_RETURN_IF_ERROR(
941                 ConvertActivationToOutputRange(activation, &outputMin, &outputMax));
942 
943         if (subgraph != nullptr) {
944             const xnn_status status =
945                     xnn_define_fully_connected(subgraph, outputMin, outputMax,
946                                                /*input_id=*/xnnpackTensors[ins[0]],
947                                                /*filter_id=*/xnnpackTensors[ins[1]],
948                                                /*bias_id=*/xnnpackTensors[ins[2]],
949                                                /*output_id=*/xnnpackTensors[outs[0]],
950                                                /*flags=*/XNN_FLAG_TENSORFLOW_RESHAPE_2D);
951             if (status != xnn_status_success) {
952                 LOG(ERROR) << "XNNPACK xnn_define_fully_connected FAILED";
953                 return V1_3::ErrorStatus::GENERAL_FAILURE;
954             }
955         }
956         return V1_3::ErrorStatus::NONE;
957     }
958 
VisitFloorNode(xnn_subgraph_t subgraph,const V1_3::Operation & operation,RunTimeOperandInfo * operands,const std::vector<uint32_t> & xnnpackTensors)959     static V1_3::ErrorStatus VisitFloorNode(xnn_subgraph_t subgraph,
960                                             const V1_3::Operation& operation,
961                                             RunTimeOperandInfo* operands,
962                                             const std::vector<uint32_t>& xnnpackTensors) {
963         const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
964         const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
965         NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
966         NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
967 
968         if (subgraph != nullptr) {
969             const xnn_status status =
970                     xnn_define_floor(subgraph,
971                                      /*input1_id=*/xnnpackTensors[ins[0]],
972                                      /*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
973             if (status != xnn_status_success) {
974                 LOG(ERROR) << "XNNPACK xnn_define_floor FAILED";
975                 return V1_3::ErrorStatus::GENERAL_FAILURE;
976             }
977         }
978         return V1_3::ErrorStatus::NONE;
979     }
980 
VisitHardSwishNode(xnn_subgraph_t subgraph,const V1_3::Operation & operation,RunTimeOperandInfo * operands,const std::vector<uint32_t> & xnnpackTensors)981     static V1_3::ErrorStatus VisitHardSwishNode(xnn_subgraph_t subgraph,
982                                                 const V1_3::Operation& operation,
983                                                 RunTimeOperandInfo* operands,
984                                                 const std::vector<uint32_t>& xnnpackTensors) {
985         const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
986         const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
987         NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
988         NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
989 
990         if (subgraph != nullptr) {
991             const xnn_status status =
992                     xnn_define_hardswish(subgraph, /*input_id=*/xnnpackTensors[ins[0]],
993                                          /*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
994             if (status != xnn_status_success) {
995                 LOG(ERROR) << "XNNPACK xnn_define_hardswish FAILED";
996                 return V1_3::ErrorStatus::GENERAL_FAILURE;
997             }
998         }
999         return V1_3::ErrorStatus::NONE;
1000     }
1001 
VisitLogisticNode(xnn_subgraph_t subgraph,const V1_3::Operation & operation,RunTimeOperandInfo * operands,const std::vector<uint32_t> & xnnpackTensors)1002     static V1_3::ErrorStatus VisitLogisticNode(xnn_subgraph_t subgraph,
1003                                                const V1_3::Operation& operation,
1004                                                RunTimeOperandInfo* operands,
1005                                                const std::vector<uint32_t>& xnnpackTensors) {
1006         const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
1007         const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
1008         NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
1009         NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
1010 
1011         if (subgraph != nullptr) {
1012             const xnn_status status =
1013                     xnn_define_sigmoid(subgraph, /*input_id=*/xnnpackTensors[ins[0]],
1014                                        /*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
1015             if (status != xnn_status_success) {
1016                 LOG(ERROR) << "XNNPACK xnn_define_sigmoid FAILED";
1017                 return V1_3::ErrorStatus::GENERAL_FAILURE;
1018             }
1019         }
1020         return V1_3::ErrorStatus::NONE;
1021     }
1022 
    // Translates an NNAPI MAX_POOL_2D operation into an XNNPACK subgraph node.
    //
    // When |subgraph| is null, only validates that the operation is expressible
    // in XNNPACK (float tensors, constant scalar params, NHWC layout, sane
    // pooling params); when non-null, additionally defines the node.
    // Mirrors VisitAveragePool2DNode's input decoding.
    static V1_3::ErrorStatus VisitMaxPool2DNode(xnn_subgraph_t subgraph,
                                                const V1_3::Operation& operation,
                                                RunTimeOperandInfo* operands,
                                                const std::vector<uint32_t>& xnnpackTensors) {
        const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
        const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
        // Make sure all scalar params are constant.
        for (uint32_t i = 1; i < ins.size(); i++) {
            NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[i]].lifetime));
        }

        // The optional data-layout flag is the last input: index 7 for the
        // implicit-padding signature (8 inputs), index 10 for the explicit one
        // (11 inputs). XNNPACK only supports NHWC.
        bool use_nchw = false;
        if (ins.size() == 8) {
            use_nchw = getScalarData<bool>(operands[ins[7]]);
        }
        if (ins.size() == 11) {
            use_nchw = getScalarData<bool>(operands[ins[10]]);
        }
        if (use_nchw) {
            VLOG(DRIVER) << "XNNPACK VisitMaxPool2DNode FAILED: only NHWC layout is supported";
            return V1_3::ErrorStatus::INVALID_ARGUMENT;
        }

        int32_t stride_width, stride_height, filter_width, filter_height, activation;
        uint32_t input_padding_top = 0;
        uint32_t input_padding_right = 0;
        uint32_t input_padding_bottom = 0;
        uint32_t input_padding_left = 0;
        uint32_t flags = 0;
        if (ins.size() >= 10) {
            // Explicit padding: ins[1..4] are left/right/top/bottom pad amounts,
            // followed by strides, filter size, and the fused activation.
            input_padding_left = static_cast<uint32_t>(getScalarData<int32_t>(operands[ins[1]]));
            input_padding_right = static_cast<uint32_t>(getScalarData<int32_t>(operands[ins[2]]));
            input_padding_top = static_cast<uint32_t>(getScalarData<int32_t>(operands[ins[3]]));
            input_padding_bottom = static_cast<uint32_t>(getScalarData<int32_t>(operands[ins[4]]));
            stride_width = getScalarData<int32_t>(operands[ins[5]]);
            stride_height = getScalarData<int32_t>(operands[ins[6]]);
            filter_width = getScalarData<int32_t>(operands[ins[7]]);
            filter_height = getScalarData<int32_t>(operands[ins[8]]);
            activation = getScalarData<int32_t>(operands[ins[9]]);
        } else {
            // Implicit padding: ins[1] is the padding scheme, translated into
            // XNNPACK flags by CalculatePadding.
            int padding_implicit = getScalarData<int32_t>(operands[ins[1]]);
            NN_DRIVER_RETURN_IF_ERROR(CalculatePadding(padding_implicit, &flags));
            stride_width = getScalarData<int32_t>(operands[ins[2]]);
            stride_height = getScalarData<int32_t>(operands[ins[3]]);
            filter_width = getScalarData<int32_t>(operands[ins[4]]);
            filter_height = getScalarData<int32_t>(operands[ins[5]]);
            activation = getScalarData<int32_t>(operands[ins[6]]);
        }
        NN_DRIVER_RETURN_IF_ERROR(
                CheckPoolingParams(stride_width, stride_height, filter_width, filter_height));

        // The fused activation becomes a clamp range applied by the XNNPACK node.
        float outputMin = -std::numeric_limits<float>::infinity();
        float outputMax = +std::numeric_limits<float>::infinity();
        NN_DRIVER_RETURN_IF_ERROR(
                ConvertActivationToOutputRange(activation, &outputMin, &outputMax));

        if (subgraph != nullptr) {
            xnn_status status = xnn_status_success;
            if (filter_width == 1 && filter_height == 1) {
                // A 1x1 max pool is an identity apart from the activation, so
                // emit a cheaper clamp node instead.
                status = xnn_define_clamp(subgraph, outputMin, outputMax,
                                          /*input_id=*/xnnpackTensors[ins[0]],
                                          /*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
            } else {
                status = xnn_define_max_pooling_2d(
                        subgraph, input_padding_top, input_padding_right, input_padding_bottom,
                        input_padding_left, static_cast<uint32_t>(filter_height),
                        static_cast<uint32_t>(filter_width), static_cast<uint32_t>(stride_height),
                        static_cast<uint32_t>(stride_width), /*dilation_height=*/1,
                        /*dilation_width=*/1, outputMin, outputMax,
                        /*input_id=*/xnnpackTensors[ins[0]],
                        /*output_id=*/xnnpackTensors[outs[0]], flags);
            }
            if (status != xnn_status_success) {
                LOG(ERROR) << "XNNPACK xnn_define_max_pooling_2d FAILED";
                return V1_3::ErrorStatus::GENERAL_FAILURE;
            }
        }
        return V1_3::ErrorStatus::NONE;
    }
1106 
VisitMaximumNode(xnn_subgraph_t subgraph,const V1_3::Operation & operation,RunTimeOperandInfo * operands,const std::vector<uint32_t> & xnnpackTensors)1107     static V1_3::ErrorStatus VisitMaximumNode(xnn_subgraph_t subgraph,
1108                                               const V1_3::Operation& operation,
1109                                               RunTimeOperandInfo* operands,
1110                                               const std::vector<uint32_t>& xnnpackTensors) {
1111         const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
1112         const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
1113         NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
1114         NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[1]].type));
1115         NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[2]].lifetime));
1116         NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
1117 
1118         float outputMin = -std::numeric_limits<float>::infinity();
1119         float outputMax = +std::numeric_limits<float>::infinity();
1120         int activation = getScalarData<int32_t>(operands[ins[2]]);
1121         NN_DRIVER_RETURN_IF_ERROR(
1122                 ConvertActivationToOutputRange(activation, &outputMin, &outputMax));
1123 
1124         if (subgraph != nullptr) {
1125             const xnn_status status =
1126                     xnn_define_maximum2(subgraph,
1127                                         /*input1_id=*/xnnpackTensors[ins[0]],
1128                                         /*input2_id=*/xnnpackTensors[ins[1]],
1129                                         /*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
1130             if (status != xnn_status_success) {
1131                 LOG(ERROR) << "XNNPACK xnn_define_maximum2 FAILED";
1132                 return V1_3::ErrorStatus::GENERAL_FAILURE;
1133             }
1134         }
1135         return V1_3::ErrorStatus::NONE;
1136     }
1137 
VisitMeanNode(xnn_subgraph_t subgraph,const V1_3::Operation & operation,RunTimeOperandInfo * operands,const std::vector<uint32_t> & xnnpackTensors)1138     static V1_3::ErrorStatus VisitMeanNode(xnn_subgraph_t subgraph,
1139                                            const V1_3::Operation& operation,
1140                                            RunTimeOperandInfo* operands,
1141                                            const std::vector<uint32_t>& xnnpackTensors) {
1142         const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
1143         const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
1144         NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
1145         NN_DRIVER_RETURN_IF_ERROR(CheckTensorShape(operands[ins[0]].dimensions, 4));
1146         NN_DRIVER_RETURN_IF_ERROR(CheckAxesTensorShape(operands[ins[1]].dimensions));
1147         NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[1]].lifetime));
1148         NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[2]].lifetime));
1149         NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
1150         NN_DRIVER_RETURN_IF_ERROR(CheckTensorShape(operands[outs[0]].dimensions, 4));
1151 
1152         int keep_dims = getScalarData<int32_t>(operands[ins[2]]);
1153         if (keep_dims <= 0) {
1154             LOG(ERROR) << "XNNPACK VisitMeanNode FAILED: only support keep_dims";
1155             return V1_3::ErrorStatus::INVALID_ARGUMENT;
1156         }
1157         const int32_t* axes_buffer = reinterpret_cast<const int32_t*>(operands[ins[1]].buffer);
1158         if (operands[ins[1]].dimensions[0] != 2) {
1159             LOG(ERROR) << "XNNPACK VisitMeanNode FAILED: unsupported axes";
1160             return V1_3::ErrorStatus::INVALID_ARGUMENT;
1161         }
1162         if (std::min(axes_buffer[0], axes_buffer[1]) != 1 ||
1163             std::max(axes_buffer[0], axes_buffer[1]) != 2) {
1164             LOG(ERROR) << "XNNPACK VisitMeanNode FAILED: unsupported axes";
1165             return V1_3::ErrorStatus::INVALID_ARGUMENT;
1166         }
1167         if (subgraph != nullptr) {
1168             const xnn_status status = xnn_define_global_average_pooling_2d(
1169                     subgraph,
1170                     /*outputMin=*/-std::numeric_limits<float>::infinity(),
1171                     /*outputMax=*/+std::numeric_limits<float>::infinity(),
1172                     /*input_id=*/xnnpackTensors[ins[0]],
1173                     /*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
1174             if (status != xnn_status_success) {
1175                 LOG(ERROR) << "XNNPACK xnn_define_global_average_pooling_2d FAILED";
1176                 return V1_3::ErrorStatus::GENERAL_FAILURE;
1177             }
1178         }
1179         return V1_3::ErrorStatus::NONE;
1180     }
1181 
VisitMinimumNode(xnn_subgraph_t subgraph,const V1_3::Operation & operation,RunTimeOperandInfo * operands,const std::vector<uint32_t> & xnnpackTensors)1182     static V1_3::ErrorStatus VisitMinimumNode(xnn_subgraph_t subgraph,
1183                                               const V1_3::Operation& operation,
1184                                               RunTimeOperandInfo* operands,
1185                                               const std::vector<uint32_t>& xnnpackTensors) {
1186         const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
1187         const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
1188         NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
1189         NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[1]].type));
1190         NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[2]].lifetime));
1191         NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
1192 
1193         float outputMin = -std::numeric_limits<float>::infinity();
1194         float outputMax = +std::numeric_limits<float>::infinity();
1195         int activation = getScalarData<int32_t>(operands[ins[2]]);
1196         NN_DRIVER_RETURN_IF_ERROR(
1197                 ConvertActivationToOutputRange(activation, &outputMin, &outputMax));
1198 
1199         if (subgraph != nullptr) {
1200             const xnn_status status =
1201                     xnn_define_minimum2(subgraph,
1202                                         /*input1_id=*/xnnpackTensors[ins[0]],
1203                                         /*input2_id=*/xnnpackTensors[ins[1]],
1204                                         /*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
1205             if (status != xnn_status_success) {
1206                 LOG(ERROR) << "XNNPACK xnn_define_minimum2 FAILED";
1207                 return V1_3::ErrorStatus::GENERAL_FAILURE;
1208             }
1209         }
1210         return V1_3::ErrorStatus::NONE;
1211     }
1212 
VisitMulNode(xnn_subgraph_t subgraph,const V1_3::Operation & operation,RunTimeOperandInfo * operands,const std::vector<uint32_t> & xnnpackTensors)1213     static V1_3::ErrorStatus VisitMulNode(xnn_subgraph_t subgraph, const V1_3::Operation& operation,
1214                                           RunTimeOperandInfo* operands,
1215                                           const std::vector<uint32_t>& xnnpackTensors) {
1216         const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
1217         const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
1218         NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
1219         NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[1]].type));
1220         NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[2]].lifetime));
1221         NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
1222 
1223         int activation = getScalarData<int32_t>(operands[ins[2]]);
1224         float outputMin = -std::numeric_limits<float>::infinity();
1225         float outputMax = +std::numeric_limits<float>::infinity();
1226         NN_DRIVER_RETURN_IF_ERROR(
1227                 ConvertActivationToOutputRange(activation, &outputMin, &outputMax));
1228 
1229         if (subgraph != nullptr) {
1230             const xnn_status status =
1231                     xnn_define_multiply2(subgraph, outputMin, outputMax,
1232                                          /*input1_id=*/xnnpackTensors[ins[0]],
1233                                          /*input2_id=*/xnnpackTensors[ins[1]],
1234                                          /*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
1235             if (status != xnn_status_success) {
1236                 LOG(ERROR) << "XNNPACK xnn_define_multiply2 FAILED";
1237                 return V1_3::ErrorStatus::GENERAL_FAILURE;
1238             }
1239         }
1240         return V1_3::ErrorStatus::NONE;
1241     }
1242 
VisitNegNode(xnn_subgraph_t subgraph,const V1_3::Operation & operation,RunTimeOperandInfo * operands,const std::vector<uint32_t> & xnnpackTensors)1243     static V1_3::ErrorStatus VisitNegNode(xnn_subgraph_t subgraph, const V1_3::Operation& operation,
1244                                           RunTimeOperandInfo* operands,
1245                                           const std::vector<uint32_t>& xnnpackTensors) {
1246         const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
1247         const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
1248         NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
1249         NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
1250 
1251         if (subgraph != nullptr) {
1252             const xnn_status status =
1253                     xnn_define_negate(subgraph,
1254                                       /*input1_id=*/xnnpackTensors[ins[0]],
1255                                       /*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
1256             if (status != xnn_status_success) {
1257                 LOG(ERROR) << "XNNPACK xnn_define_negate FAILED";
1258                 return V1_3::ErrorStatus::GENERAL_FAILURE;
1259             }
1260         }
1261         return V1_3::ErrorStatus::NONE;
1262     }
1263 
VisitPreluNode(xnn_subgraph_t subgraph,const V1_3::Operation & operation,RunTimeOperandInfo * operands,const std::vector<uint32_t> & xnnpackTensors)1264     static V1_3::ErrorStatus VisitPreluNode(xnn_subgraph_t subgraph,
1265                                             const V1_3::Operation& operation,
1266                                             RunTimeOperandInfo* operands,
1267                                             const std::vector<uint32_t>& xnnpackTensors) {
1268         const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
1269         const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
1270         NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
1271         NN_DRIVER_RETURN_IF_ERROR(
1272                 CheckTensorShape(operands[ins[0]].dimensions, 1, XNN_MAX_TENSOR_DIMS));
1273         NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[1]].type));
1274         NN_DRIVER_RETURN_IF_ERROR(CheckSlopeTensorShape(operands[ins[1]].dimensions));
1275         NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
1276         NN_DRIVER_RETURN_IF_ERROR(
1277                 CheckTensorShape(operands[outs[0]].dimensions, 1, XNN_MAX_TENSOR_DIMS));
1278 
1279         if (subgraph != nullptr) {
1280             const xnn_status status =
1281                     xnn_define_prelu(subgraph, /*input_id=*/xnnpackTensors[ins[0]],
1282                                      /*slope_id=*/xnnpackTensors[ins[1]],
1283                                      /*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
1284             if (status != xnn_status_success) {
1285                 LOG(ERROR) << "XNNPACK xnn_define_prelu FAILED";
1286                 return V1_3::ErrorStatus::GENERAL_FAILURE;
1287             }
1288         }
1289         return V1_3::ErrorStatus::NONE;
1290     }
1291 
VisitPadNode(xnn_subgraph_t subgraph,const V1_3::Operation & operation,RunTimeOperandInfo * operands,float padding_value,const std::vector<uint32_t> & xnnpackTensors)1292     static V1_3::ErrorStatus VisitPadNode(xnn_subgraph_t subgraph, const V1_3::Operation& operation,
1293                                           RunTimeOperandInfo* operands, float padding_value,
1294                                           const std::vector<uint32_t>& xnnpackTensors) {
1295         const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
1296         const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
1297         NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
1298         NN_DRIVER_RETURN_IF_ERROR(
1299                 CheckTensorShape(operands[ins[0]].dimensions, 1, XNN_MAX_TENSOR_DIMS));
1300         NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[1]].lifetime));
1301         NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
1302         NN_DRIVER_RETURN_IF_ERROR(
1303                 CheckTensorShape(operands[outs[0]].dimensions, 1, XNN_MAX_TENSOR_DIMS));
1304 
1305         const int32_t* paddings_data = reinterpret_cast<const int32_t*>(operands[ins[1]].buffer);
1306         for (size_t i = 0; i < operands[ins[1]].dimensions.size() * 2; i++) {
1307             if (paddings_data[i] < 0) return V1_3::ErrorStatus::INVALID_ARGUMENT;
1308         }
1309         if (subgraph != nullptr) {
1310             std::array<size_t, XNN_MAX_TENSOR_DIMS> pre_paddings{};
1311             std::array<size_t, XNN_MAX_TENSOR_DIMS> post_paddings{};
1312             for (size_t i = 0; i < operands[ins[1]].dimensions.size(); i++) {
1313                 pre_paddings[i] = static_cast<size_t>(paddings_data[i * 2 + 0]);
1314                 post_paddings[i] = static_cast<size_t>(paddings_data[i * 2 + 1]);
1315             }
1316             const xnn_status status = xnn_define_static_constant_pad(
1317                     subgraph, pre_paddings.data(), post_paddings.data(), padding_value,
1318                     /*input_id=*/xnnpackTensors[ins[0]],
1319                     /*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
1320             if (status != xnn_status_success) {
1321                 LOG(ERROR) << "XNNPACK xnn_define_static_constant_pad FAILED";
1322                 return V1_3::ErrorStatus::GENERAL_FAILURE;
1323             }
1324         }
1325         return V1_3::ErrorStatus::NONE;
1326     }
1327 
VisitPadV2Node(xnn_subgraph_t subgraph,const V1_3::Operation & operation,RunTimeOperandInfo * operands,const std::vector<uint32_t> & xnnpackTensors)1328     static V1_3::ErrorStatus VisitPadV2Node(xnn_subgraph_t subgraph,
1329                                             const V1_3::Operation& operation,
1330                                             RunTimeOperandInfo* operands,
1331                                             const std::vector<uint32_t>& xnnpackTensors) {
1332         const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
1333         if (operands[ins[2]].type != OperandType::FLOAT32) {
1334             return V1_3::ErrorStatus::INVALID_ARGUMENT;
1335         }
1336         float padding_value = getScalarData<float>(operands[ins[2]]);
1337         return VisitPadNode(subgraph, operation, operands, padding_value, xnnpackTensors);
1338     }
1339 
VisitReshapeNode(xnn_subgraph_t subgraph,const V1_3::Operation & operation,RunTimeOperandInfo * operands,const std::vector<uint32_t> & xnnpackTensors)1340     static V1_3::ErrorStatus VisitReshapeNode(xnn_subgraph_t subgraph,
1341                                               const V1_3::Operation& operation,
1342                                               RunTimeOperandInfo* operands,
1343                                               const std::vector<uint32_t>& xnnpackTensors) {
1344         const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
1345         const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
1346         NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
1347         NN_DRIVER_RETURN_IF_ERROR(
1348                 CheckTensorShape(operands[ins[0]].dimensions, 0, XNN_MAX_TENSOR_DIMS));
1349         NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[1]].lifetime));
1350         NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
1351         NN_DRIVER_RETURN_IF_ERROR(
1352                 CheckTensorShape(operands[outs[0]].dimensions, 0, XNN_MAX_TENSOR_DIMS));
1353 
1354         if (subgraph != nullptr) {
1355             std::array<size_t, XNN_MAX_TENSOR_DIMS> new_shape;
1356             for (uint32_t i = 0; i < operands[outs[0]].dimensions.size(); i++) {
1357                 new_shape[i] = static_cast<size_t>(operands[outs[0]].dimensions[i]);
1358             }
1359             const xnn_status status = xnn_define_static_reshape(
1360                     subgraph, static_cast<size_t>(operands[outs[0]].dimensions.size()),
1361                     new_shape.data(),
1362                     /*input_id=*/xnnpackTensors[ins[0]],
1363                     /*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
1364             if (status != xnn_status_success) {
1365                 LOG(ERROR) << "XNNPACK xnn_define_static_reshape FAILED";
1366                 return V1_3::ErrorStatus::GENERAL_FAILURE;
1367             }
1368         }
1369         return V1_3::ErrorStatus::NONE;
1370     }
1371 
    // Lowers RESIZE_BILINEAR to xnn_define_static_resize_bilinear_2d.
    // Supported form: 4-D float input/output, all scalar parameters constant,
    // NHWC layout only. The output size comes either from explicit INT32
    // width/height operands or FLOAT32 scale factors applied to the input.
    static V1_3::ErrorStatus VisitResizeBilinearNode(xnn_subgraph_t subgraph,
                                                     const V1_3::Operation& operation,
                                                     RunTimeOperandInfo* operands,
                                                     const std::vector<uint32_t>& xnnpackTensors) {
        const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
        const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorShape(operands[ins[0]].dimensions, 4));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
        NN_DRIVER_RETURN_IF_ERROR(CheckTensorShape(operands[outs[0]].dimensions, 4));
        // Make sure all scalar params are constant.
        for (uint32_t i = 1; i < ins.size(); i++) {
            NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[i]].lifetime));
        }

        // Optional ins[3] selects NCHW layout, which this path does not
        // support.
        if (ins.size() >= 4) {
            bool use_nchw = getScalarData<bool>(operands[ins[3]]);
            if (use_nchw) {
                VLOG(DRIVER)
                        << "XNNPACK VisitResizeBilinearNode FAILED: only NHWC layout is supported";
                return V1_3::ErrorStatus::INVALID_ARGUMENT;
            }
        }

        size_t new_height, new_width;
        if (operands[ins[1]].type == OperandType::INT32) {
            // explicitly specify the output dimension.
            // Operand order per NNAPI: ins[1] is width, ins[2] is height.
            new_width = static_cast<size_t>(getScalarData<int32_t>(operands[ins[1]]));
            new_height = static_cast<size_t>(getScalarData<int32_t>(operands[ins[2]]));
        } else if (operands[ins[1]].type == OperandType::FLOAT32) {
            // specify the output dimension scaling factor.
            float width_scale = getScalarData<float>(operands[ins[1]]);
            float height_scale = getScalarData<float>(operands[ins[2]]);
            if (width_scale <= 0 || height_scale <= 0) {
                return V1_3::ErrorStatus::INVALID_ARGUMENT;
            }
            // NHWC input: dimensions[1] is height, dimensions[2] is width.
            new_height = static_cast<size_t>(operands[ins[0]].dimensions[1] * height_scale);
            new_width = static_cast<size_t>(operands[ins[0]].dimensions[2] * width_scale);
        } else {
            // Size operands must be either INT32 (explicit) or FLOAT32 (scale).
            return V1_3::ErrorStatus::INVALID_ARGUMENT;
        }

        bool align_corners = false;
        bool half_pixel_centers = false;
        if (ins.size() == 6) {
            align_corners = getScalarData<bool>(operands[ins[4]]);
            half_pixel_centers = getScalarData<bool>(operands[ins[5]]);
        }
        // NOTE(review): rejects align_corners when half_pixel_centers is not
        // also set — presumably that combination has no XNNPACK flag
        // equivalent below; confirm against XNNPACK semantics.
        if (align_corners && !half_pixel_centers) {
            return V1_3::ErrorStatus::INVALID_ARGUMENT;
        }
        if (subgraph != nullptr) {
            uint32_t flags = 0;
            if (align_corners) {
                flags |= XNN_FLAG_ALIGN_CORNERS;
            } else if (!half_pixel_centers) {
                // Neither flag set: fall back to TensorFlow's legacy resize
                // coordinate mapping.
                flags |= XNN_FLAG_TENSORFLOW_LEGACY_MODE;
            }
            const xnn_status status = xnn_define_static_resize_bilinear_2d(
                    subgraph, new_height, new_width,
                    /*input_id=*/xnnpackTensors[ins[0]],
                    /*output_id=*/xnnpackTensors[outs[0]], flags);
            if (status != xnn_status_success) {
                LOG(ERROR) << "XNNPACK xnn_define_static_resize_bilinear_2d FAILED";
                return V1_3::ErrorStatus::GENERAL_FAILURE;
            }
        }
        return V1_3::ErrorStatus::NONE;
    }
1441 
VisitReluNode(xnn_subgraph_t subgraph,const V1_3::Operation & operation,RunTimeOperandInfo * operands,float outputMin,float outputMax,const std::vector<uint32_t> & xnnpackTensors)1442     static V1_3::ErrorStatus VisitReluNode(xnn_subgraph_t subgraph,
1443                                            const V1_3::Operation& operation,
1444                                            RunTimeOperandInfo* operands, float outputMin,
1445                                            float outputMax,
1446                                            const std::vector<uint32_t>& xnnpackTensors) {
1447         const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
1448         const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
1449         NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
1450         NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
1451 
1452         if (subgraph != nullptr) {
1453             const xnn_status status =
1454                     xnn_define_clamp(subgraph, outputMin, outputMax,
1455                                      /*input_id=*/xnnpackTensors[ins[0]],
1456                                      /*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
1457             if (status != xnn_status_success) {
1458                 LOG(ERROR) << "XNNPACK xnn_define_clamp FAILED";
1459                 return V1_3::ErrorStatus::GENERAL_FAILURE;
1460             }
1461         }
1462         return V1_3::ErrorStatus::NONE;
1463     }
1464 
VisitSqrtNode(xnn_subgraph_t subgraph,const V1_3::Operation & operation,RunTimeOperandInfo * operands,const std::vector<uint32_t> & xnnpackTensors)1465     static V1_3::ErrorStatus VisitSqrtNode(xnn_subgraph_t subgraph,
1466                                            const V1_3::Operation& operation,
1467                                            RunTimeOperandInfo* operands,
1468                                            const std::vector<uint32_t>& xnnpackTensors) {
1469         const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
1470         const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
1471         NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
1472         NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
1473 
1474         if (subgraph != nullptr) {
1475             const xnn_status status =
1476                     xnn_define_square_root(subgraph,
1477                                            /*input1_id=*/xnnpackTensors[ins[0]],
1478                                            /*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
1479             if (status != xnn_status_success) {
1480                 LOG(ERROR) << "XNNPACK xnn_define_bankers_rounding FAILED";
1481                 return V1_3::ErrorStatus::GENERAL_FAILURE;
1482             }
1483         }
1484         return V1_3::ErrorStatus::NONE;
1485     }
1486 
VisitSubNode(xnn_subgraph_t subgraph,const V1_3::Operation & operation,RunTimeOperandInfo * operands,const std::vector<uint32_t> & xnnpackTensors)1487     static V1_3::ErrorStatus VisitSubNode(xnn_subgraph_t subgraph, const V1_3::Operation& operation,
1488                                           RunTimeOperandInfo* operands,
1489                                           const std::vector<uint32_t>& xnnpackTensors) {
1490         const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
1491         const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
1492         NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
1493         NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[1]].type));
1494         NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[2]].lifetime));
1495         NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
1496 
1497         float outputMin = -std::numeric_limits<float>::infinity();
1498         float outputMax = +std::numeric_limits<float>::infinity();
1499         int activation = getScalarData<int32_t>(operands[ins[2]]);
1500         NN_DRIVER_RETURN_IF_ERROR(
1501                 ConvertActivationToOutputRange(activation, &outputMin, &outputMax));
1502 
1503         if (subgraph != nullptr) {
1504             const xnn_status status =
1505                     xnn_define_subtract(subgraph, outputMin, outputMax,
1506                                         /*input1_id=*/xnnpackTensors[ins[0]],
1507                                         /*input2_id=*/xnnpackTensors[ins[1]],
1508                                         /*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
1509             if (status != xnn_status_success) {
1510                 LOG(ERROR) << "XNNPACK xnn_define_subtract FAILED";
1511                 return V1_3::ErrorStatus::GENERAL_FAILURE;
1512             }
1513         }
1514         return V1_3::ErrorStatus::NONE;
1515     }
1516 
VisitSoftmaxNode(xnn_subgraph_t subgraph,const V1_3::Operation & operation,RunTimeOperandInfo * operands,const std::vector<uint32_t> & xnnpackTensors)1517     static V1_3::ErrorStatus VisitSoftmaxNode(xnn_subgraph_t subgraph,
1518                                               const V1_3::Operation& operation,
1519                                               RunTimeOperandInfo* operands,
1520                                               const std::vector<uint32_t>& xnnpackTensors) {
1521         const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
1522         const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
1523         NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
1524         NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[1]].lifetime));
1525         NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
1526 
1527         float beta = getScalarData<float>(operands[ins[1]]);
1528         if (beta != 1.0f) {
1529             LOG(ERROR) << "XNNPACK VisitSoftmaxNode FAILED, unsupported beta value: " << beta;
1530             return V1_3::ErrorStatus::INVALID_ARGUMENT;
1531         }
1532         if (ins.size() >= 3) {
1533             NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[2]].lifetime));
1534             int axis = getScalarData<int32_t>(operands[ins[2]]);
1535             if (axis != -1) {
1536                 LOG(ERROR) << "XNNPACK VisitSoftmaxNode FAILED, unsupported axis value: " << axis;
1537                 return V1_3::ErrorStatus::INVALID_ARGUMENT;
1538             }
1539         }
1540         if (subgraph != nullptr) {
1541             const xnn_status status =
1542                     xnn_define_softmax(subgraph, /*input_id=*/xnnpackTensors[ins[0]],
1543                                        /*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
1544             if (status != xnn_status_success) {
1545                 LOG(ERROR) << "XNNPACK xnn_define_softmax FAILED";
1546                 return V1_3::ErrorStatus::GENERAL_FAILURE;
1547             }
1548         }
1549 
1550         return V1_3::ErrorStatus::NONE;
1551     }
1552 
1553    private:
Subgraph(xnn_runtime_t runtime,std::unordered_set<uint32_t> && externals,bool useStaticBuffer=false)1554     Subgraph(xnn_runtime_t runtime, std::unordered_set<uint32_t>&& externals,
1555              bool useStaticBuffer = false)
1556         : mRuntime(runtime, &xnn_delete_runtime),
1557           mExternals(externals),
1558           mUseStaticBuffer(useStaticBuffer) {}
1559 
    // XNNPACK Runtime (subgraph + workspace) with smart-pointer for lifetime
    // management.
    std::unique_ptr<xnn_runtime, decltype(&xnn_delete_runtime)> mRuntime{nullptr,
                                                                         &xnn_delete_runtime};
    // XNNPACK tensor ids passed in at construction; presumably the externally
    // visible (input/output) tensors of the runtime — confirm against Create().
    std::unordered_set<uint32_t> mExternals;
    // True until the first invocation; set at construction and assumed to gate
    // one-time setup in Invoke() — TODO confirm against Invoke().
    bool mFirstRun = true;
    // Constructor-provided flag; name suggests external buffers are bound once
    // rather than per execution — verify where it is read.
    bool mUseStaticBuffer;
1567 };
1568 
1569 class SamplePreparedModelXNNPACK : public SamplePreparedModel {
1570    public:
SamplePreparedModelXNNPACK(const V1_3::Model & model,const SampleDriver * driver,V1_1::ExecutionPreference preference,uid_t userId,V1_3::Priority priority)1571     SamplePreparedModelXNNPACK(const V1_3::Model& model, const SampleDriver* driver,
1572                                V1_1::ExecutionPreference preference, uid_t userId,
1573                                V1_3::Priority priority)
1574         : SamplePreparedModel(model, driver, preference, userId, priority),
1575           mSubgraph(nullptr),
1576           mThreadpool(nullptr) {}
~SamplePreparedModelXNNPACK()1577     ~SamplePreparedModelXNNPACK() {
1578         delete mSubgraph;
1579         pthreadpool_destroy(mThreadpool);
1580     };
1581     bool initialize();
1582     hardware::Return<V1_0::ErrorStatus> execute(
1583             const V1_0::Request& request, const sp<V1_0::IExecutionCallback>& callback) override;
1584     hardware::Return<V1_0::ErrorStatus> execute_1_2(
1585             const V1_0::Request& request, V1_2::MeasureTiming measure,
1586             const sp<V1_2::IExecutionCallback>& callback) override;
1587     hardware::Return<V1_3::ErrorStatus> execute_1_3(
1588             const V1_3::Request& request, V1_2::MeasureTiming measure,
1589             const V1_3::OptionalTimePoint& deadline,
1590             const V1_3::OptionalTimeoutDuration& loopTimeoutDuration,
1591             const sp<V1_3::IExecutionCallback>& callback) override;
1592     hardware::Return<void> executeSynchronously(const V1_0::Request& request,
1593                                                 V1_2::MeasureTiming measure,
1594                                                 executeSynchronously_cb cb) override;
1595     hardware::Return<void> executeSynchronously_1_3(
1596             const V1_3::Request& request, V1_2::MeasureTiming measure,
1597             const V1_3::OptionalTimePoint& deadline,
1598             const V1_3::OptionalTimeoutDuration& loopTimeoutDuration,
1599             executeSynchronously_1_3_cb cb) override;
1600     hardware::Return<void> configureExecutionBurst(
1601             const sp<V1_2::IBurstCallback>& callback,
1602             const MQDescriptorSync<V1_2::FmqRequestDatum>& requestChannel,
1603             const MQDescriptorSync<V1_2::FmqResultDatum>& resultChannel,
1604             configureExecutionBurst_cb cb) override;
1605     hardware::Return<void> executeFenced(const V1_3::Request& request,
1606                                          const hardware::hidl_vec<hardware::hidl_handle>& wait_for,
1607                                          V1_2::MeasureTiming measure,
1608                                          const V1_3::OptionalTimePoint& deadline,
1609                                          const V1_3::OptionalTimeoutDuration& loopTimeoutDuration,
1610                                          const V1_3::OptionalTimeoutDuration& duration,
1611                                          executeFenced_cb callback) override;
1612 
1613    private:
1614     Subgraph* mSubgraph;
1615     std::vector<RunTimeOperandInfo> mOperands;
1616     pthreadpool* mThreadpool;
1617 };
1618 
// Burst executions are not implemented by this XNNPACK sample driver: the
// callback is always invoked with GENERAL_FAILURE and an empty burst context.
hardware::Return<void> SamplePreparedModelXNNPACK::configureExecutionBurst(
        const sp<V1_2::IBurstCallback>& callback,
        const MQDescriptorSync<V1_2::FmqRequestDatum>& requestChannel,
        const MQDescriptorSync<V1_2::FmqResultDatum>& resultChannel,
        configureExecutionBurst_cb cb) {
    VLOG(DRIVER) << "SamplePreparedModelXNNPACK::configureExecutionBurst not supported";
    cb(V1_0::ErrorStatus::GENERAL_FAILURE, {});
    return hardware::Void();
}
1628 
initialize()1629 bool SamplePreparedModelXNNPACK::initialize() {
1630     auto status = SamplePreparedModel::initialize();
1631     mThreadpool = pthreadpool_create(kNumOfWorkerThreads);
1632     if (mThreadpool == nullptr) {
1633         VLOG(DRIVER) << "SamplePreparedModelXNNPACK::initialize failed to create pthreadpool, "
1634                         "fallback to single threaded execution";
1635     }
1636     const V1_3::Model* model = getModel();
1637     mOperands = initializeRunTimeInfo(model->main, mPoolInfos, &model->operandValues);
1638     mSubgraph = Subgraph::Create(model->main.operations, mOperands, model->main.inputIndexes,
1639                                  model->main.outputIndexes, mThreadpool);
1640     return status;
1641 }
1642 
1643 template <typename T_IExecutionCallback>
asyncExecuteXNNPACK(Subgraph * subgraph,RunTimeOperandInfo * operands,const V1_3::Request & request,V1_2::MeasureTiming measure,const V1_3::Model & model,const LegacyOptionalTimePoint & deadline,const V1_3::OptionalTimeoutDuration & loopTimeoutDuration,const sp<T_IExecutionCallback> & callback)1644 void asyncExecuteXNNPACK(Subgraph* subgraph, RunTimeOperandInfo* operands,
1645                          const V1_3::Request& request, V1_2::MeasureTiming measure,
1646                          const V1_3::Model& model, const LegacyOptionalTimePoint& deadline,
1647                          const V1_3::OptionalTimeoutDuration& loopTimeoutDuration,
1648                          const sp<T_IExecutionCallback>& callback) {
1649     std::vector<RunTimePoolInfo> requestPoolInfos;
1650     if (!setRunTimePoolInfosFromMemoryPools(&requestPoolInfos, uncheckedConvert(request.pools))) {
1651         notify(callback, V1_3::ErrorStatus::GENERAL_FAILURE, {}, kNoTiming);
1652     }
1653     updateForArguments(model.main.inputIndexes, request.inputs, requestPoolInfos, operands);
1654     updateForArguments(model.main.outputIndexes, request.outputs, requestPoolInfos, operands);
1655     auto status = subgraph->Invoke(operands);
1656     VLOG(DRIVER) << "XNNPACK subgraph invoke returned " << toString(status);
1657     if (status == V1_3::ErrorStatus::NONE) {
1658         VLOG(DRIVER) << "Completed run normally";
1659         for (auto& runtimeInfo : requestPoolInfos) {
1660             runtimeInfo.flush();
1661         }
1662     }
1663     notify(callback, status, {}, kNoTiming);
1664 }
1665 
1666 template <typename T_IExecutionCallback>
executeXNNPACKBase(Subgraph * subgraph,RunTimeOperandInfo * operands,const V1_3::Request & request,V1_2::MeasureTiming measure,const V1_3::Model & model,const V1_3::OptionalTimePoint & halDeadline,const V1_3::OptionalTimeoutDuration & loopTimeoutDuration,const sp<T_IExecutionCallback> & callback)1667 V1_3::ErrorStatus executeXNNPACKBase(Subgraph* subgraph, RunTimeOperandInfo* operands,
1668                                      const V1_3::Request& request, V1_2::MeasureTiming measure,
1669                                      const V1_3::Model& model,
1670                                      const V1_3::OptionalTimePoint& halDeadline,
1671                                      const V1_3::OptionalTimeoutDuration& loopTimeoutDuration,
1672                                      const sp<T_IExecutionCallback>& callback) {
1673     VLOG(DRIVER) << "executeXNNPACKBase(" << SHOW_IF_DEBUG(toString(request)) << ")";
1674 
1675     if (callback.get() == nullptr) {
1676         LOG(ERROR) << "invalid callback passed to executeXNNPACKBase";
1677         return V1_3::ErrorStatus::INVALID_ARGUMENT;
1678     }
1679     if (!validateRequest(request, model, /*allowUnspecifiedOutput=*/false)) {
1680         notify(callback, V1_3::ErrorStatus::INVALID_ARGUMENT, {}, kNoTiming);
1681         return V1_3::ErrorStatus::INVALID_ARGUMENT;
1682     }
1683     const auto deadline = makeDeadline(halDeadline);
1684     if (hasDeadlinePassed(deadline)) {
1685         notify(callback, V1_3::ErrorStatus::MISSED_DEADLINE_PERSISTENT, {}, kNoTiming);
1686         return V1_3::ErrorStatus::NONE;
1687     }
1688 
1689     // This thread is intentionally detached because the sample driver service
1690     // is expected to live forever.
1691     std::thread([&subgraph, &operands, &model, request, measure, deadline, loopTimeoutDuration,
1692                  callback] {
1693         asyncExecuteXNNPACK(subgraph, operands, request, measure, model, deadline,
1694                             loopTimeoutDuration, callback);
1695     }).detach();
1696 
1697     return V1_3::ErrorStatus::NONE;
1698 }
1699 
execute(const V1_0::Request & request,const sp<V1_0::IExecutionCallback> & callback)1700 hardware::Return<V1_0::ErrorStatus> SamplePreparedModelXNNPACK::execute(
1701         const V1_0::Request& request, const sp<V1_0::IExecutionCallback>& callback) {
1702     const V1_3::Model* model = getModel();
1703     const V1_3::ErrorStatus status =
1704             executeXNNPACKBase(mSubgraph, mOperands.data(), convertToV1_3(request),
1705                                V1_2::MeasureTiming::NO, *model, {}, {}, callback);
1706     return convertToV1_0(status);
1707 }
1708 
execute_1_2(const V1_0::Request & request,V1_2::MeasureTiming measure,const sp<V1_2::IExecutionCallback> & callback)1709 hardware::Return<V1_0::ErrorStatus> SamplePreparedModelXNNPACK::execute_1_2(
1710         const V1_0::Request& request, V1_2::MeasureTiming measure,
1711         const sp<V1_2::IExecutionCallback>& callback) {
1712     const V1_3::Model* model = getModel();
1713     const V1_3::ErrorStatus status = executeXNNPACKBase(
1714             mSubgraph, mOperands.data(), convertToV1_3(request), measure, *model, {}, {}, callback);
1715     return convertToV1_0(status);
1716 }
1717 
execute_1_3(const V1_3::Request & request,V1_2::MeasureTiming measure,const V1_3::OptionalTimePoint & deadline,const V1_3::OptionalTimeoutDuration & loopTimeoutDuration,const sp<V1_3::IExecutionCallback> & callback)1718 hardware::Return<V1_3::ErrorStatus> SamplePreparedModelXNNPACK::execute_1_3(
1719         const V1_3::Request& request, V1_2::MeasureTiming measure,
1720         const V1_3::OptionalTimePoint& deadline,
1721         const V1_3::OptionalTimeoutDuration& loopTimeoutDuration,
1722         const sp<V1_3::IExecutionCallback>& callback) {
1723     const V1_3::Model* model = getModel();
1724     return executeXNNPACKBase(mSubgraph, mOperands.data(), request, measure, *model, deadline,
1725                               loopTimeoutDuration, callback);
1726 }
1727 
1728 static std::tuple<V1_3::ErrorStatus, hardware::hidl_vec<V1_2::OutputShape>, V1_2::Timing>
executeSynchronouslyXNNPACKBase(Subgraph * subgraph,RunTimeOperandInfo * operands,const V1_3::Request & request,V1_2::MeasureTiming measure,const V1_3::Model & model,const V1_3::OptionalTimePoint & halDeadline,const V1_3::OptionalTimeoutDuration & loopTimeoutDuration)1729 executeSynchronouslyXNNPACKBase(Subgraph* subgraph, RunTimeOperandInfo* operands,
1730                                 const V1_3::Request& request, V1_2::MeasureTiming measure,
1731                                 const V1_3::Model& model,
1732                                 const V1_3::OptionalTimePoint& halDeadline,
1733                                 const V1_3::OptionalTimeoutDuration& loopTimeoutDuration) {
1734     VLOG(DRIVER) << "executeSynchronouslyXNNPACKBase(" << SHOW_IF_DEBUG(toString(request)) << ")";
1735 
1736     if (!validateRequest(request, model, /*allowUnspecifiedOutput=*/false)) {
1737         return {V1_3::ErrorStatus::INVALID_ARGUMENT, {}, kNoTiming};
1738     }
1739     const auto deadline = makeDeadline(halDeadline);
1740     if (hasDeadlinePassed(deadline)) {
1741         return {V1_3::ErrorStatus::MISSED_DEADLINE_PERSISTENT, {}, kNoTiming};
1742     }
1743 
1744     std::vector<RunTimePoolInfo> requestPoolInfos;
1745     if (!setRunTimePoolInfosFromMemoryPools(&requestPoolInfos, uncheckedConvert(request.pools))) {
1746         return {V1_3::ErrorStatus::GENERAL_FAILURE, {}, kNoTiming};
1747     }
1748     updateForArguments(model.main.inputIndexes, request.inputs, requestPoolInfos, operands);
1749     updateForArguments(model.main.outputIndexes, request.outputs, requestPoolInfos, operands);
1750     VLOG(DRIVER) << "XNNPACK subgraph invoke started";
1751     auto status = subgraph->Invoke(operands);
1752     VLOG(DRIVER) << "XNNPACK subgraph invoke returned " << toString(status);
1753     if (status == V1_3::ErrorStatus::NONE) {
1754         VLOG(DRIVER) << "Completed run normally";
1755         for (auto& runtimeInfo : requestPoolInfos) {
1756             runtimeInfo.flush();
1757         }
1758     }
1759     return {status, {}, kNoTiming};
1760 }
1761 
executeSynchronously(const V1_0::Request & request,V1_2::MeasureTiming measure,executeSynchronously_cb cb)1762 hardware::Return<void> SamplePreparedModelXNNPACK::executeSynchronously(
1763         const V1_0::Request& request, V1_2::MeasureTiming measure, executeSynchronously_cb cb) {
1764     const V1_3::Model* model = getModel();
1765     auto [status, outputShapes, timing] = executeSynchronouslyXNNPACKBase(
1766             mSubgraph, mOperands.data(), convertToV1_3(request), measure, *model, {}, {});
1767     cb(convertToV1_0(status), std::move(outputShapes), timing);
1768     return hardware::Void();
1769 }
1770 
executeSynchronously_1_3(const V1_3::Request & request,V1_2::MeasureTiming measure,const V1_3::OptionalTimePoint & deadline,const V1_3::OptionalTimeoutDuration & loopTimeoutDuration,executeSynchronously_1_3_cb cb)1771 hardware::Return<void> SamplePreparedModelXNNPACK::executeSynchronously_1_3(
1772         const V1_3::Request& request, V1_2::MeasureTiming measure,
1773         const V1_3::OptionalTimePoint& deadline,
1774         const V1_3::OptionalTimeoutDuration& loopTimeoutDuration, executeSynchronously_1_3_cb cb) {
1775     const V1_3::Model* model = getModel();
1776     auto [status, outputShapes, timing] = executeSynchronouslyXNNPACKBase(
1777             mSubgraph, mOperands.data(), request, measure, *model, deadline, loopTimeoutDuration);
1778     cb(status, std::move(outputShapes), timing);
1779     return hardware::Void();
1780 }
1781 
1782 // The sample driver will finish the execution and then return.
executeFenced(const V1_3::Request & request,const hardware::hidl_vec<hardware::hidl_handle> & waitFor,V1_2::MeasureTiming measure,const V1_3::OptionalTimePoint & halDeadline,const V1_3::OptionalTimeoutDuration & loopTimeoutDuration,const V1_3::OptionalTimeoutDuration & duration,executeFenced_cb cb)1783 hardware::Return<void> SamplePreparedModelXNNPACK::executeFenced(
1784         const V1_3::Request& request, const hardware::hidl_vec<hardware::hidl_handle>& waitFor,
1785         V1_2::MeasureTiming measure, const V1_3::OptionalTimePoint& halDeadline,
1786         const V1_3::OptionalTimeoutDuration& loopTimeoutDuration,
1787         const V1_3::OptionalTimeoutDuration& duration, executeFenced_cb cb) {
1788     VLOG(DRIVER) << "executeFenced(" << SHOW_IF_DEBUG(toString(request)) << ")";
1789     const V1_3::Model* model = getModel();
1790     if (!validateRequest(request, *model, /*allowUnspecifiedOutput=*/false)) {
1791         cb(V1_3::ErrorStatus::INVALID_ARGUMENT, hardware::hidl_handle(nullptr), nullptr);
1792         return hardware::Void();
1793     }
1794     const auto deadline = makeDeadline(halDeadline);
1795     if (hasDeadlinePassed(deadline)) {
1796         cb(V1_3::ErrorStatus::MISSED_DEADLINE_PERSISTENT, hardware::hidl_handle(nullptr), nullptr);
1797         return hardware::Void();
1798     }
1799 
1800     // Wait for the dependent events to signal
1801     for (const auto& fenceHandle : waitFor) {
1802         if (!fenceHandle.getNativeHandle()) {
1803             cb(V1_3::ErrorStatus::INVALID_ARGUMENT, hardware::hidl_handle(nullptr), nullptr);
1804             return hardware::Void();
1805         }
1806         int syncFenceFd = fenceHandle.getNativeHandle()->data[0];
1807         if (syncWait(syncFenceFd, -1) != FenceState::SIGNALED) {
1808             LOG(ERROR) << "syncWait failed";
1809             cb(V1_3::ErrorStatus::GENERAL_FAILURE, hardware::hidl_handle(nullptr), nullptr);
1810             return hardware::Void();
1811         }
1812     }
1813     std::vector<RunTimePoolInfo> requestPoolInfos;
1814     if (!setRunTimePoolInfosFromMemoryPools(&requestPoolInfos, uncheckedConvert(request.pools))) {
1815         cb(V1_3::ErrorStatus::GENERAL_FAILURE, hardware::hidl_handle(nullptr), nullptr);
1816     }
1817     updateForArguments(model->main.inputIndexes, request.inputs, requestPoolInfos,
1818                        mOperands.data());
1819     updateForArguments(model->main.outputIndexes, request.outputs, requestPoolInfos,
1820                        mOperands.data());
1821     auto status = mSubgraph->Invoke(mOperands.data());
1822     VLOG(DRIVER) << "XNNPACK subgraph invoke returned " << toString(status);
1823     if (status == V1_3::ErrorStatus::NONE) {
1824         VLOG(DRIVER) << "Completed run normally";
1825         for (auto& runtimeInfo : requestPoolInfos) {
1826             runtimeInfo.flush();
1827         }
1828     }
1829 
1830     sp<SampleFencedExecutionCallback> fencedExecutionCallback =
1831             new SampleFencedExecutionCallback(kNoTiming, kNoTiming, status);
1832     cb(status, hardware::hidl_handle(nullptr), fencedExecutionCallback);
1833     return hardware::Void();
1834 }
1835 
// HIDL driver entry object for the float XNNPACK sample backend. Builds on
// SampleDriverPartial for partial-model support and routes model preparation
// through the XNNPACK prepared-model path defined in this file.
class SampleDriverFloatXNNPACK : public SampleDriverPartial {
   public:
    SampleDriverFloatXNNPACK(const std::string& name) : SampleDriverPartial(name.c_str()) {}
    // Advertises boosted performance numbers for FLOAT32 operand types.
    hardware::Return<void> getCapabilities_1_3(getCapabilities_1_3_cb cb) override;
    // The prepareModel* overloads below all funnel into prepareModelXNNPACK().
    hardware::Return<V1_0::ErrorStatus> prepareModel(
            const V1_0::Model& model, const sp<V1_0::IPreparedModelCallback>& callback) override;
    hardware::Return<V1_0::ErrorStatus> prepareModel_1_1(
            const V1_1::Model& model, V1_1::ExecutionPreference preference,
            const sp<V1_0::IPreparedModelCallback>& callback) override;
    // Model/data cache handles and the token are accepted for interface
    // compatibility; this driver does not implement model caching.
    hardware::Return<V1_0::ErrorStatus> prepareModel_1_2(
            const V1_2::Model& model, V1_1::ExecutionPreference preference,
            const hardware::hidl_vec<hardware::hidl_handle>& modelCache,
            const hardware::hidl_vec<hardware::hidl_handle>& dataCache, const HalCacheToken& token,
            const sp<V1_2::IPreparedModelCallback>& callback) override;
    hardware::Return<V1_3::ErrorStatus> prepareModel_1_3(
            const V1_3::Model& model, V1_1::ExecutionPreference preference, V1_3::Priority priority,
            const V1_3::OptionalTimePoint& deadline,
            const hardware::hidl_vec<hardware::hidl_handle>& modelCache,
            const hardware::hidl_vec<hardware::hidl_handle>& dataCache, const HalCacheToken& token,
            const sp<V1_3::IPreparedModelCallback>& callback) override;
    // Driver-managed buffer allocation is not supported (always fails).
    hardware::Return<void> allocate(
            const V1_3::BufferDesc& desc,
            const hardware::hidl_vec<sp<V1_3::IPreparedModel>>& preparedModels,
            const hardware::hidl_vec<V1_3::BufferRole>& inputRoles,
            const hardware::hidl_vec<V1_3::BufferRole>& outputRoles, allocate_cb cb) override;

   private:
    // Marks an operation supported iff Subgraph::VisitNode accepts it.
    std::vector<bool> getSupportedOperationsImpl(const V1_3::Model& model) const override;
};
1865 
1866 template <typename T_Model, typename T_IPreparedModelCallback>
prepareModelXNNPACK(const T_Model & model,const SampleDriver * driver,V1_1::ExecutionPreference preference,V1_3::Priority priority,const V1_3::OptionalTimePoint & deadline,const sp<T_IPreparedModelCallback> & callback)1867 V1_3::ErrorStatus prepareModelXNNPACK(const T_Model& model, const SampleDriver* driver,
1868                                       V1_1::ExecutionPreference preference, V1_3::Priority priority,
1869                                       const V1_3::OptionalTimePoint& deadline,
1870                                       const sp<T_IPreparedModelCallback>& callback) {
1871     const uid_t userId = hardware::IPCThreadState::self()->getCallingUid();
1872     if (callback.get() == nullptr) {
1873         LOG(ERROR) << "invalid callback passed to prepareModelBase";
1874         return V1_3::ErrorStatus::INVALID_ARGUMENT;
1875     }
1876     if (VLOG_IS_ON(DRIVER)) {
1877         VLOG(DRIVER) << "prepareModelBase";
1878         logModelToInfo(model);
1879     }
1880     if (!validateModel(model) || !validateExecutionPreference(preference) ||
1881         !validatePriority(priority)) {
1882         notify(callback, V1_3::ErrorStatus::INVALID_ARGUMENT, nullptr);
1883         return V1_3::ErrorStatus::INVALID_ARGUMENT;
1884     }
1885 
1886     // asynchronously prepare the model from a new, detached thread
1887     std::thread([model, driver, preference, userId, priority, callback] {
1888         sp<SamplePreparedModelXNNPACK> preparedModel = new SamplePreparedModelXNNPACK(
1889                 convertToV1_3(model), driver, preference, userId, priority);
1890         if (!preparedModel->initialize()) {
1891             notify(callback, V1_3::ErrorStatus::INVALID_ARGUMENT, nullptr);
1892             return;
1893         }
1894         notify(callback, V1_3::ErrorStatus::NONE, preparedModel);
1895     }).detach();
1896 
1897     return V1_3::ErrorStatus::NONE;
1898 }
1899 
prepareModel(const V1_0::Model & model,const sp<V1_0::IPreparedModelCallback> & callback)1900 hardware::Return<V1_0::ErrorStatus> SampleDriverFloatXNNPACK::prepareModel(
1901         const V1_0::Model& model, const sp<V1_0::IPreparedModelCallback>& callback) {
1902     const V1_3::ErrorStatus status =
1903             prepareModelXNNPACK(model, this, V1_1::ExecutionPreference::FAST_SINGLE_ANSWER,
1904                                 kDefaultPriority13, {}, callback);
1905     return convertToV1_0(status);
1906 }
1907 
prepareModel_1_1(const V1_1::Model & model,V1_1::ExecutionPreference preference,const sp<V1_0::IPreparedModelCallback> & callback)1908 hardware::Return<V1_0::ErrorStatus> SampleDriverFloatXNNPACK::prepareModel_1_1(
1909         const V1_1::Model& model, V1_1::ExecutionPreference preference,
1910         const sp<V1_0::IPreparedModelCallback>& callback) {
1911     const V1_3::ErrorStatus status =
1912             prepareModelXNNPACK(model, this, preference, kDefaultPriority13, {}, callback);
1913     return convertToV1_0(status);
1914 }
1915 
prepareModel_1_2(const V1_2::Model & model,V1_1::ExecutionPreference preference,const hardware::hidl_vec<hardware::hidl_handle> &,const hardware::hidl_vec<hardware::hidl_handle> &,const HalCacheToken &,const sp<V1_2::IPreparedModelCallback> & callback)1916 hardware::Return<V1_0::ErrorStatus> SampleDriverFloatXNNPACK::prepareModel_1_2(
1917         const V1_2::Model& model, V1_1::ExecutionPreference preference,
1918         const hardware::hidl_vec<hardware::hidl_handle>&,
1919         const hardware::hidl_vec<hardware::hidl_handle>&, const HalCacheToken&,
1920         const sp<V1_2::IPreparedModelCallback>& callback) {
1921     const V1_3::ErrorStatus status =
1922             prepareModelXNNPACK(model, this, preference, kDefaultPriority13, {}, callback);
1923     return convertToV1_0(status);
1924 }
1925 
prepareModel_1_3(const V1_3::Model & model,V1_1::ExecutionPreference preference,V1_3::Priority priority,const V1_3::OptionalTimePoint & deadline,const hardware::hidl_vec<hardware::hidl_handle> & modelCache,const hardware::hidl_vec<hardware::hidl_handle> & dataCache,const HalCacheToken & token,const sp<V1_3::IPreparedModelCallback> & callback)1926 hardware::Return<V1_3::ErrorStatus> SampleDriverFloatXNNPACK::prepareModel_1_3(
1927         const V1_3::Model& model, V1_1::ExecutionPreference preference, V1_3::Priority priority,
1928         const V1_3::OptionalTimePoint& deadline,
1929         const hardware::hidl_vec<hardware::hidl_handle>& modelCache,
1930         const hardware::hidl_vec<hardware::hidl_handle>& dataCache, const HalCacheToken& token,
1931         const sp<V1_3::IPreparedModelCallback>& callback) {
1932     return prepareModelXNNPACK(model, this, preference, priority, deadline, callback);
1933 }
1934 
getCapabilities_1_3(getCapabilities_1_3_cb cb)1935 hardware::Return<void> SampleDriverFloatXNNPACK::getCapabilities_1_3(getCapabilities_1_3_cb cb) {
1936     android::nn::initVLogMask();
1937     VLOG(DRIVER) << "SampleDriverFloatXNNPACK::getCapabilities()";
1938 
1939     V1_3::Capabilities capabilities = {
1940             .relaxedFloat32toFloat16PerformanceScalar = {.execTime = 0.7f, .powerUsage = 1.1f},
1941             .relaxedFloat32toFloat16PerformanceTensor = {.execTime = 0.7f, .powerUsage = 1.1f},
1942             .operandPerformance = nonExtensionOperandPerformance<HalVersion::V1_3>({1.0f, 1.0f}),
1943             .ifPerformance = {.execTime = 1.0f, .powerUsage = 1.0f},
1944             .whilePerformance = {.execTime = 1.0f, .powerUsage = 1.0f}};
1945     update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_FLOAT32,
1946            {.execTime = 0.8f, .powerUsage = 1.2f});
1947     update(&capabilities.operandPerformance, V1_3::OperandType::FLOAT32,
1948            {.execTime = 0.8f, .powerUsage = 1.2f});
1949 
1950     cb(V1_3::ErrorStatus::NONE, capabilities);
1951     return hardware::Void();
1952 }
1953 
getSupportedOperationsImpl(const V1_3::Model & model) const1954 std::vector<bool> SampleDriverFloatXNNPACK::getSupportedOperationsImpl(
1955         const V1_3::Model& model) const {
1956     std::vector<RunTimePoolInfo> poolInfos;
1957     setRunTimePoolInfosFromCanonicalMemories(&poolInfos, uncheckedConvert(model.pools));
1958     auto operands = initializeRunTimeInfo(model.main, poolInfos, &model.operandValues);
1959     const size_t count = model.main.operations.size();
1960     std::vector<bool> supported(count);
1961     for (size_t i = 0; i < count; i++) {
1962         bool isSupportedOp = false;
1963         const V1_3::Operation& operation = model.main.operations[i];
1964         if (Subgraph::VisitNode(/*subgraph=*/nullptr, operation, operands.data(), {}) ==
1965             V1_3::ErrorStatus::NONE) {
1966             isSupportedOp = true;
1967         }
1968         supported[i] = isSupportedOp;
1969     }
1970     return supported;
1971 }
1972 
allocate(const V1_3::BufferDesc & desc,const hardware::hidl_vec<sp<V1_3::IPreparedModel>> & preparedModels,const hardware::hidl_vec<V1_3::BufferRole> & inputRoles,const hardware::hidl_vec<V1_3::BufferRole> & outputRoles,allocate_cb cb)1973 hardware::Return<void> SampleDriverFloatXNNPACK::allocate(
1974         const V1_3::BufferDesc& desc,
1975         const hardware::hidl_vec<sp<V1_3::IPreparedModel>>& preparedModels,
1976         const hardware::hidl_vec<V1_3::BufferRole>& inputRoles,
1977         const hardware::hidl_vec<V1_3::BufferRole>& outputRoles, allocate_cb cb) {
1978     VLOG(DRIVER) << "SampleDriverFloatXNNPACK::allocate not supported";
1979     constexpr uint32_t kInvalidBufferToken = 0;
1980     cb(V1_3::ErrorStatus::INVALID_ARGUMENT, nullptr, kInvalidBufferToken);
1981     return hardware::Void();
1982 }
1983 
1984 }  // namespace sample_driver
1985 }  // namespace nn
1986 }  // namespace android
1987 
1988 using android::sp;
1989 using android::nn::sample_driver::SampleDriverFloatXNNPACK;
1990 
main()1991 int main() {
1992     const std::string name = "nnapi-sample_float_xnnpack";
1993     const auto driver = sp<SampleDriverFloatXNNPACK>::make(name);
1994     xnn_status status = xnn_initialize(/*allocator=*/nullptr);
1995     if (status != xnn_status_success) {
1996         return 0;
1997     }
1998     return run(driver, name);
1999 }
2000