/*
 * Copyright (C) 2024 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <NeuralNetworks.h>
#include <android/log.h>
#include <android/sharedmem.h>
#include <audio_utils/float_test_utils.h>
#include <gtest/gtest.h>
#include <sys/mman.h>
#include <unistd.h>
#include <utils/Log.h>

#include <cmath>
#include <functional>
#include <numeric>
#include <string>
#include <vector>

#define FLOAT_EPSILON (1e-6)
#undef LOG_TAG
#define LOG_TAG "audio_nnapi_tests"

template <typename T>
static T product(const std::vector<T>& values) {
    return std::accumulate(values.begin(), values.end(), T(1), std::multiplies<>());
}
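// For example, product<uint32_t>({5, 10, 2, 2}) == 200, the element count of
// the 5x10x2x2 tensor exercised by the test at the end of this file.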

// As of Android API level 35, NNAPI is deprecated: b/283927643
//
// The deprecation is not yet reflected on the developer site:
// https://developer.android.com/ndk/reference/group/neural-networks
// External clients may bundle TFLite themselves or access it through
// Google Play services:
// https://www.tensorflow.org/lite/android/play_services
//
// This test is taken from the Android NDK samples here:
// https://github.com/android/ndk-samples/blob/main/nn-samples/basic/src/main/cpp/simple_model.cpp

/**
 * AddMulModel
 *
 * Build up the hardcoded graph of tensor inputs to output.
 *
 * tensor0 ---+
 *            +--- ADD ---> intermediateOutput0 ---+
 * tensor1 ---+                                    |
 *                                                 +--- MUL ---> output
 * tensor2 ---+                                    |
 *            +--- ADD ---> intermediateOutput1 ---+
 * tensor3 ---+
 *
 * Each operand is a tensor whose dimensions are specified as a
 * std::vector<uint32_t> passed to CreateModel, and may be multidimensional.
 */
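//
// Typical usage, as exercised by the test at the bottom of this file:
//
//   AddMulModel model;
//   float result = 0.f;
//   model.CreateModel({5, 10, 2, 2});
//   model.Compute(10.f, 11.f, 12.f, 13.f, &result);
//   // result == (10.f + 11.f) * (12.f + 13.f)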

class AddMulModel {
  public:
    ~AddMulModel();

    bool CreateModel(const std::vector<uint32_t>& dimensions);
    bool Compute(float inputValue0, float inputValue1, float inputValue2, float inputValue3,
                 float* result);

  private:
    ANeuralNetworksModel* model_ = nullptr;
    ANeuralNetworksCompilation* compilation_ = nullptr;

    // For member variables, "inputN" corresponds to "tensorN".
    //
    // input0 and input2 are passed directly as user buffers;
    // input1 and input3 are passed through shared memory,
    // which requires the declarations below.

    ANeuralNetworksMemory* memoryInput1_ = nullptr;
    ANeuralNetworksMemory* memoryInput3_ = nullptr;
    ANeuralNetworksMemory* memoryOutput_ = nullptr;

    int inputTensor1Fd_ = -1;
    int inputTensor3Fd_ = -1;
    int outputTensorFd_ = -1;

    float* inputTensor1Ptr_ = nullptr;
    float* inputTensor3Ptr_ = nullptr;
    float* outputTensorPtr_ = nullptr;

    uint32_t tensorSize_ = 0;
};

AddMulModel::~AddMulModel() {
    ANeuralNetworksCompilation_free(compilation_);
    ANeuralNetworksModel_free(model_);
    ANeuralNetworksMemory_free(memoryInput1_);
    ANeuralNetworksMemory_free(memoryInput3_);
    ANeuralNetworksMemory_free(memoryOutput_);

    if (inputTensor1Ptr_ != nullptr) {
        munmap(inputTensor1Ptr_, tensorSize_ * sizeof(float));
        inputTensor1Ptr_ = nullptr;
    }
    if (inputTensor3Ptr_ != nullptr) {
        munmap(inputTensor3Ptr_, tensorSize_ * sizeof(float));
        inputTensor3Ptr_ = nullptr;
    }
    if (outputTensorPtr_ != nullptr) {
        munmap(outputTensorPtr_, tensorSize_ * sizeof(float));
        outputTensorPtr_ = nullptr;
    }

    if (inputTensor1Fd_ != -1) close(inputTensor1Fd_);
    if (inputTensor3Fd_ != -1) close(inputTensor3Fd_);
    if (outputTensorFd_ != -1) close(outputTensorFd_);
}

/**
 * Create a graph that consists of three operations: two additions and a
 * multiplication.
 * The sums created by the additions are the inputs to the multiplication. In
 * essence, we are creating a graph that computes:
 *        (tensor0 + tensor1) * (tensor2 + tensor3).
 *
 * tensor0 ---+
 *            +--- ADD ---> intermediateOutput0 ---+
 * tensor1 ---+                                    |
 *                                                 +--- MUL ---> output
 * tensor2 ---+                                    |
 *            +--- ADD ---> intermediateOutput1 ---+
 * tensor3 ---+
 *
 * All four tensors are inputs to the model. Their values will be provided
 * when we execute the model. These values can change from execution to
 * execution.
 *
 * Besides the four input tensors, an optional fused activation function can
 * also be defined for ADD and MUL. In this example, we'll simply set it to
 * NONE.
 *
 * The graph then has 8 operands:
 *  - 4 tensors that are inputs to the model. These are fed to the two
 *    ADD operations.
 *  - 1 fused activation operand reused for the ADD operations and the MUL
 *    operation.
 *  - 2 intermediate tensors, representing outputs of the ADD operations and
 *    inputs to the MUL operation.
 *  - 1 model output.
 *
 * @return true for success, false otherwise
 */
bool AddMulModel::CreateModel(const std::vector<uint32_t>& dimensions) {
    int32_t status;

    // Create the ANeuralNetworksModel handle.
    status = ANeuralNetworksModel_create(&model_);
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "ANeuralNetworksModel_create failed");
        return false;
    }

    // The total number of tensor elements is the product of the dimensions.
    tensorSize_ = product(dimensions);

    ANeuralNetworksOperandType float32TensorType{
            .type = ANEURALNETWORKS_TENSOR_FLOAT32,
            .dimensionCount = static_cast<uint32_t>(dimensions.size()),
            .dimensions = dimensions.data(),
            .scale = 0.0f,
            .zeroPoint = 0,
    };
    ANeuralNetworksOperandType scalarInt32Type{
            .type = ANEURALNETWORKS_INT32,
            .dimensionCount = 0,
            .dimensions = nullptr,
            .scale = 0.0f,
            .zeroPoint = 0,
    };
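    // Note: scale and zeroPoint are only meaningful for quantized tensor
    // types; for FLOAT32 and INT32 operands they are left at 0.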

    /**
     * Add operands and operations to construct the model.
     *
     * Operands are implicitly identified by the order in which they are added to
     * the model, starting from 0.
     *
     * These indexes are not returned by the model_addOperand call. The
     * application must manage these values. Here, we use opIdx to do the
     * bookkeeping.
     */
    uint32_t opIdx = 0;

    // We first add the operand for the NONE activation function, and set its
    // value to ANEURALNETWORKS_FUSED_NONE.
    // This constant scalar operand will be used for all 3 operations.
    status = ANeuralNetworksModel_addOperand(model_, &scalarInt32Type);
    uint32_t fusedActivationFuncNone = opIdx++;
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksModel_addOperand failed for operand (%d)",
                            fusedActivationFuncNone);
        return false;
    }

    FuseCode fusedActivationCodeValue = ANEURALNETWORKS_FUSED_NONE;
    status = ANeuralNetworksModel_setOperandValue(model_, fusedActivationFuncNone,
                                                  &fusedActivationCodeValue,
                                                  sizeof(fusedActivationCodeValue));
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksModel_setOperandValue failed for operand (%d)",
                            fusedActivationFuncNone);
        return false;
    }
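    // Note: values no larger than ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES
    // (128 bytes), like this scalar, are copied into the model immediately,
    // so the source buffer need not outlive the setOperandValue call.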

    // Add operands for the tensors.

    // Add 4 input tensors.
    status = ANeuralNetworksModel_addOperand(model_, &float32TensorType);
    uint32_t tensor0 = opIdx++;
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksModel_addOperand failed for operand (%d)", tensor0);
        return false;
    }

    status = ANeuralNetworksModel_addOperand(model_, &float32TensorType);
    uint32_t tensor1 = opIdx++;
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksModel_addOperand failed for operand (%d)", tensor1);
        return false;
    }

    status = ANeuralNetworksModel_addOperand(model_, &float32TensorType);
    uint32_t tensor2 = opIdx++;
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksModel_addOperand failed for operand (%d)", tensor2);
        return false;
    }

    status = ANeuralNetworksModel_addOperand(model_, &float32TensorType);
    uint32_t tensor3 = opIdx++;
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksModel_addOperand failed for operand (%d)", tensor3);
        return false;
    }

    // intermediateOutput0 is the output of the first ADD operation.
    // Its value is computed during execution.
    status = ANeuralNetworksModel_addOperand(model_, &float32TensorType);
    uint32_t intermediateOutput0 = opIdx++;
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksModel_addOperand failed for operand (%d)",
                            intermediateOutput0);
        return false;
    }

    // intermediateOutput1 is the output of the second ADD operation.
    // Its value is computed during execution.
    status = ANeuralNetworksModel_addOperand(model_, &float32TensorType);
    uint32_t intermediateOutput1 = opIdx++;
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksModel_addOperand failed for operand (%d)",
                            intermediateOutput1);
        return false;
    }

    // multiplierOutput is the output of the MUL operation.
    // Its value will be computed during execution.
    status = ANeuralNetworksModel_addOperand(model_, &float32TensorType);
    uint32_t multiplierOutput = opIdx++;
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksModel_addOperand failed for operand (%d)",
                            multiplierOutput);
        return false;
    }

    // Add the first ADD operation.
    std::vector<uint32_t> add1InputOperands = {
            tensor0,
            tensor1,
            fusedActivationFuncNone,
    };
    status =
            ANeuralNetworksModel_addOperation(model_, ANEURALNETWORKS_ADD, add1InputOperands.size(),
                                              add1InputOperands.data(), 1, &intermediateOutput0);
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksModel_addOperation failed for ADD_1");
        return false;
    }

    // Add the second ADD operation.
    // Note the fusedActivationFuncNone is used again.
    std::vector<uint32_t> add2InputOperands = {
            tensor2,
            tensor3,
            fusedActivationFuncNone,
    };
    status =
            ANeuralNetworksModel_addOperation(model_, ANEURALNETWORKS_ADD, add2InputOperands.size(),
                                              add2InputOperands.data(), 1, &intermediateOutput1);
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksModel_addOperation failed for ADD_2");
        return false;
    }

    // Add the MUL operation.
    // Note that intermediateOutput0 and intermediateOutput1 are specified
    // as inputs to the operation.
    std::vector<uint32_t> mulInputOperands = {intermediateOutput0, intermediateOutput1,
                                              fusedActivationFuncNone};
    status = ANeuralNetworksModel_addOperation(model_, ANEURALNETWORKS_MUL, mulInputOperands.size(),
                                               mulInputOperands.data(), 1, &multiplierOutput);
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksModel_addOperation failed for MUL");
        return false;
    }

    // Identify the input and output tensors to the model.
    // Inputs: {tensor0, tensor1, tensor2, tensor3}
    // Outputs: {multiplierOutput}
    std::vector<uint32_t> modelInputOperands = {
            tensor0,
            tensor1,
            tensor2,
            tensor3,
    };
    status = ANeuralNetworksModel_identifyInputsAndOutputs(
            model_, modelInputOperands.size(), modelInputOperands.data(), 1, &multiplierOutput);
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksModel_identifyInputsAndOutputs failed");
        return false;
    }

    // Relaxing FP32 computation to FP16 precision is required for the TPU.
    status = ANeuralNetworksModel_relaxComputationFloat32toFloat16(model_, true);
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksModel_relaxComputationFloat32toFloat16 failed");
        return false;
    }

    // Finish constructing the model.
    // The values of constant and intermediate operands cannot be altered after
    // the finish function is called.
    status = ANeuralNetworksModel_finish(model_);
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "ANeuralNetworksModel_finish failed");
        return false;
    }

    // Create the ANeuralNetworksCompilation object for the constructed model.
    status = ANeuralNetworksCompilation_create(model_, &compilation_);
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "ANeuralNetworksCompilation_create failed");
        return false;
    }

    // Set the preference for the compilation, so that the runtime and drivers
    // can make better decisions.
    // Here we prefer to get the answer quickly, so we choose
    // ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER.
    status = ANeuralNetworksCompilation_setPreference(compilation_,
                                                      ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER);
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksCompilation_setPreference failed");
        return false;
    }

    // Finish the compilation.
    status = ANeuralNetworksCompilation_finish(compilation_);
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "ANeuralNetworksCompilation_finish failed");
        return false;
    }
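    // Note: ANeuralNetworksCompilation_create lets the runtime choose the
    // devices; ANeuralNetworksCompilation_createForDevices (API 29+) could be
    // used instead to target a specific accelerator.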

    inputTensor1Fd_ = ASharedMemory_create("input1", tensorSize_ * sizeof(float));
    EXPECT_NE(-1, inputTensor1Fd_);
    inputTensor1Ptr_ =
            reinterpret_cast<float*>(mmap(nullptr, tensorSize_ * sizeof(float),
                                          PROT_READ | PROT_WRITE, MAP_SHARED, inputTensor1Fd_, 0));

    EXPECT_NE(MAP_FAILED, (void*)inputTensor1Ptr_);

    status = ANeuralNetworksMemory_createFromFd(tensorSize_ * sizeof(float), PROT_READ,
                                                inputTensor1Fd_, 0, &memoryInput1_);
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksMemory_createFromFd failed for Input1");
        return false;
    }

    inputTensor3Fd_ = ASharedMemory_create("input3", tensorSize_ * sizeof(float));
    EXPECT_NE(-1, inputTensor3Fd_);
    inputTensor3Ptr_ =
            reinterpret_cast<float*>(mmap(nullptr, tensorSize_ * sizeof(float),
                                          PROT_READ | PROT_WRITE, MAP_SHARED, inputTensor3Fd_, 0));
    EXPECT_NE(MAP_FAILED, (void*)inputTensor3Ptr_);
    status = ANeuralNetworksMemory_createFromFd(tensorSize_ * sizeof(float), PROT_READ,
                                                inputTensor3Fd_, 0, &memoryInput3_);
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksMemory_createFromFd failed for Input3");
        return false;
    }

    // Set the output tensor that will be filled by executing the model.
    // We use shared memory here to minimize the copies needed for getting the
    // output data.
    outputTensorFd_ = ASharedMemory_create("output", tensorSize_ * sizeof(float));
    EXPECT_NE(-1, outputTensorFd_);

    outputTensorPtr_ = reinterpret_cast<float*>(
            mmap(nullptr, tensorSize_ * sizeof(float), PROT_READ, MAP_SHARED, outputTensorFd_, 0));
    EXPECT_NE(MAP_FAILED, (void*)outputTensorPtr_);
    status = ANeuralNetworksMemory_createFromFd(tensorSize_ * sizeof(float), PROT_READ | PROT_WRITE,
                                                outputTensorFd_, 0, &memoryOutput_);
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksMemory_createFromFd failed for Output");
        return false;
    }

    return true;
}

/**
 * Compute with the given input data.
 *
 * @param inputValue0 value to fill tensor0
 * @param inputValue1 value to fill tensor1
 * @param inputValue2 value to fill tensor2
 * @param inputValue3 value to fill tensor3
 * @param result is the output value
 * @return true on success
 */
bool AddMulModel::Compute(float inputValue0, float inputValue1, float inputValue2,
                          float inputValue3, float* result) {
    if (!result) {
        return false;
    }

    // Create an ANeuralNetworksExecution object from the compiled model.
    // Note:
    //   1. All the input and output data are tied to the ANeuralNetworksExecution
    //   object.
    //   2. Multiple concurrent execution instances could be created from the same
    //   compiled model.
    // This sample only uses one execution of the compiled model.
    ANeuralNetworksExecution* execution;
    int32_t status = ANeuralNetworksExecution_create(compilation_, &execution);
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "ANeuralNetworksExecution_create failed");
        return false;
    }

    // Set all the elements of the first input tensor (tensor0) to the same value
    // as inputValue0. It's not a realistic example but it shows how to pass a
    // small tensor to an execution.
    std::vector<float> inputTensor0(tensorSize_, inputValue0);

    // Tell the execution to associate inputTensor0 to the first of the model
    // inputs. Note that the index "0" here means the first operand of the
    // modelInput list.
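    // Passing nullptr for the ANeuralNetworksOperandType means the operand
    // type as specified in the model is used unchanged.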
    status = ANeuralNetworksExecution_setInput(execution, 0, nullptr, inputTensor0.data(),
                                               tensorSize_ * sizeof(float));
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksExecution_setInput failed for input0");
        return false;
    }

    // ALTERNATIVELY
    // Set the values of the second input operand (tensor1) to be inputValue1.
    // In reality, the values in the shared memory region will be manipulated by
    // other modules or processes.

    for (size_t i = 0; i < tensorSize_; i++) {
        inputTensor1Ptr_[i] = inputValue1;
    }
    status = ANeuralNetworksExecution_setInputFromMemory(execution, 1, nullptr, memoryInput1_, 0,
                                                         tensorSize_ * sizeof(float));
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksExecution_setInputFromMemory failed for input1");
        return false;
    }

    // Set all the elements of the third input tensor (tensor2) to the same value
    // as inputValue2. It's not a realistic example but it shows how to pass a
    // small tensor to an execution.
    std::vector<float> inputTensor2(tensorSize_, inputValue2);

    status = ANeuralNetworksExecution_setInput(execution, 2, nullptr, inputTensor2.data(),
                                               tensorSize_ * sizeof(float));
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksExecution_setInput failed for input2");
        return false;
    }

    // ALTERNATIVELY
    // Set the values of the fourth input operand (tensor3) to be inputValue3.
    // In reality, the values in the shared memory region will be manipulated by
    // other modules or processes.

    for (size_t i = 0; i < tensorSize_; i++) {
        inputTensor3Ptr_[i] = inputValue3;
    }

    status = ANeuralNetworksExecution_setInputFromMemory(execution, 3, nullptr, memoryInput3_, 0,
                                                         tensorSize_ * sizeof(float));
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksExecution_setInputFromMemory failed for input3");
        return false;
    }

    // Set the output tensor that will be filled by executing the model.
    // We use shared memory here to minimize the copies needed for getting the
    // output data.

    status = ANeuralNetworksExecution_setOutputFromMemory(execution, 0, nullptr, memoryOutput_, 0,
                                                          tensorSize_ * sizeof(float));
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksExecution_setOutputFromMemory failed for output");
        return false;
    }

    // Start the execution of the model.
    // Note that the execution here is asynchronous, and an ANeuralNetworksEvent
    // object will be created to monitor the status of the execution.
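    // (ANeuralNetworksExecution_compute, available since API 29, offers a
    // synchronous alternative that combines startCompute and the event wait.)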
    ANeuralNetworksEvent* event = nullptr;
    status = ANeuralNetworksExecution_startCompute(execution, &event);
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "ANeuralNetworksExecution_startCompute failed");
        return false;
    }

    // Wait until the completion of the execution. This could be done on a
    // different thread. By waiting immediately, we effectively make this a
    // synchronous call.
    status = ANeuralNetworksEvent_wait(event);
    if (status != ANEURALNETWORKS_NO_ERROR) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "ANeuralNetworksEvent_wait failed");
        return false;
    }

    ANeuralNetworksEvent_free(event);
    ANeuralNetworksExecution_free(execution);

    // Validate the results.
    // Note: the output region was already mapped at outputTensorPtr_ in
    // CreateModel(); re-mapping it on every Compute() call would leak mappings.
    const float goldenRef = (inputValue0 + inputValue1) * (inputValue2 + inputValue3);
    for (size_t idx = 0; idx < tensorSize_; idx++) {
        float delta = outputTensorPtr_[idx] - goldenRef;
        delta = (delta < 0.0f) ? (-delta) : delta;
        if (delta > FLOAT_EPSILON) {
            __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                                "Output computation Error: output(%f), delta(%f) @ idx(%zu)",
                                outputTensorPtr_[idx], delta, idx);
        }
    }
    *result = outputTensorPtr_[0];
    return true;
}

TEST(audio_nnapi_tests, AddMulModel) {
    AddMulModel model;
    float result = 0.f;
    EXPECT_EQ(true, model.CreateModel({5, 10, 2, 2}));  // 5x10x2x2 tensor
    EXPECT_EQ(true, model.Compute(10.f, 11.f, 12.f, 13.f, &result));
    EXPECT_EQ((10.f + 11.f) * (12.f + 13.f), result);

    EXPECT_EQ(true, model.Compute(5.f, 6.f, 7.f, 8.f, &result));
    EXPECT_EQ((5.f + 6.f) * (7.f + 8.f), result);

#if 0
    // Enable this for precision testing.

    // Precision test for CPU
    // The ARM CPU supports subnormals.
    //
    // single precision float
    EXPECT_EQ(127, android::audio_utils::test::test_max_exponent<float>());
    EXPECT_EQ(-149, android::audio_utils::test::test_min_exponent<float>());
    EXPECT_EQ(23, android::audio_utils::test::test_mantissa<float>());

    // double precision float
    EXPECT_EQ(1023, android::audio_utils::test::test_max_exponent<double>());
    EXPECT_EQ(-1074, android::audio_utils::test::test_min_exponent<double>());
    EXPECT_EQ(52, android::audio_utils::test::test_mantissa<double>());

    // Precision test for Edge TPU
    // Is it float16 or bfloat16?
    //
    // Edge TPU appears to be float16 at the moment, with one bit
    // of subnormal.
    //
    // max_exponent: 15
    // min_exponent: -15
    // mantissa: 10
    //
    // functor to double input: (x + x) * (1 + 0).
    auto twice = [&model](float x) {
      float result = 0;
      model.Compute(x, x, 1.f, 0.f, &result);
      return result;
    };
    EXPECT_EQ(15, android::audio_utils::test::test_max_exponent<float>(twice));

    // functor to halve input: (x + 0) * (0.5 + 0).
    auto half = [&model](float x) {
      float result = 0;
      model.Compute(x, 0, 0.5f, 0.f, &result);
      return result;
    };
    EXPECT_EQ(-15, android::audio_utils::test::test_min_exponent<float>(half));

    // functor to increment input: (x + 1) * (1 + 0).
    auto inc = [&model](float x) {
      float result = 0;
      model.Compute(x, 1.f, 1.f, 0.f, &result);
      return result;
    };
    EXPECT_EQ(10, android::audio_utils::test::test_mantissa<float>(inc));
#endif
}