1 /*
2 * Copyright (C) 2024 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <NeuralNetworks.h>
18 #include <android/log.h>
19 #include <android/sharedmem.h>
20 #include <gtest/gtest.h>
21 #include <sys/mman.h>
22 #include <unistd.h>
23 #include <utils/Log.h>
24
25 #include <cmath>
26 #include <functional>
27 #include <numeric>
28 #include <string>
29 #include <vector>
30
31 #define FLOAT_EPISILON (1e-6)
32 #undef LOG_TAG
33 #define LOG_TAG "audio_nnapi_tests"
34
template <typename T>
// Returns the product of all elements; an empty vector yields the
// multiplicative identity, T(1).
static T product(const std::vector<T>& values) {
    T result = T(1);
    for (const auto& value : values) {
        result *= value;
    }
    return result;
}
39
40 // In Android 35, NNAPI is deprecated b/283927643
41 //
42 // The deprecation hasn't made it to the developer's site:
43 // https://developer.android.com/ndk/reference/group/neural-networks
44 // External clients may bundle tflite themselves or access through
45 // play store services
46 // https://www.tensorflow.org/lite/android/play_services
47
48 /**
49 * Conv2DModel
50 *
51 * Build up the hardcoded graph of
52 *
53 * input ---+
54 * +--- CONV2D ---> output
55 * filter ---+
56 *
57 * Operands are given by the dimensions of the input and filter tensor.
58 *
 * input: A 4-D tensor, of shape [batches, height, width, depth_in],
 *        specifying the input.
 *        Since NNAPI feature level 3, zero batches is supported for this tensor.
 * filter: A 4-D tensor, of shape
 *        [depth_out, filter_height, filter_width, depth_in], specifying the
 *        filter.
 * bias: A 1-D tensor, of shape [depth_out], specifying the bias.
 *
 * output: A 4-D tensor, of shape
 *        [batches, out_height, out_width, depth_out].
69 */
70
// Owns a single-operation NNAPI CONV_2D graph plus the shared-memory
// tensors used to feed and read it.  Lifecycle: CreateModel() once, then
// Compute() any number of times; the destructor releases everything.
class Conv2DModel {
  public:
    ~Conv2DModel();

    // Builds, finishes and compiles the CONV_2D model for the given tensor
    // dimensions.  The bias is baked into the model as a constant (see the
    // comment in the implementation: required for TPU acceleration).
    // Returns true on success.
    bool CreateModel(uint32_t batches, uint32_t height, uint32_t width, uint32_t filter_height,
                     uint32_t filter_width, uint32_t depth_in, uint32_t depth_out, float biasValue);

    // Fills the input tensor with inputValue and the filter tensor with
    // filterValue, runs one synchronous execution, and stores output[0]
    // in *result.  Returns true on success.
    bool Compute(float inputValue, float filterValue, float* result);

  private:
    ANeuralNetworksModel* model_ = nullptr;              // graph definition
    ANeuralNetworksCompilation* compilation_ = nullptr;  // compiled graph

    // NNAPI memory objects wrapping the shared-memory fds below.
    ANeuralNetworksMemory* memoryInput_ = nullptr;
    ANeuralNetworksMemory* memoryFilter_ = nullptr;
    ANeuralNetworksMemory* memoryOutput_ = nullptr;

    // Element counts (number of floats, not bytes) of each tensor.
    uint32_t inputSize_ = 0;
    uint32_t filterSize_ = 0;
    uint32_t biasSize_ = 0;
    uint32_t outputSize_ = 0;

    std::vector<uint32_t> inputDimensions_;
    std::vector<uint32_t> filterDimensions_;
    std::vector<uint32_t> outputDimensions_;

    // ASharedMemory file descriptors backing each tensor; -1 when unset.
    int inputTensorFd_ = -1;
    int filterTensorFd_ = -1;
    int outputTensorFd_ = -1;

    // mmap()ed views of the fds above; nullptr when unmapped.
    float* inputTensorPtr_ = nullptr;
    float* filterTensorPtr_ = nullptr;
    float* outputTensorPtr_ = nullptr;
};
105
~Conv2DModel()106 Conv2DModel::~Conv2DModel() {
107 ANeuralNetworksCompilation_free(compilation_);
108 ANeuralNetworksModel_free(model_);
109 ANeuralNetworksMemory_free(memoryInput_);
110 ANeuralNetworksMemory_free(memoryFilter_);
111 ANeuralNetworksMemory_free(memoryOutput_);
112
113 if (inputTensorPtr_ != nullptr) {
114 munmap(inputTensorPtr_, inputSize_ * sizeof(float));
115 inputTensorPtr_ = nullptr;
116 }
117 if (filterTensorPtr_ != nullptr) {
118 munmap(filterTensorPtr_, filterSize_ * sizeof(float));
119 filterTensorPtr_ = nullptr;
120 }
121 if (outputTensorPtr_ != nullptr) {
122 munmap(outputTensorPtr_, outputSize_ * sizeof(float));
123 outputTensorPtr_ = nullptr;
124 }
125
126 if (inputTensorFd_ != -1) close(inputTensorFd_);
127 if (filterTensorFd_ != -1) close(filterTensorFd_);
128 if (outputTensorFd_ != -1) close(outputTensorFd_);
129 }
130
131 /**
132 * Create a graph that consists of a 2D convolution
133 *
134 * input ---+
135 * +--- CONV2D ---> output
136 * filter ---+
137 *
138 * 2 tensors are provided as input
139 *
140 * input: A 4-D tensor, of shape [batches, height, width, depth_in],
141 * specifying the input.
142 * Since NNAPI feature level 3, zero batches is supported for this tensor.
143 * filter: A 4-D tensor, of shape
144 * [depth_out, filter_height, filter_width, depth_in], specifying the
145 * filter.
146 *
147 * Note that bias must be fixed in the model for NNAPI acceleration on TPU.
148 * bias: A 1-D tensor, of shape [depth_out], specifying the bias.
149 *
150 * output 4-D tensor, of shape
151 * [batches, out_height, out_width, depth_out].
152 *
153 * @param batches the number of samples to operate on
154 * @param height of the data input
155 * @param width of the data input
156 * @param filter_height
157 * @param filter_width
158 * @param depth_in channels of input
159 * @param depth_out channels of output
160 * @return true for success, false otherwise
161 */
CreateModel(uint32_t batches,uint32_t height,uint32_t width,uint32_t filter_height,uint32_t filter_width,uint32_t depth_in,uint32_t depth_out,float biasValue)162 bool Conv2DModel::CreateModel(uint32_t batches, uint32_t height, uint32_t width,
163 uint32_t filter_height, uint32_t filter_width, uint32_t depth_in,
164 uint32_t depth_out, float biasValue) {
165 int32_t status;
166
167 inputDimensions_ = std::vector<uint32_t>{batches, height, width, depth_in};
168 filterDimensions_ = std::vector<uint32_t>{depth_out, filter_height, filter_width, depth_in};
169 outputDimensions_ = std::vector<uint32_t>{batches, height, width, depth_out};
170
171 inputSize_ = product(inputDimensions_);
172
173 filterSize_ = product(filterDimensions_);
174
175 outputSize_ = product(outputDimensions_);
176
177 biasSize_ = depth_out;
178
179 // Create the ANeuralNetworksModel handle.
180 status = ANeuralNetworksModel_create(&model_);
181 if (status != ANEURALNETWORKS_NO_ERROR) {
182 __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "ANeuralNetworksModel_create failed");
183 return false;
184 }
185
186 ANeuralNetworksOperandType inputTensorType{
187 .type = ANEURALNETWORKS_TENSOR_FLOAT32,
188 .dimensionCount = static_cast<uint32_t>(inputDimensions_.size()),
189 .dimensions = inputDimensions_.data(),
190 .scale = 0.0f,
191 .zeroPoint = 0,
192 };
193
194 ANeuralNetworksOperandType filterTensorType{
195 .type = ANEURALNETWORKS_TENSOR_FLOAT32,
196 .dimensionCount = static_cast<uint32_t>(filterDimensions_.size()),
197 .dimensions = filterDimensions_.data(),
198 .scale = 0.0f,
199 .zeroPoint = 0,
200 };
201
202 ANeuralNetworksOperandType biasTensorType{
203 .type = ANEURALNETWORKS_TENSOR_FLOAT32,
204 .dimensionCount = 1,
205 .dimensions = &biasSize_,
206 .scale = 0.0f,
207 .zeroPoint = 0,
208 };
209
210 ANeuralNetworksOperandType outputTensorType{
211 .type = ANEURALNETWORKS_TENSOR_FLOAT32,
212 .dimensionCount = static_cast<uint32_t>(outputDimensions_.size()),
213 .dimensions = outputDimensions_.data(),
214 .scale = 0.0f,
215 .zeroPoint = 0,
216 };
217
218 ANeuralNetworksOperandType scalarInt32Type{
219 .type = ANEURALNETWORKS_INT32,
220 .dimensionCount = 0,
221 .dimensions = nullptr,
222 .scale = 0.0f,
223 .zeroPoint = 0,
224 };
225
226 /**
227 * Add operands and operations to construct the model.
228 *
229 * Operands are implicitly identified by the order in which they are added to
230 * the model, starting from 0.
231 *
232 * These indexes are not returned by the model_addOperand call. The
233 * application must manage these values. Here, we use opIdx to do the
234 * bookkeeping.
235 */
236 uint32_t opIdx = 0;
237
238 // Add the operand for the NONE activation function, and set its
239 // value to ANEURALNETWORKS_FUSED_NONE.
240 status = ANeuralNetworksModel_addOperand(model_, &scalarInt32Type);
241 uint32_t fusedActivationFuncNoneOp = opIdx++;
242 if (status != ANEURALNETWORKS_NO_ERROR) {
243 __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
244 "ANeuralNetworksModel_addOperand failed for operand (%d)",
245 fusedActivationFuncNoneOp);
246 return false;
247 }
248
249 FuseCode fusedActivationCodeValue = ANEURALNETWORKS_FUSED_NONE;
250 status = ANeuralNetworksModel_setOperandValue(model_, fusedActivationFuncNoneOp,
251 &fusedActivationCodeValue,
252 sizeof(fusedActivationCodeValue));
253 if (status != ANEURALNETWORKS_NO_ERROR) {
254 __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
255 "ANeuralNetworksModel_setOperandValue failed for operand (%d)",
256 fusedActivationFuncNoneOp);
257 return false;
258 }
259
260 // Add the operand for the padding code
261 // value to ANEURALNETWORKS_PADDING_SAME.
262 status = ANeuralNetworksModel_addOperand(model_, &scalarInt32Type);
263 uint32_t paddingCodeSameOp = opIdx++;
264 if (status != ANEURALNETWORKS_NO_ERROR) {
265 __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
266 "ANeuralNetworksModel_addOperand failed for operand (%d)",
267 paddingCodeSameOp);
268 return false;
269 }
270
271 PaddingCode paddingCodeValue = ANEURALNETWORKS_PADDING_SAME;
272 status = ANeuralNetworksModel_setOperandValue(model_, paddingCodeSameOp, &paddingCodeValue,
273 sizeof(paddingCodeValue));
274 if (status != ANEURALNETWORKS_NO_ERROR) {
275 __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
276 "ANeuralNetworksModel_setOperandValue failed for operand (%d)",
277 paddingCodeSameOp);
278 return false;
279 }
280
281 // Add the operand for one
282 status = ANeuralNetworksModel_addOperand(model_, &scalarInt32Type);
283 uint32_t oneOp = opIdx++;
284 if (status != ANEURALNETWORKS_NO_ERROR) {
285 __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
286 "ANeuralNetworksModel_addOperand failed for operand (%d)", oneOp);
287 return false;
288 }
289
290 int32_t one = 1;
291 status = ANeuralNetworksModel_setOperandValue(model_, oneOp, &one, sizeof(one));
292 if (status != ANEURALNETWORKS_NO_ERROR) {
293 __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
294 "ANeuralNetworksModel_setOperandValue failed for operand (%d)", oneOp);
295 return false;
296 }
297
298 // Add operands for the tensors.
299 status = ANeuralNetworksModel_addOperand(model_, &inputTensorType);
300 uint32_t inputOp = opIdx++;
301 if (status != ANEURALNETWORKS_NO_ERROR) {
302 __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
303 "ANeuralNetworksModel_addOperand failed for operand (%d)", inputOp);
304 return false;
305 }
306
307 status = ANeuralNetworksModel_addOperand(model_, &filterTensorType);
308 uint32_t filterOp = opIdx++;
309 if (status != ANEURALNETWORKS_NO_ERROR) {
310 __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
311 "ANeuralNetworksModel_addOperand failed for operand (%d)", filterOp);
312 return false;
313 }
314
315 status = ANeuralNetworksModel_addOperand(model_, &biasTensorType);
316 uint32_t biasOp = opIdx++;
317 if (status != ANEURALNETWORKS_NO_ERROR) {
318 __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
319 "ANeuralNetworksModel_addOperand failed for operand (%d)", biasOp);
320 return false;
321 }
322
323 // A bias value that isn't constant will prevent acceleration on TPU.
324 std::vector<float> biases(biasSize_, biasValue);
325 status = ANeuralNetworksModel_setOperandValue(model_, biasOp, biases.data(),
326 biases.size() * sizeof(float));
327 if (status != ANEURALNETWORKS_NO_ERROR) {
328 __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
329 "ANeuralNetworksModel_setOperandValue failed for operand (%d)", biasOp);
330 return false;
331 }
332
333 status = ANeuralNetworksModel_addOperand(model_, &outputTensorType);
334 uint32_t outputOp = opIdx++;
335 if (status != ANEURALNETWORKS_NO_ERROR) {
336 __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
337 "ANeuralNetworksModel_addOperand failed for operand (%d)", outputOp);
338 return false;
339 }
340
341 // Add the CONV2D operation.
342 std::vector<uint32_t> addInputOperands = {
343 inputOp, filterOp, biasOp, paddingCodeSameOp, oneOp, oneOp, fusedActivationFuncNoneOp,
344 };
345 status = ANeuralNetworksModel_addOperation(model_, ANEURALNETWORKS_CONV_2D,
346 addInputOperands.size(), addInputOperands.data(), 1,
347 &outputOp);
348 if (status != ANEURALNETWORKS_NO_ERROR) {
349 __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
350 "ANeuralNetworksModel_addOperation failed for CONV2D");
351 return false;
352 }
353
354 // Identify the input and output tensors to the model.
355 std::vector<uint32_t> modelInputOperands = {
356 inputOp,
357 filterOp,
358 };
359 status = ANeuralNetworksModel_identifyInputsAndOutputs(model_, modelInputOperands.size(),
360 modelInputOperands.data(), 1, &outputOp);
361 if (status != ANEURALNETWORKS_NO_ERROR) {
362 __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
363 "ANeuralNetworksModel_identifyInputsAndOutputs failed");
364 return false;
365 }
366
367 // Use of Float16 is required for TPU
368 status = ANeuralNetworksModel_relaxComputationFloat32toFloat16(model_, true);
369 if (status != ANEURALNETWORKS_NO_ERROR) {
370 __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
371 "ANeuralNetworksModel_relaxComputationFloat32toFloat16 failed");
372 return false;
373 }
374
375 // Finish constructing the model.
376 // The values of constant and intermediate operands cannot be altered after
377 // the finish function is called.
378 status = ANeuralNetworksModel_finish(model_);
379 if (status != ANEURALNETWORKS_NO_ERROR) {
380 __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "ANeuralNetworksModel_finish failed");
381 return false;
382 }
383
384 // Create the ANeuralNetworksCompilation object for the constructed model.
385 status = ANeuralNetworksCompilation_create(model_, &compilation_);
386 if (status != ANEURALNETWORKS_NO_ERROR) {
387 __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "ANeuralNetworksCompilation_create failed");
388 return false;
389 }
390
391 // Set the preference for the compilation, so that the runtime and drivers
392 // can make better decisions.
393 // Here we prefer to get the answer quickly, so we choose
394 // ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER.
395 status = ANeuralNetworksCompilation_setPreference(compilation_,
396 ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER);
397 if (status != ANEURALNETWORKS_NO_ERROR) {
398 __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
399 "ANeuralNetworksCompilation_setPreference failed");
400 return false;
401 }
402
403 // Finish the compilation.
404 status = ANeuralNetworksCompilation_finish(compilation_);
405 if (status != ANEURALNETWORKS_NO_ERROR) {
406 __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "ANeuralNetworksCompilation_finish failed");
407 return false;
408 }
409
410 inputTensorFd_ = ASharedMemory_create("input", inputSize_ * sizeof(float));
411 EXPECT_NE(-1, inputTensorFd_);
412 inputTensorPtr_ =
413 reinterpret_cast<float*>(mmap(nullptr, inputSize_ * sizeof(float),
414 PROT_READ | PROT_WRITE, MAP_SHARED, inputTensorFd_, 0));
415 EXPECT_NE(MAP_FAILED, (void*)inputTensorPtr_);
416 status = ANeuralNetworksMemory_createFromFd(inputSize_ * sizeof(float), PROT_READ,
417 inputTensorFd_, 0, &memoryInput_);
418 if (status != ANEURALNETWORKS_NO_ERROR) {
419 __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
420 "ANeuralNetworksMemory_createFromFd failed for input");
421 return false;
422 }
423
424 filterTensorFd_ = ASharedMemory_create("filter", filterSize_ * sizeof(float));
425 EXPECT_NE(-1, filterTensorFd_);
426 filterTensorPtr_ =
427 reinterpret_cast<float*>(mmap(nullptr, filterSize_ * sizeof(float),
428 PROT_READ | PROT_WRITE, MAP_SHARED, filterTensorFd_, 0));
429 EXPECT_NE(MAP_FAILED, (void*)filterTensorPtr_);
430 status = ANeuralNetworksMemory_createFromFd(filterSize_ * sizeof(float), PROT_READ,
431 filterTensorFd_, 0, &memoryFilter_);
432 if (status != ANEURALNETWORKS_NO_ERROR) {
433 __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
434 "ANeuralNetworksMemory_createFromFd failed for filter");
435 return false;
436 }
437
438 outputTensorFd_ = ASharedMemory_create("output", outputSize_ * sizeof(float));
439 EXPECT_NE(-1, outputTensorFd_);
440 outputTensorPtr_ = reinterpret_cast<float*>(
441 mmap(nullptr, outputSize_ * sizeof(float), PROT_READ, MAP_SHARED, outputTensorFd_, 0));
442 EXPECT_NE(MAP_FAILED, (void*)outputTensorPtr_);
443
444 status = ANeuralNetworksMemory_createFromFd(outputSize_ * sizeof(float), PROT_READ | PROT_WRITE,
445 outputTensorFd_, 0, &memoryOutput_);
446 if (status != ANEURALNETWORKS_NO_ERROR) {
447 __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
448 "ANeuralNetworksMemory_createFromFd failed for Output");
449 return false;
450 }
451 return true;
452 }
453
454 /**
455 * Compute with the given input data.
456 * @param inputValue to fill the input data tensor
457 * @param filterValue to fill the filter tensor
458 * @return true on success
459 * result is the output value.
460 */
Compute(float inputValue,float filterValue,float * result)461 bool Conv2DModel::Compute(float inputValue, float filterValue, float* result) {
462 if (!result) {
463 return false;
464 }
465
466 // Create an ANeuralNetworksExecution object from the compiled model.
467 // Note:
468 // 1. All the input and output data are tied to the ANeuralNetworksExecution
469 // object.
470 // 2. Multiple concurrent execution instances could be created from the same
471 // compiled model.
472 // This sample only uses one execution of the compiled model.
473 ANeuralNetworksExecution* execution;
474 int32_t status = ANeuralNetworksExecution_create(compilation_, &execution);
475 if (status != ANEURALNETWORKS_NO_ERROR) {
476 __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "ANeuralNetworksExecution_create failed");
477 return false;
478 }
479
480 for (size_t i = 0; i < inputSize_; i++) {
481 inputTensorPtr_[i] = inputValue;
482 }
483
484 status = ANeuralNetworksExecution_setInputFromMemory(execution, 0, nullptr, memoryInput_, 0,
485 inputSize_ * sizeof(float));
486 if (status != ANEURALNETWORKS_NO_ERROR) {
487 __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
488 "ANeuralNetworksExecution_setInputFromMemory failed for input");
489 return false;
490 }
491
492 for (size_t i = 0; i < filterSize_; i++) {
493 filterTensorPtr_[i] = filterValue;
494 }
495
496 status = ANeuralNetworksExecution_setInputFromMemory(execution, 1, nullptr, memoryFilter_, 0,
497 filterSize_ * sizeof(float));
498 if (status != ANEURALNETWORKS_NO_ERROR) {
499 __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
500 "ANeuralNetworksExecution_setInputFromMemory failed for filter");
501 return false;
502 }
503
504 // Set the output tensor that will be filled by executing the model.
505 // We use shared memory here to minimize the copies needed for getting the
506 // output data.
507 status = ANeuralNetworksExecution_setOutputFromMemory(execution, 0, nullptr, memoryOutput_, 0,
508 outputSize_ * sizeof(float));
509 if (status != ANEURALNETWORKS_NO_ERROR) {
510 __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
511 "ANeuralNetworksExecution_setOutputFromMemory failed for output");
512 return false;
513 }
514
515 // Start the execution of the model.
516 // Note that the execution here is asynchronous, and an ANeuralNetworksEvent
517 // object will be created to monitor the status of the execution.
518 ANeuralNetworksEvent* event = nullptr;
519 status = ANeuralNetworksExecution_startCompute(execution, &event);
520 if (status != ANEURALNETWORKS_NO_ERROR) {
521 __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
522 "ANeuralNetworksExecution_startCompute failed");
523 return false;
524 }
525
526 // Wait until the completion of the execution. This could be done on a
527 // different thread. By waiting immediately, we effectively make this a
528 // synchronous call.
529 status = ANeuralNetworksEvent_wait(event);
530 if (status != ANEURALNETWORKS_NO_ERROR) {
531 __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "ANeuralNetworksEvent_wait failed");
532 return false;
533 }
534
535 ANeuralNetworksEvent_free(event);
536 ANeuralNetworksExecution_free(execution);
537
538 // Validate the results.
539 *result = outputTensorPtr_[0];
540 return true;
541 }
542
// End-to-end check of the CONV_2D graph: build once, run twice.
TEST(audio_nnapi_tests, Conv2DModel) {
    Conv2DModel model;
    float result = 0.f;

    // 16x16 single-channel input, 3x3 filter, SAME padding, stride 1,
    // zero bias.
    EXPECT_TRUE(model.CreateModel(1 /* batches */, 16 /* height */, 16 /* width */,
                                  3 /* filter_height */, 3 /* filter_width */, 1 /* depth_in */,
                                  1 /* depth_out */, 0.f /* biasValue */
                                  ));

    // result is output[0], the top-left corner pixel: with SAME padding only
    // a 2x2 window of the 3x3 filter overlaps real data there, so the
    // expected value is input * filter * 4.
    EXPECT_TRUE(model.Compute(10.f, 11.f, &result));
    EXPECT_EQ((10.f * 11.f) * (2 * 2), result);

    // A second execution on the same compiled model must work as well.
    EXPECT_TRUE(model.Compute(4.f, 5.f, &result));
    EXPECT_EQ((4.f * 5.f) * (2 * 2), result);
}
556