/**
 * Copyright 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "run_tflite.h"

#include <android/log.h>
#include <dirent.h>
#include <dlfcn.h>
#include <fcntl.h>
#include <ftw.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <unistd.h>

#include <algorithm>
#include <cassert>
#include <cstdio>
#include <cstring>
#include <fstream>

#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
#include "tensorflow/lite/nnapi/NeuralNetworksTypes.h"

#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/nnapi/sl/include/SupportLibrary.h"

#define LOG_TAG "NN_BENCHMARK"

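// Logs a message at FATAL severity and aborts via assert().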
#define FATAL(fmt, ...)                                                   \
  do {                                                                    \
    __android_log_print(ANDROID_LOG_FATAL, LOG_TAG, fmt, ##__VA_ARGS__);  \
    assert(false);                                                        \
  } while (0)

namespace {

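// Returns the current wall-clock time in microseconds.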
long long currentTimeInUsec() {
  timeval tv;
  gettimeofday(&tv, NULL);
  return ((tv.tv_sec * 1000000LL) + tv.tv_usec);
}

// Workaround for build systems that make it difficult to pick the correct NDK
// API level. NDK tracing methods are dynamically loaded from libandroid.so.
typedef void* (*fp_ATrace_beginSection)(const char* sectionName);
typedef void* (*fp_ATrace_endSection)();
struct TraceFunc {
  fp_ATrace_beginSection ATrace_beginSection;
  fp_ATrace_endSection ATrace_endSection;
};
TraceFunc setupTraceFunc() {
  void* lib = dlopen("libandroid.so", RTLD_NOW | RTLD_LOCAL);
  if (lib == nullptr) {
    FATAL("unable to open libandroid.so");
  }
  return {
      reinterpret_cast<fp_ATrace_beginSection>(
          dlsym(lib, "ATrace_beginSection")),
      reinterpret_cast<fp_ATrace_endSection>(dlsym(lib, "ATrace_endSection"))};
}
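// Resolved once at static-initialization time; used to emit systrace sections
// around inference and compilation below.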
static TraceFunc kTraceFunc{setupTraceFunc()};

// Returns the number of partitions in the given subgraph that were delegated
// to the given delegate by a call to ModifyGraphWithDelegate.
int CountPartitionsDelegatedTo(tflite::Subgraph* subgraph,
                               const TfLiteDelegate* delegate) {
  return std::count_if(
      subgraph->nodes_and_registration().begin(),
      subgraph->nodes_and_registration().end(),
      [delegate](
          std::pair<TfLiteNode, TfLiteRegistration> node_and_registration) {
        return node_and_registration.first.delegate == delegate;
      });
}

// Returns the total number of partitions, across all subgraphs, that were
// delegated to the given delegate by a call to ModifyGraphWithDelegate.
int CountPartitionsDelegatedTo(tflite::Interpreter* interpreter,
                               const TfLiteDelegate* delegate) {
  int result = 0;
  for (int i = 0; i < interpreter->subgraphs_size(); i++) {
    tflite::Subgraph* subgraph = interpreter->subgraph(i);

    result += CountPartitionsDelegatedTo(subgraph, delegate);
  }

  return result;
}

}  // namespace

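// Factory helper: allocates a BenchmarkModel and runs init(); returns nullptr
// (after logging an error) if initialization fails.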
BenchmarkModel* BenchmarkModel::create(const char* modelfile, int tfliteBackend,
                                       bool enable_intermediate_tensors_dump, int* nnapiErrno,
                                       const char* nnapi_device_name, bool mmapModel,
                                       const char* nnapi_cache_dir,
                                       const tflite::nnapi::NnApiSupportLibrary* nnApiSl) {
  BenchmarkModel* model = new BenchmarkModel();
  if (!model->init(modelfile, tfliteBackend, enable_intermediate_tensors_dump, nnapiErrno,
                   nnapi_device_name, mmapModel, nnapi_cache_dir, nnApiSl)) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to init model %s", modelfile);
    delete model;
    return nullptr;
  }
  return model;
}

bool BenchmarkModel::init(const char* modelfile, int tfliteBackend,
                          bool enable_intermediate_tensors_dump, int* nnapiErrno,
                          const char* nnapi_device_name, bool mmapModel,
                          const char* nnapi_cache_dir,
                          const tflite::nnapi::NnApiSupportLibrary* nnApiSl) {
  __android_log_print(ANDROID_LOG_INFO, LOG_TAG, "BenchmarkModel %s",
                      modelfile);
  mModelFile = modelfile;
  if (nnapi_cache_dir) {
    mCacheDir = nnapi_cache_dir;
  }
  if (nnapi_device_name) {
    mNnApiDeviceName = nnapi_device_name;
  }

  if (mmapModel) {
    // Memory-map the model. NOTE: the mapping must remain valid for at least
    // as long as the interpreter.
    mTfliteModel = tflite::FlatBufferModel::BuildFromFile(modelfile);
  } else {
    std::ifstream t(modelfile);
    mModelBuffer = std::string((std::istreambuf_iterator<char>(t)), std::istreambuf_iterator<char>());
    mTfliteModel = tflite::FlatBufferModel::BuildFromBuffer(mModelBuffer.c_str(), mModelBuffer.size());
  }
  if (!mTfliteModel) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to load model %s",
                        modelfile);
    return false;
  }

  tflite::ops::builtin::BuiltinOpResolver resolver;
  tflite::InterpreterBuilder(*mTfliteModel, resolver)(&mTfliteInterpreter);
  if (!mTfliteInterpreter) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                        "Failed to create TFlite interpreter");
    return false;
  }

  if (enable_intermediate_tensors_dump) {
    // Make the output of every op a model output so that each intermediate
    // tensor can be fetched, even when running with delegates.
    outputs.clear();
    for (size_t node = 0; node < mTfliteInterpreter->nodes_size(); ++node) {
      auto node_outputs =
          mTfliteInterpreter->node_and_registration(node)->first.outputs;
      outputs.insert(outputs.end(), node_outputs->data,
                     node_outputs->data + node_outputs->size);
    }
    mTfliteInterpreter->SetOutputs(outputs);
  }

  // Allow Fp16 precision for all models
  mTfliteInterpreter->SetAllowFp16PrecisionForFp32(true);

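  // Attach the requested delegate, if any. NNAPI delegation is treated as
  // failed if no partition ends up delegated to NNAPI.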
  mTfliteBackend = tfliteBackend;
  switch (mTfliteBackend) {
    case TFLITE_NNAPI: {
      tflite::StatefulNnApiDelegate::Options nnapi_options;
      nnapi_options.accelerator_name =
          mNnApiDeviceName.empty() ? nullptr : mNnApiDeviceName.c_str();
      __android_log_print(ANDROID_LOG_INFO, LOG_TAG,
                          "Delegating to NNAPI device '%s'", mNnApiDeviceName.c_str());
      if (nnApiSl) {
        mNnApiSl = nnApiSl;
        __android_log_print(ANDROID_LOG_INFO, LOG_TAG, "Using NNAPI SL");
      }
      mTfliteNnapiDelegate =
          nnApiSl
              ? std::make_unique<tflite::StatefulNnApiDelegate>(nnApiSl->getFL5(), nnapi_options)
              : std::make_unique<tflite::StatefulNnApiDelegate>(nnapi_options);
      int delegationStatus =
          mTfliteInterpreter->ModifyGraphWithDelegate(mTfliteNnapiDelegate.get());
      *nnapiErrno = mTfliteNnapiDelegate->GetNnApiErrno();
      if ((delegationStatus == kTfLiteOk) &&
          (*nnapiErrno == ANEURALNETWORKS_NO_ERROR)) {
        int nnapiPartitions =
            CountPartitionsDelegatedTo(mTfliteInterpreter.get(), mTfliteNnapiDelegate.get());
        if (nnapiPartitions == 0) {
          __android_log_print(
              ANDROID_LOG_ERROR, LOG_TAG,
              "NNAPI Delegate (%s) for model %s initialized with %d partitions delegated to NNAPI!!",
              nnapi_device_name, modelfile, nnapiPartitions);

          return false;
        } else {
          __android_log_print(
              ANDROID_LOG_INFO, LOG_TAG,
              "NNAPI Delegate (%s) for model %s initialized successfully with %d partitions delegated to NNAPI",
              nnapi_device_name, modelfile, nnapiPartitions);
        }
      } else {
        __android_log_print(
            ANDROID_LOG_ERROR, LOG_TAG,
            "Failed to initialize NNAPI Delegate for model %s, nnapi_errno is %d",
            modelfile, *nnapiErrno);
        return false;
      }

    } break;
    case TFLITE_GPU: {
#if defined(NN_BENCHMARK_ENABLE_GPU)
      mGpuDelegate = TfLiteGpuDelegateV2Create(/*default options=*/nullptr);
      if (mTfliteInterpreter->ModifyGraphWithDelegate(mGpuDelegate) !=
          kTfLiteOk) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "Failed to initialize GPU Delegate");
        return false;
      } else {
        int gpuPartitions =
            CountPartitionsDelegatedTo(mTfliteInterpreter.get(), mGpuDelegate);
        if (gpuPartitions == 0) {
          __android_log_print(
              ANDROID_LOG_ERROR, LOG_TAG,
              "GPU Delegate for model %s initialized with %d partitions delegated",
              modelfile, gpuPartitions);
          return false;
        }
      }
#else   // !defined(NN_BENCHMARK_ENABLE_GPU)
      __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                          "GPU delegate requested but not enabled with "
                          "NN_BENCHMARK_ENABLE_GPU");
      return false;
#endif  // defined(NN_BENCHMARK_ENABLE_GPU)
    } break;
    default:
      break;
  }
  return true;
}

BenchmarkModel::~BenchmarkModel() {
  switch (mTfliteBackend) {
    case TFLITE_GPU: {
#if defined(NN_BENCHMARK_ENABLE_GPU)
      TfLiteGpuDelegateV2Delete(mGpuDelegate);
#endif  // defined(NN_BENCHMARK_ENABLE_GPU)
    } break;
    default:
      break;
  }
}

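// Copies raw input data into the interpreter's first (and only) input tensor.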
bool BenchmarkModel::setInput(const uint8_t* dataPtr, size_t length) {
  int input = mTfliteInterpreter->inputs()[0];
  auto* input_tensor = mTfliteInterpreter->tensor(input);

  switch (input_tensor->type) {
    case kTfLiteFloat32:
    case kTfLiteUInt8: {
      void* raw = input_tensor->data.raw;
      memcpy(raw, dataPtr, length);
      break;
    }
    default:
      __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                          "Input tensor type not supported");
      return false;
  }
  return true;
}
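// Appends the raw bytes of the given output tensor to the corresponding
// per-output buffer in the result.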
void BenchmarkModel::saveInferenceOutput(InferenceResult* result,
                                         int output_index) {
  int output = mTfliteInterpreter->outputs()[output_index];
  auto* output_tensor = mTfliteInterpreter->tensor(output);
  auto& sink = result->inferenceOutputs[output_index];
  sink.insert(sink.end(), output_tensor->data.uint8,
              output_tensor->data.uint8 + output_tensor->bytes);
}

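// Computes the mean-square error and maximum single error between the model's
// output tensor and the provided golden data.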
void BenchmarkModel::getOutputError(const uint8_t* expected_data, size_t length,
                                    InferenceResult* result, int output_index) {
  int output = mTfliteInterpreter->outputs()[output_index];
  auto* output_tensor = mTfliteInterpreter->tensor(output);
  if (output_tensor->bytes != length) {
    FATAL("Wrong size of output tensor, expected %zu, is %zu",
          output_tensor->bytes, length);
  }

  size_t elements_count = 0;
  float err_sum = 0.0;
  float max_error = 0.0;
  switch (output_tensor->type) {
    case kTfLiteUInt8: {
      uint8_t* output_raw = mTfliteInterpreter->typed_tensor<uint8_t>(output);
      elements_count = output_tensor->bytes;
      for (size_t i = 0; i < output_tensor->bytes; ++i) {
        float err = ((float)output_raw[i]) - ((float)expected_data[i]);
        if (err > max_error) max_error = err;
        err_sum += err * err;
      }
      break;
    }
    case kTfLiteFloat32: {
      const float* expected = reinterpret_cast<const float*>(expected_data);
      float* output_raw = mTfliteInterpreter->typed_tensor<float>(output);
      elements_count = output_tensor->bytes / sizeof(float);
      for (size_t i = 0; i < output_tensor->bytes / sizeof(float); ++i) {
        float err = output_raw[i] - expected[i];
        if (err > max_error) max_error = err;
        err_sum += err * err;
      }
      break;
    }
    default:
      FATAL("Output tensor type %d not supported", output_tensor->type);
  }
  result->meanSquareErrors[output_index] = err_sum / elements_count;
  result->maxSingleErrors[output_index] = max_error;
}

bool BenchmarkModel::resizeInputTensors(std::vector<int> shape) {
  // The benchmark expects only a single input tensor, hardcoded as index 0.
  int input = mTfliteInterpreter->inputs()[0];
  mTfliteInterpreter->ResizeInputTensor(input, shape);
  if (mTfliteInterpreter->AllocateTensors() != kTfLiteOk) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                        "Failed to allocate tensors!");
    return false;
  }
  return true;
}

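// Runs a single Invoke() and reports failure if either the TFLite status or
// the NNAPI errno (when the NNAPI delegate is in use) indicates an error.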
bool BenchmarkModel::runInference() {
  auto status = mTfliteInterpreter->Invoke();
  auto nnapi_errno = mTfliteNnapiDelegate
                         ? mTfliteNnapiDelegate->GetNnApiErrno()
                         : ANEURALNETWORKS_NO_ERROR;
  if (status != kTfLiteOk || nnapi_errno != ANEURALNETWORKS_NO_ERROR) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                        "Failed to invoke, tflite status: %d, nnapi errno: %d!",
                        (int)status, nnapi_errno);
    return false;
  }
  return true;
}

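// Resets variable (state) tensors, e.g. for recurrent models, before starting
// a new inference sequence.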
bool BenchmarkModel::resetStates() {
  auto status = mTfliteInterpreter->ResetVariableTensors();
  if (status != kTfLiteOk) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                        "Failed to reset variable tensors: %d!", (int)status);
    return false;
  }
  return true;
}

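// Runs up to seqInferencesMaxCount inference sequences, cycling through
// inOutData, and records latency (and optionally accuracy and raw outputs)
// for every inference. Stops early once the accumulated inference time
// exceeds 'timeout' seconds.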
bool BenchmarkModel::benchmark(
    const std::vector<InferenceInOutSequence>& inOutData,
    int seqInferencesMaxCount, float timeout, int flags,
    std::vector<InferenceResult>* results) {
  if (inOutData.empty()) {
    __android_log_print(ANDROID_LOG_WARN, LOG_TAG,
                        "Input/output vector is empty");
    return true;
  }

  float inferenceTotal = 0.0;
  for (int seqInferenceIndex = 0; seqInferenceIndex < seqInferencesMaxCount;
       ++seqInferenceIndex) {
    resetStates();

    const int inputOutputSequenceIndex = seqInferenceIndex % inOutData.size();
    const InferenceInOutSequence& seq = inOutData[inputOutputSequenceIndex];
    const bool sampleResults = (flags & FLAG_SAMPLE_BENCHMARK_RESULTS) != 0;
    for (int i = 0; i < seq.size(); ++i) {
      const InferenceInOut& data = seq[i];

      // For NNAPI systrace usage documentation, see
      // frameworks/ml/nn/common/include/Tracing.h.
      kTraceFunc.ATrace_beginSection("[NN_LA_PE]BenchmarkModel::benchmark");
      kTraceFunc.ATrace_beginSection("[NN_LA_PIO]BenchmarkModel::input");
      if (data.input) {
        setInput(data.input, data.input_size);
      } else {
        int input = mTfliteInterpreter->inputs()[0];
        auto* input_tensor = mTfliteInterpreter->tensor(input);
        if (!data.createInput((uint8_t*)input_tensor->data.raw,
                              input_tensor->bytes)) {
          __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                              "Input creation %d failed", i);
          return false;
        }
      }
      kTraceFunc.ATrace_endSection();
      long long startTime = currentTimeInUsec();
      const bool success = runInference();
      kTraceFunc.ATrace_endSection();
      long long endTime = currentTimeInUsec();
      if (!success) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Inference %d failed",
                            i);
        return false;
      }

      float inferenceTime =
          static_cast<float>(endTime - startTime) / 1000000.0f;
      size_t outputsCount = mTfliteInterpreter->outputs().size();
      InferenceResult result{
          inferenceTime, {}, {}, {}, inputOutputSequenceIndex, i};
      result.meanSquareErrors.resize(outputsCount);
      result.maxSingleErrors.resize(outputsCount);
      result.inferenceOutputs.resize(outputsCount);

      if ((flags & FLAG_IGNORE_GOLDEN_OUTPUT) == 0) {
        if (outputsCount != data.outputs.size()) {
          __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                              "Golden/actual outputs (%zu/%zu) count mismatch",
                              data.outputs.size(), outputsCount);
          return false;
        }
        for (int j = 0; j < outputsCount; ++j) {
          getOutputError(data.outputs[j].ptr, data.outputs[j].size, &result, j);
        }
      }

      if ((flags & FLAG_DISCARD_INFERENCE_OUTPUT) == 0) {
        for (int j = 0; j < outputsCount; ++j) {
          saveInferenceOutput(&result, j);
        }
      }

      if (!sampleResults ||
          (seqInferenceIndex % INFERENCE_OUT_SAMPLE_RATE) == 0) {
        results->push_back(result);
      }
      inferenceTotal += inferenceTime;
    }

    // Timeout?
    if (timeout > 0.001 && inferenceTotal > timeout) {
      return true;
    }
  }
  return true;
}

// If cacheDir is not nullptr, compilation caching will be used with NNAPI.
bool BenchmarkModel::runCompilation(const char* cacheDir, bool useNnapiSl) {
  std::unique_ptr<tflite::StatefulNnApiDelegate> delegate;
  std::unique_ptr<tflite::Interpreter> interpreter;
  tflite::ops::builtin::BuiltinOpResolver resolver;
  tflite::InterpreterBuilder(*mTfliteModel, resolver)(&interpreter);
  if (!interpreter) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to create TFlite interpreter");
    return false;
  }

  // Allow Fp16 precision for all models
  interpreter->SetAllowFp16PrecisionForFp32(true);

  if (mTfliteBackend == TFLITE_NNAPI) {
    tflite::StatefulNnApiDelegate::Options nnapi_options;
    nnapi_options.accelerator_name =
        mNnApiDeviceName.empty() ? nullptr : mNnApiDeviceName.c_str();
    if (cacheDir) {
      nnapi_options.cache_dir = cacheDir;
      nnapi_options.model_token = mModelFile.c_str();
    }
    if (useNnapiSl) {
      __android_log_print(ANDROID_LOG_INFO, LOG_TAG,
                          "Use NNAPI SL in compilation caching benchmark.");
      if (!mNnApiSl) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "NNAPI SL is a null pointer when running the "
                            "compilation caching benchmark.");
        return false;
      }
      delegate = std::make_unique<tflite::StatefulNnApiDelegate>(mNnApiSl->getFL5(),
                                                                 nnapi_options);
    } else {
      delegate = std::make_unique<tflite::StatefulNnApiDelegate>(nnapi_options);
    }
    int delegationStatus = interpreter->ModifyGraphWithDelegate(delegate.get());
    auto nnapiErrno = delegate->GetNnApiErrno();
    if (delegationStatus != kTfLiteOk || nnapiErrno != ANEURALNETWORKS_NO_ERROR) {
      __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                          "Failed to initialize NNAPI Delegate for model %s, nnapi_errno is %d",
                          mModelFile.c_str(), nnapiErrno);
      return false;
    } else {
      int nnapiPartitions =
          CountPartitionsDelegatedTo(interpreter.get(), delegate.get());
      if (nnapiPartitions == 0) {
        __android_log_print(
            ANDROID_LOG_ERROR, LOG_TAG,
            "NNAPI Delegate (%s) for model %s initialized with %d partitions delegated to NNAPI!!",
            mNnApiDeviceName.c_str(), mModelFile.c_str(), nnapiPartitions);
        return false;
      }
    }
  }
  return true;
}

// A helper class to manage the lifetime of a temporary cache directory.
class ScopedTempDirectory {
 public:
  ScopedTempDirectory(std::string base) : mBase(std::move(base)) {}
  ~ScopedTempDirectory() { cleanup(); }

  // Create a new temp directory, remove the old one if needed.
  void recreate() {
    cleanup();
    mTempDir = mBase + "/XXXXXX";
    mkdtemp(&mTempDir[0]);
  }

  // Get the path to the temp directory.
  const char* get() const { return mTempDir.empty() ? nullptr : mTempDir.c_str(); }

 private:
  void cleanup() {
    if (mTempDir.empty()) {
      return;
    }
    auto callback = [](const char* entry, const struct stat*, int, struct FTW*) {
      return remove(entry);
    };
    nftw(mTempDir.c_str(), callback, 128, FTW_DEPTH | FTW_MOUNT | FTW_PHYS);
    mTempDir.clear();
  }

  std::string mBase;
  std::string mTempDir;
};

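// Compiles once with caching enabled into a fresh temporary directory, then
// sums the sizes of the regular files generated there.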
bool BenchmarkModel::getCompilationCacheSize(int* cacheSizeBytes, bool useNnapiSl) {
  if (cacheSizeBytes == nullptr) return false;

  // Create cache files.
  ScopedTempDirectory tempDir(mCacheDir.value());
  tempDir.recreate();
  const bool success = runCompilation(tempDir.get(), useNnapiSl);
  if (!success) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Save to cache failed");
    return false;
  }

  // Compute total size of cache files.
  int totalSize = 0;
  DIR* dir = opendir(tempDir.get());
  if (dir == nullptr) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to open cache directory");
    return false;
  }
  struct dirent* dp = nullptr;
  while ((dp = readdir(dir)) != nullptr) {
    char fullPath[1024];
    snprintf(fullPath, 1024, "%s/%s", tempDir.get(), dp->d_name);
    struct stat st;
    int err = stat(fullPath, &st);
    if (err != 0) {
      closedir(dir);
      __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to stat %s", fullPath);
      return false;
    }
    // Only accumulate sizes of regular files. This will exclude '.' and '..'.
    if (S_ISREG(st.st_mode)) {
      totalSize += st.st_size;
    }
  }
  closedir(dir);
  *cacheSizeBytes = totalSize;
  return true;
}

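// Benchmarks a single compilation mode (WITHOUT_CACHE, SAVE_TO_CACHE or
// PREPARE_FROM_CACHE) for up to maxNumIterations, stopping early once the
// accumulated compilation time exceeds 'timeout' seconds. Per-iteration times
// (in seconds) are appended to 'results' when it is non-null.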
bool BenchmarkModel::benchmarkSingleTypeOfCompilation(CompilationBenchmarkType type,
                                                      int maxNumIterations, float timeout,
                                                      bool useNnapiSl,
                                                      std::vector<float>* results) {
  if (results != nullptr) {
    results->clear();
  }
  ScopedTempDirectory tempDir(mCacheDir.value());

  // Initialize cache files to benchmark cache hit.
  if (type == CompilationBenchmarkType::PREPARE_FROM_CACHE) {
    tempDir.recreate();
    const bool success = runCompilation(tempDir.get(), useNnapiSl);
    if (!success) {
      __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Save to cache failed");
      return false;
    }
  }

  float compilationTotal = 0.0;
  for (int i = 0; i < maxNumIterations; i++) {
    const char* cacheDir = nullptr;
    switch (type) {
      case CompilationBenchmarkType::WITHOUT_CACHE:
        cacheDir = nullptr;
        break;
      case CompilationBenchmarkType::SAVE_TO_CACHE:
        // Remove the cache files from the last iteration to benchmark cache miss.
        tempDir.recreate();
        [[fallthrough]];
      case CompilationBenchmarkType::PREPARE_FROM_CACHE:
        cacheDir = tempDir.get();
        break;
      default:
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Unknown CompilationBenchmarkType: %d",
                            static_cast<int>(type));
        return false;
    }

    kTraceFunc.ATrace_beginSection("[NN_LA_PC]BenchmarkModel::benchmarkCompilation");
    const long long startTime = currentTimeInUsec();
    const bool success = runCompilation(cacheDir, useNnapiSl);
    const long long endTime = currentTimeInUsec();
    kTraceFunc.ATrace_endSection();
    if (!success) {
      __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Compilation %d failed", i);
      return false;
    }

    const float compilationTime = static_cast<float>(endTime - startTime) / 1000000.0f;
    if (results != nullptr) {
      results->push_back(compilationTime);
    }

    // Timeout?
    compilationTotal += compilationTime;
    if (timeout > 0.001 && compilationTotal > timeout) {
      return true;
    }
  }
  return true;
}

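// Runs a warmup pass (whose results are discarded) followed by the measured
// pass for the given compilation type.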
bool BenchmarkModel::benchmarkSingleTypeOfCompilationWithWarmup(CompilationBenchmarkType type,
                                                                int maxNumIterations,
                                                                float warmupTimeout,
                                                                float runTimeout,
                                                                bool useNnapiSl,
                                                                std::vector<float>* results) {
  kTraceFunc.ATrace_beginSection(
      "[NN_LA_PWM]BenchmarkModel::benchmarkSingleTypeOfCompilationWithWarmup");
  bool success = benchmarkSingleTypeOfCompilation(type, maxNumIterations, warmupTimeout,
                                                  useNnapiSl, nullptr);
  kTraceFunc.ATrace_endSection();
  if (!success) return false;

  kTraceFunc.ATrace_beginSection(
      "[NN_LA_PBM]BenchmarkModel::benchmarkSingleTypeOfCompilationWithWarmup");
  success = benchmarkSingleTypeOfCompilation(type, maxNumIterations, runTimeout,
                                             useNnapiSl, results);
  kTraceFunc.ATrace_endSection();
  return success;
}

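// Top-level compilation benchmark: measures compilation without cache, the
// compilation cache size, and, when caching is supported (cache size > 0),
// saving to and preparing from the cache.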
bool BenchmarkModel::benchmarkCompilation(int maxNumIterations, float warmupTimeout,
                                          float runTimeout, bool useNnapiSl,
                                          CompilationBenchmarkResult* result) {
  if (result == nullptr) return false;

  // Benchmark compilation without cache.
  bool success = benchmarkSingleTypeOfCompilationWithWarmup(
      CompilationBenchmarkType::WITHOUT_CACHE, maxNumIterations,
      warmupTimeout, runTimeout, useNnapiSl,
      &result->compileWithoutCacheTimeSec);
  if (!success) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                        "Failed to benchmark compilation without cache");
    return false;
  }

  // Get compilation cache size.
  success = getCompilationCacheSize(&result->cacheSizeBytes, useNnapiSl);
  if (!success) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to retrieve compilation cache size");
    return false;
  }

  // Benchmark saving to cache and preparing from cache only if supported.
  if (result->cacheSizeBytes > 0) {
    // Benchmark saving to cache.
    auto& saveToCacheTimeSec = result->saveToCacheTimeSec.emplace();
    success = benchmarkSingleTypeOfCompilationWithWarmup(
        CompilationBenchmarkType::SAVE_TO_CACHE, maxNumIterations,
        warmupTimeout, runTimeout, useNnapiSl,
        &saveToCacheTimeSec);
    if (!success) {
      __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to benchmark saving to cache");
      return false;
    }

    // Benchmark preparing from cache.
    auto& prepareFromCacheTimeSec = result->prepareFromCacheTimeSec.emplace();
    success = benchmarkSingleTypeOfCompilationWithWarmup(
        CompilationBenchmarkType::PREPARE_FROM_CACHE, maxNumIterations,
        warmupTimeout, runTimeout, useNnapiSl,
        &prepareFromCacheTimeSec);
    if (!success) {
      __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to benchmark preparing from cache");
      return false;
    }
  }
  return true;
}

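// Runs inference for every sequence in inOutData and writes each output
// tensor (including intermediate tensors when init() was called with
// enable_intermediate_tensors_dump) as a raw binary file under 'path'.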
bool BenchmarkModel::dumpAllLayers(
    const char* path, const std::vector<InferenceInOutSequence>& inOutData) {
  if (inOutData.empty()) {
    FATAL("Input/output vector is empty");
  }

  for (int seqInferenceIndex = 0; seqInferenceIndex < inOutData.size();
       ++seqInferenceIndex) {
    resetStates();

    const InferenceInOutSequence& seq = inOutData[seqInferenceIndex];
    for (int i = 0; i < seq.size(); ++i) {
      const InferenceInOut& data = seq[i];
      setInput(data.input, data.input_size);
      const bool success = runInference();
      if (!success) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Inference %d failed",
                            i);
        return false;
      }

      // The tensors are not ordered by tensor index.
      for (int tensor_order = 0; tensor_order < outputs.size(); ++tensor_order) {
        int tensor_index = outputs[tensor_order];
        auto* output_tensor = mTfliteInterpreter->tensor(tensor_index);
        if (output_tensor->data.raw == nullptr) {
          __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                              "output_tensor->data.raw == nullptr at index %d ", tensor_index);
          continue;
        }
        char fullpath[1024];
        snprintf(fullpath, 1024, "%s/dump_%.3d_seq_%.3d_order_%.3d_tensor_%.3d", path,
                 seqInferenceIndex, i, tensor_order, tensor_index);
        FILE* f = fopen(fullpath, "wb");
        if (f == nullptr) {
          __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to open %s for writing",
                              fullpath);
          return false;
        }
        fwrite(output_tensor->data.raw, output_tensor->bytes, 1, f);
        fclose(f);
      }
    }
  }
  return true;
}