/**
 * Copyright 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "run_tflite.h"

#include <android/log.h>
#include <dirent.h>
#include <dlfcn.h>
#include <fcntl.h>
#include <ftw.h>
#include <sys/time.h>
#include <unistd.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <memory>

#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
#include "tensorflow/lite/nnapi/NeuralNetworksTypes.h"

#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/nnapi/sl/include/SupportLibrary.h"

#define LOG_TAG "NN_BENCHMARK"

#define FATAL(fmt, ...)                                                  \
  do {                                                                   \
    __android_log_print(ANDROID_LOG_FATAL, LOG_TAG, fmt, ##__VA_ARGS__); \
    assert(false);                                                       \
  } while (0)
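
// Illustrative usage (added for clarity, not in the original): FATAL logs at
// FATAL severity and then aborts via assert, so it is reserved for
// unrecoverable states, e.g.
//   FATAL("Unexpected tensor type %d", tensor->type);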

namespace {

long long currentTimeInUsec() {
  timeval tv;
  gettimeofday(&tv, NULL);
  // Use a 64-bit multiplier so the conversion cannot overflow on 32-bit
  // platforms, where long is only 32 bits.
  return ((tv.tv_sec * 1000000LL) + tv.tv_usec);
}

// Workaround for build systems that make it difficult to pick the correct NDK
// API level: the NDK tracing methods are loaded dynamically from libandroid.so.
typedef void* (*fp_ATrace_beginSection)(const char* sectionName);
typedef void* (*fp_ATrace_endSection)();
struct TraceFunc {
  fp_ATrace_beginSection ATrace_beginSection;
  fp_ATrace_endSection ATrace_endSection;
};
TraceFunc setupTraceFunc() {
  void* lib = dlopen("libandroid.so", RTLD_NOW | RTLD_LOCAL);
  if (lib == nullptr) {
    FATAL("unable to open libandroid.so");
  }
  return {
      reinterpret_cast<fp_ATrace_beginSection>(
          dlsym(lib, "ATrace_beginSection")),
      reinterpret_cast<fp_ATrace_endSection>(dlsym(lib, "ATrace_endSection"))};
}
static TraceFunc kTraceFunc{setupTraceFunc()};
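
// Illustrative sketch (added, not in the original): once kTraceFunc is
// initialized, any region of interest can be traced with paired calls:
//   kTraceFunc.ATrace_beginSection("MySection");  // "MySection" is a made-up name
//   doWork();                                     // hypothetical workload
//   kTraceFunc.ATrace_endSection();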

// Returns the number of partitions associated with the given delegate as a
// result of a call to ModifyGraphWithDelegate.
int CountPartitionsDelegatedTo(tflite::Subgraph* subgraph,
                               const TfLiteDelegate* delegate) {
  return std::count_if(
      subgraph->nodes_and_registration().begin(),
      subgraph->nodes_and_registration().end(),
      [delegate](
          std::pair<TfLiteNode, TfLiteRegistration> node_and_registration) {
        return node_and_registration.first.delegate == delegate;
      });
}

// Same as above, but aggregated over all subgraphs of the interpreter.
int CountPartitionsDelegatedTo(tflite::Interpreter* interpreter,
                               const TfLiteDelegate* delegate) {
  int result = 0;
  for (int i = 0; i < interpreter->subgraphs_size(); i++) {
    tflite::Subgraph* subgraph = interpreter->subgraph(i);

    result += CountPartitionsDelegatedTo(subgraph, delegate);
  }

  return result;
}
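
// Illustrative usage (added, not in the original): these helpers are intended
// to run right after ModifyGraphWithDelegate() to verify that delegation
// actually took effect:
//   interpreter->ModifyGraphWithDelegate(delegate.get());
//   if (CountPartitionsDelegatedTo(interpreter.get(), delegate.get()) == 0) {
//     // Nothing was offloaded to the delegate; fail or fall back.
//   }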

}  // namespace

BenchmarkModel* BenchmarkModel::create(const char* modelfile, int tfliteBackend,
                                       bool enable_intermediate_tensors_dump, int* nnapiErrno,
                                       const char* nnapi_device_name, bool mmapModel,
                                       const char* nnapi_cache_dir,
                                       const tflite::nnapi::NnApiSupportLibrary* nnApiSl) {
  BenchmarkModel* model = new BenchmarkModel();
  if (!model->init(modelfile, tfliteBackend, enable_intermediate_tensors_dump, nnapiErrno,
                   nnapi_device_name, mmapModel, nnapi_cache_dir, nnApiSl)) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to init model %s", modelfile);
    delete model;
    return nullptr;
  }
  return model;
}

bool BenchmarkModel::init(const char* modelfile, int tfliteBackend,
                          bool enable_intermediate_tensors_dump, int* nnapiErrno,
                          const char* nnapi_device_name, bool mmapModel,
                          const char* nnapi_cache_dir,
                          const tflite::nnapi::NnApiSupportLibrary* nnApiSl) {
  __android_log_print(ANDROID_LOG_INFO, LOG_TAG, "BenchmarkModel %s",
                      modelfile);
  mModelFile = modelfile;
  if (nnapi_cache_dir) {
    mCacheDir = nnapi_cache_dir;
  }
  if (nnapi_device_name) {
    mNnApiDeviceName = nnapi_device_name;
  }

  if (mmapModel) {
    // Memory-map the model. NOTE: the mapping must stay alive at least as
    // long as the interpreter built from it.
    mTfliteModel = tflite::FlatBufferModel::BuildFromFile(modelfile);
  } else {
    std::ifstream t(modelfile);
    mModelBuffer = std::string((std::istreambuf_iterator<char>(t)),
                               std::istreambuf_iterator<char>());
    mTfliteModel = tflite::FlatBufferModel::BuildFromBuffer(mModelBuffer.c_str(),
                                                            mModelBuffer.size());
  }
  if (!mTfliteModel) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to load model %s",
                        modelfile);
    return false;
  }

  tflite::ops::builtin::BuiltinOpResolver resolver;
  tflite::InterpreterBuilder(*mTfliteModel, resolver)(&mTfliteInterpreter);
  if (!mTfliteInterpreter) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                        "Failed to create TFlite interpreter");
    return false;
  }

  if (enable_intermediate_tensors_dump) {
    // Make output of every op a model output. This way we will be able to
    // fetch each intermediate tensor when running with delegates.
    outputs.clear();
    for (size_t node = 0; node < mTfliteInterpreter->nodes_size(); ++node) {
      auto node_outputs =
          mTfliteInterpreter->node_and_registration(node)->first.outputs;
      outputs.insert(outputs.end(), node_outputs->data,
                     node_outputs->data + node_outputs->size);
    }
    mTfliteInterpreter->SetOutputs(outputs);
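    // Note (added for clarity): after Invoke(), each captured tensor can be
    // read back via mTfliteInterpreter->tensor(outputs[i]), as dumpAllLayers()
    // does below.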
  }

  // Allow Fp16 precision for all models
  mTfliteInterpreter->SetAllowFp16PrecisionForFp32(true);

  mTfliteBackend = tfliteBackend;
  switch (mTfliteBackend) {
    case TFLITE_NNAPI: {
      tflite::StatefulNnApiDelegate::Options nnapi_options;
      nnapi_options.accelerator_name =
          mNnApiDeviceName.empty() ? nullptr : mNnApiDeviceName.c_str();
      __android_log_print(ANDROID_LOG_INFO, LOG_TAG,
          "Delegating to NNAPI device '%s'", mNnApiDeviceName.c_str());
      if (nnApiSl) {
        mNnApiSl = nnApiSl;
        __android_log_print(ANDROID_LOG_INFO, LOG_TAG, "Using NNAPI SL");
      }
      mTfliteNnapiDelegate =
          nnApiSl
              ? std::make_unique<tflite::StatefulNnApiDelegate>(nnApiSl->getFL5(),
                                                                nnapi_options)
              : std::make_unique<tflite::StatefulNnApiDelegate>(nnapi_options);
      int delegationStatus =
          mTfliteInterpreter->ModifyGraphWithDelegate(mTfliteNnapiDelegate.get());
      *nnapiErrno = mTfliteNnapiDelegate->GetNnApiErrno();
      if ((delegationStatus == kTfLiteOk) &&
          (*nnapiErrno == ANEURALNETWORKS_NO_ERROR)) {
        int nnapiPartitions = CountPartitionsDelegatedTo(
            mTfliteInterpreter.get(), mTfliteNnapiDelegate.get());
        if (nnapiPartitions == 0) {
          __android_log_print(
              ANDROID_LOG_ERROR, LOG_TAG,
              "NNAPI Delegate (%s) for model %s resulted in %d partitions "
              "delegated to NNAPI",
              nnapi_device_name, modelfile, nnapiPartitions);
          return false;
        } else {
          __android_log_print(
              ANDROID_LOG_INFO, LOG_TAG,
              "NNAPI Delegate (%s) for model %s initialized successfully with "
              "%d partitions delegated to NNAPI",
              nnapi_device_name, modelfile, nnapiPartitions);
        }
      } else {
        __android_log_print(
            ANDROID_LOG_ERROR, LOG_TAG,
            "Failed to initialize NNAPI Delegate for model %s, nnapi_errno is %d",
            modelfile, *nnapiErrno);
        return false;
      }

    } break;
    case TFLITE_GPU: {
#if defined(NN_BENCHMARK_ENABLE_GPU)
      mGpuDelegate = TfLiteGpuDelegateV2Create(/*default options=*/nullptr);
      if (mTfliteInterpreter->ModifyGraphWithDelegate(mGpuDelegate) !=
          kTfLiteOk) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                            "Failed to initialize GPU Delegate");
        return false;
      } else {
        int gpuPartitions =
            CountPartitionsDelegatedTo(mTfliteInterpreter.get(), mGpuDelegate);
        if (gpuPartitions == 0) {
          __android_log_print(
              ANDROID_LOG_ERROR, LOG_TAG,
              "GPU Delegate for model %s resulted in %d partitions delegated",
              modelfile, gpuPartitions);
          return false;
        }
      }
#else  // !defined(NN_BENCHMARK_ENABLE_GPU)
      __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                          "GPU delegate requested but not enabled with "
                          "NN_BENCHMARK_ENABLE_GPU");
      return false;
#endif  // defined(NN_BENCHMARK_ENABLE_GPU)
    } break;
    default:
      break;
  }
  return true;
}

BenchmarkModel::~BenchmarkModel() {
  switch (mTfliteBackend) {
    case TFLITE_GPU: {
#if defined(NN_BENCHMARK_ENABLE_GPU)
      TfLiteGpuDelegateV2Delete(mGpuDelegate);
#endif  // defined(NN_BENCHMARK_ENABLE_GPU)
    } break;
    default:
      break;
  }
}

bool BenchmarkModel::setInput(const uint8_t* dataPtr, size_t length) {
  int input = mTfliteInterpreter->inputs()[0];
  auto* input_tensor = mTfliteInterpreter->tensor(input);
  // Guard against writing past the end of the input tensor.
  if (length > input_tensor->bytes) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                        "Input too large: %zu bytes for a %zu-byte tensor",
                        length, input_tensor->bytes);
    return false;
  }

  switch (input_tensor->type) {
    case kTfLiteFloat32:
    case kTfLiteUInt8: {
      void* raw = input_tensor->data.raw;
      memcpy(raw, dataPtr, length);
      break;
    }
    default:
      __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                          "Input tensor type not supported");
      return false;
  }
  return true;
}

void BenchmarkModel::saveInferenceOutput(InferenceResult* result,
                                         int output_index) {
  int output = mTfliteInterpreter->outputs()[output_index];
  auto* output_tensor = mTfliteInterpreter->tensor(output);
  auto& sink = result->inferenceOutputs[output_index];
  sink.insert(sink.end(), output_tensor->data.uint8,
              output_tensor->data.uint8 + output_tensor->bytes);
}

void BenchmarkModel::getOutputError(const uint8_t* expected_data, size_t length,
                                    InferenceResult* result, int output_index) {
  int output = mTfliteInterpreter->outputs()[output_index];
  auto* output_tensor = mTfliteInterpreter->tensor(output);
  if (output_tensor->bytes != length) {
    FATAL("Wrong size of output tensor, expected %zu, is %zu",
          length, output_tensor->bytes);
  }

  size_t elements_count = 0;
  float err_sum = 0.0;
  float max_error = 0.0;
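  // Note (added for clarity): per output tensor this computes the mean squared
  // error, err_sum / N with err_sum = sum_i (actual_i - expected_i)^2, and the
  // largest absolute single-element error.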
  switch (output_tensor->type) {
    case kTfLiteUInt8: {
      uint8_t* output_raw = mTfliteInterpreter->typed_tensor<uint8_t>(output);
      elements_count = output_tensor->bytes;
      for (size_t i = 0; i < output_tensor->bytes; ++i) {
        // Track the largest absolute element-wise error.
        float err = std::fabs(((float)output_raw[i]) - ((float)expected_data[i]));
        if (err > max_error) max_error = err;
        err_sum += err * err;
      }
      break;
    }
    case kTfLiteFloat32: {
      const float* expected = reinterpret_cast<const float*>(expected_data);
      float* output_raw = mTfliteInterpreter->typed_tensor<float>(output);
      elements_count = output_tensor->bytes / sizeof(float);
      for (size_t i = 0; i < elements_count; ++i) {
        float err = std::fabs(output_raw[i] - expected[i]);
        if (err > max_error) max_error = err;
        err_sum += err * err;
      }
      break;
    }
    default:
      FATAL("Output tensor type %d not supported", output_tensor->type);
  }
  result->meanSquareErrors[output_index] = err_sum / elements_count;
  result->maxSingleErrors[output_index] = max_error;
}

bool BenchmarkModel::resizeInputTensors(std::vector<int> shape) {
  // The benchmark only expects a single input tensor, hardcoded as 0.
  int input = mTfliteInterpreter->inputs()[0];
  if (mTfliteInterpreter->ResizeInputTensor(input, shape) != kTfLiteOk) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                        "Failed to resize input tensor!");
    return false;
  }
  if (mTfliteInterpreter->AllocateTensors() != kTfLiteOk) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                        "Failed to allocate tensors!");
    return false;
  }
  return true;
}

bool BenchmarkModel::runInference() {
  auto status = mTfliteInterpreter->Invoke();
  auto nnapi_errno = mTfliteNnapiDelegate
                         ? mTfliteNnapiDelegate->GetNnApiErrno()
                         : ANEURALNETWORKS_NO_ERROR;
  if (status != kTfLiteOk || nnapi_errno != ANEURALNETWORKS_NO_ERROR) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                        "Failed to invoke, tflite status: %d, nnapi errno: %d!",
                        (int)status, nnapi_errno);
    return false;
  }
  return true;
}

bool BenchmarkModel::resetStates() {
  auto status = mTfliteInterpreter->ResetVariableTensors();
  if (status != kTfLiteOk) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                        "Failed to reset variable tensors: %d!", (int)status);
    return false;
  }
  return true;
}

bool BenchmarkModel::benchmark(
    const std::vector<InferenceInOutSequence>& inOutData,
    int seqInferencesMaxCount, float timeout, int flags,
    std::vector<InferenceResult>* results) {
  if (inOutData.empty()) {
    __android_log_print(ANDROID_LOG_WARN, LOG_TAG,
                        "Input/output vector is empty");
    return true;
  }

  float inferenceTotal = 0.0;
  for (int seqInferenceIndex = 0; seqInferenceIndex < seqInferencesMaxCount;
       ++seqInferenceIndex) {
    resetStates();

    const int inputOutputSequenceIndex = seqInferenceIndex % inOutData.size();
    const InferenceInOutSequence& seq = inOutData[inputOutputSequenceIndex];
    const bool sampleResults = (flags & FLAG_SAMPLE_BENCHMARK_RESULTS) != 0;
    for (int i = 0; i < seq.size(); ++i) {
      const InferenceInOut& data = seq[i];

      // For NNAPI systrace usage documentation, see
      // frameworks/ml/nn/common/include/Tracing.h.
      kTraceFunc.ATrace_beginSection("[NN_LA_PE]BenchmarkModel::benchmark");
      kTraceFunc.ATrace_beginSection("[NN_LA_PIO]BenchmarkModel::input");
      if (data.input) {
        setInput(data.input, data.input_size);
      } else {
        int input = mTfliteInterpreter->inputs()[0];
        auto* input_tensor = mTfliteInterpreter->tensor(input);
        if (!data.createInput((uint8_t*)input_tensor->data.raw,
                              input_tensor->bytes)) {
          __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                              "Input creation %d failed", i);
          return false;
        }
      }
      kTraceFunc.ATrace_endSection();
      long long startTime = currentTimeInUsec();
      const bool success = runInference();
      kTraceFunc.ATrace_endSection();
      long long endTime = currentTimeInUsec();
      if (!success) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Inference %d failed",
                            i);
        return false;
      }

      float inferenceTime =
          static_cast<float>(endTime - startTime) / 1000000.0f;
      size_t outputsCount = mTfliteInterpreter->outputs().size();
      InferenceResult result{
          inferenceTime, {}, {}, {}, inputOutputSequenceIndex, i};
      result.meanSquareErrors.resize(outputsCount);
      result.maxSingleErrors.resize(outputsCount);
      result.inferenceOutputs.resize(outputsCount);

      if ((flags & FLAG_IGNORE_GOLDEN_OUTPUT) == 0) {
        if (outputsCount != data.outputs.size()) {
          __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                              "Golden/actual outputs (%zu/%zu) count mismatch",
                              data.outputs.size(), outputsCount);
          return false;
        }
        for (int j = 0; j < outputsCount; ++j) {
          getOutputError(data.outputs[j].ptr, data.outputs[j].size, &result, j);
        }
      }

      if ((flags & FLAG_DISCARD_INFERENCE_OUTPUT) == 0) {
        for (int j = 0; j < outputsCount; ++j) {
          saveInferenceOutput(&result, j);
        }
      }

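      // Note (added for clarity): with FLAG_SAMPLE_BENCHMARK_RESULTS set, only
      // every INFERENCE_OUT_SAMPLE_RATE-th sequence's results are recorded, to
      // bound the size of the result vector.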
      if (!sampleResults || (seqInferenceIndex % INFERENCE_OUT_SAMPLE_RATE) == 0) {
        results->push_back(result);
      }
      inferenceTotal += inferenceTime;
    }

    // Timeout?
    if (timeout > 0.001 && inferenceTotal > timeout) {
      return true;
    }
  }
  return true;
}

// If cacheDir is not nullptr, compilation caching will be used with NNAPI.
bool BenchmarkModel::runCompilation(const char* cacheDir, bool useNnapiSl) {
  std::unique_ptr<tflite::StatefulNnApiDelegate> delegate;
  std::unique_ptr<tflite::Interpreter> interpreter;
  tflite::ops::builtin::BuiltinOpResolver resolver;
  tflite::InterpreterBuilder(*mTfliteModel, resolver)(&interpreter);
  if (!interpreter) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to create TFlite interpreter");
    return false;
  }

  // Allow Fp16 precision for all models
  interpreter->SetAllowFp16PrecisionForFp32(true);

  if (mTfliteBackend == TFLITE_NNAPI) {
    tflite::StatefulNnApiDelegate::Options nnapi_options;
    nnapi_options.accelerator_name =
        mNnApiDeviceName.empty() ? nullptr : mNnApiDeviceName.c_str();
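    // Note (added for clarity): NNAPI compilation caching needs both a
    // writable cache directory and a client-supplied token identifying the
    // model; this benchmark reuses the model path as the token.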
    if (cacheDir) {
      nnapi_options.cache_dir = cacheDir;
      nnapi_options.model_token = mModelFile.c_str();
    }
    if (useNnapiSl) {
      __android_log_print(ANDROID_LOG_INFO, LOG_TAG,
                          "Using NNAPI SL in compilation caching benchmark.");
      if (!mNnApiSl) {
        __android_log_print(ANDROID_LOG_ERROR,
                            LOG_TAG,
                            "NNAPI SL is a null pointer when running the compilation caching benchmark.");
        return false;
      }
      delegate = std::make_unique<tflite::StatefulNnApiDelegate>(mNnApiSl->getFL5(), nnapi_options);
    } else {
      delegate = std::make_unique<tflite::StatefulNnApiDelegate>(nnapi_options);
    }
    int delegationStatus = interpreter->ModifyGraphWithDelegate(delegate.get());
    auto nnapiErrno = delegate->GetNnApiErrno();
    if (delegationStatus != kTfLiteOk || nnapiErrno != ANEURALNETWORKS_NO_ERROR) {
      __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                          "Failed to initialize NNAPI Delegate for model %s, nnapi_errno is %d",
                          mModelFile.c_str(), nnapiErrno);
      return false;
    } else {
      int nnapiPartitions =
        CountPartitionsDelegatedTo(interpreter.get(), delegate.get());
      if (nnapiPartitions == 0) {
        __android_log_print(
            ANDROID_LOG_ERROR, LOG_TAG,
            "NNAPI Delegate (%s) for model %s resulted in %d partitions "
            "delegated to NNAPI",
            mNnApiDeviceName.c_str(), mModelFile.c_str(), nnapiPartitions);
        return false;
      }
    }
  }
  return true;
}

// A helper class to manage the lifetime of a temporary cache directory.
class ScopedTempDirectory {
 public:
  ScopedTempDirectory(std::string base) : mBase(std::move(base)) {}
  ~ScopedTempDirectory() { cleanup(); }

  // Create a new temp directory, removing the old one if needed.
  void recreate() {
    cleanup();
    mTempDir = mBase + "/XXXXXX";
    if (mkdtemp(&mTempDir[0]) == nullptr) {
      FATAL("Failed to create temp directory in %s", mBase.c_str());
    }
  }

  // Get the path to the temp directory.
  const char* get() const { return mTempDir.empty() ? nullptr : mTempDir.c_str(); }

 private:
  void cleanup() {
    if (mTempDir.empty()) {
      return;
    }
    auto callback = [](const char* entry, const struct stat*, int, struct FTW*) {
      return remove(entry);
    };
    nftw(mTempDir.c_str(), callback, 128, FTW_DEPTH | FTW_MOUNT | FTW_PHYS);
    mTempDir.clear();
  }

  std::string mBase;
  std::string mTempDir;
};
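
// Illustrative usage sketch (added; mirrors getCompilationCacheSize() below):
//   ScopedTempDirectory tempDir("/data/local/tmp");  // hypothetical base path
//   tempDir.recreate();           // makes a fresh empty directory under base
//   use(tempDir.get());           // hypothetical consumer of the path
//   // The directory tree is removed when tempDir goes out of scope.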

bool BenchmarkModel::getCompilationCacheSize(int* cacheSizeBytes, bool useNnapiSl) {
  if (cacheSizeBytes == nullptr) return false;

  // Create cache files.
  ScopedTempDirectory tempDir(mCacheDir.value());
  tempDir.recreate();
  const bool success = runCompilation(tempDir.get(), useNnapiSl);
  if (!success) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Save to cache failed");
    return false;
  }

  // Compute total size of cache files.
  int totalSize = 0;
  DIR* dir = opendir(tempDir.get());
  if (dir == nullptr) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to open cache directory");
    return false;
  }
  struct dirent* dp = nullptr;
  while ((dp = readdir(dir)) != nullptr) {
    char fullPath[1024];
    snprintf(fullPath, 1024, "%s/%s", tempDir.get(), dp->d_name);
    struct stat st;
    int err = stat(fullPath, &st);
    if (err != 0) {
      closedir(dir);
      __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to stat %s", fullPath);
      return false;
    }
    // Only accumulate sizes of regular files. This will exclude '.' and '..'.
    if (S_ISREG(st.st_mode)) {
      totalSize += st.st_size;
    }
  }
  closedir(dir);
  *cacheSizeBytes = totalSize;
  return true;
}

bool BenchmarkModel::benchmarkSingleTypeOfCompilation(CompilationBenchmarkType type,
                                                      int maxNumIterations, float timeout,
                                                      bool useNnapiSl,
                                                      std::vector<float>* results) {
  if (results != nullptr) {
    results->clear();
  }
  ScopedTempDirectory tempDir(mCacheDir.value());

  // Initialize cache files to benchmark a cache hit.
  if (type == CompilationBenchmarkType::PREPARE_FROM_CACHE) {
    tempDir.recreate();
    const bool success = runCompilation(tempDir.get(), useNnapiSl);
    if (!success) {
      __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Save to cache failed");
      return false;
    }
  }

  float compilationTotal = 0.0;
  for (int i = 0; i < maxNumIterations; i++) {
    const char* cacheDir = nullptr;
    switch (type) {
      case CompilationBenchmarkType::WITHOUT_CACHE:
        cacheDir = nullptr;
        break;
      case CompilationBenchmarkType::SAVE_TO_CACHE:
        // Remove the cache files from the last iteration to benchmark a cache miss.
        tempDir.recreate();
        [[fallthrough]];
      case CompilationBenchmarkType::PREPARE_FROM_CACHE:
        cacheDir = tempDir.get();
        break;
      default:
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Unknown CompilationBenchmarkType: %d",
                            static_cast<int>(type));
        return false;
    }

    kTraceFunc.ATrace_beginSection("[NN_LA_PC]BenchmarkModel::benchmarkCompilation");
    const long long startTime = currentTimeInUsec();
    const bool success = runCompilation(cacheDir, useNnapiSl);
    const long long endTime = currentTimeInUsec();
    kTraceFunc.ATrace_endSection();
    if (!success) {
      __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Compilation %d failed", i);
      return false;
    }

    const float compilationTime = static_cast<float>(endTime - startTime) / 1000000.0f;
    if (results != nullptr) {
      results->push_back(compilationTime);
    }

    // Timeout?
    compilationTotal += compilationTime;
    if (timeout > 0.001 && compilationTotal > timeout) {
      return true;
    }
  }
  return true;
}

bool BenchmarkModel::benchmarkSingleTypeOfCompilationWithWarmup(CompilationBenchmarkType type,
                                                                int maxNumIterations,
                                                                float warmupTimeout,
                                                                float runTimeout,
                                                                bool useNnapiSl,
                                                                std::vector<float>* results) {
  kTraceFunc.ATrace_beginSection(
      "[NN_LA_PWM]BenchmarkModel::benchmarkSingleTypeOfCompilationWithWarmup");
  bool success = benchmarkSingleTypeOfCompilation(type,
                                                  maxNumIterations,
                                                  warmupTimeout,
                                                  useNnapiSl,
                                                  nullptr);
  kTraceFunc.ATrace_endSection();
  if (!success) return false;

  kTraceFunc.ATrace_beginSection(
      "[NN_LA_PBM]BenchmarkModel::benchmarkSingleTypeOfCompilationWithWarmup");
  success = benchmarkSingleTypeOfCompilation(type,
                                             maxNumIterations,
                                             runTimeout,
                                             useNnapiSl,
                                             results);
  kTraceFunc.ATrace_endSection();
  return success;
}

bool BenchmarkModel::benchmarkCompilation(int maxNumIterations, float warmupTimeout,
                                          float runTimeout, bool useNnapiSl,
                                          CompilationBenchmarkResult* result) {
  if (result == nullptr) return false;

  // Benchmark compilation without cache.
  bool success = benchmarkSingleTypeOfCompilationWithWarmup(
          CompilationBenchmarkType::WITHOUT_CACHE, maxNumIterations,
          warmupTimeout, runTimeout, useNnapiSl,
          &result->compileWithoutCacheTimeSec);
  if (!success) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                        "Failed to benchmark compilation without cache");
    return false;
  }

  // Get the compilation cache size.
  success = getCompilationCacheSize(&result->cacheSizeBytes, useNnapiSl);
  if (!success) {
    __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to retrieve compilation cache size");
    return false;
  }

  // Benchmark saving to cache and preparing from cache only if supported.
  if (result->cacheSizeBytes > 0) {
    // Benchmark saving to cache.
    auto& saveToCacheTimeSec = result->saveToCacheTimeSec.emplace();
    success = benchmarkSingleTypeOfCompilationWithWarmup(
            CompilationBenchmarkType::SAVE_TO_CACHE, maxNumIterations,
            warmupTimeout, runTimeout, useNnapiSl,
            &saveToCacheTimeSec);
    if (!success) {
      __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to benchmark saving to cache");
      return false;
    }

    // Benchmark preparing from cache.
    auto& prepareFromCacheTimeSec = result->prepareFromCacheTimeSec.emplace();
    success = benchmarkSingleTypeOfCompilationWithWarmup(
            CompilationBenchmarkType::PREPARE_FROM_CACHE, maxNumIterations,
            warmupTimeout, runTimeout, useNnapiSl,
            &prepareFromCacheTimeSec);
    if (!success) {
      __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to benchmark preparing from cache");
      return false;
    }
  }
  return true;
}

bool BenchmarkModel::dumpAllLayers(
    const char* path, const std::vector<InferenceInOutSequence>& inOutData) {
  if (inOutData.empty()) {
    FATAL("Input/output vector is empty");
  }

  for (int seqInferenceIndex = 0; seqInferenceIndex < inOutData.size();
       ++seqInferenceIndex) {
    resetStates();

    const InferenceInOutSequence& seq = inOutData[seqInferenceIndex];
    for (int i = 0; i < seq.size(); ++i) {
      const InferenceInOut& data = seq[i];
      setInput(data.input, data.input_size);
      const bool success = runInference();
      if (!success) {
        __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Inference %d failed",
                            i);
        return false;
      }

      // The tensors are not ordered by tensor index.
      for (int tensor_order = 0; tensor_order < outputs.size(); ++tensor_order) {
        int tensor_index = outputs[tensor_order];
        auto* output_tensor = mTfliteInterpreter->tensor(tensor_index);
        if (output_tensor->data.raw == nullptr) {
          __android_log_print(ANDROID_LOG_ERROR, LOG_TAG,
                      "output_tensor->data.raw == nullptr at index %d ", tensor_index);
          continue;
        }
        char fullpath[1024];
        snprintf(fullpath, 1024, "%s/dump_%.3d_seq_%.3d_order_%.3d_tensor_%.3d", path,
                 seqInferenceIndex, i, tensor_order, tensor_index);
        FILE* f = fopen(fullpath, "wb");
        if (f == nullptr) {
          __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, "Failed to open %s",
                              fullpath);
          return false;
        }
        fwrite(output_tensor->data.raw, output_tensor->bytes, 1, f);
        fclose(f);
      }
    }
  }
  return true;
}