/**
 * Copyright 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef COM_EXAMPLE_ANDROID_NN_BENCHMARK_RUN_TFLITE_H
#define COM_EXAMPLE_ANDROID_NN_BENCHMARK_RUN_TFLITE_H

#include "tensorflow/lite/delegates/gpu/delegate.h"
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/model.h"
#include "tensorflow/lite/nnapi/sl/include/SupportLibrary.h"

#include <cstdint>
#include <functional>
#include <memory>
#include <optional>
#include <string>
#include <unistd.h>
#include <vector>

struct InferenceOutput {
  uint8_t* ptr;
  size_t size;
};

// Inputs and expected outputs for inference
struct InferenceInOut {
  // Input can be specified either directly, as a pointer, or indirectly, via
  // the createInput callback. The callback is needed for large datasets where
  // allocating memory for all inputs at once is not feasible; see the sketch
  // after this struct.
  uint8_t* input;
  size_t input_size;

  std::vector<InferenceOutput> outputs;
  std::function<bool(uint8_t*, size_t)> createInput;
};
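
// Illustrative sketch (not part of the original header): building an
// InferenceInOut that produces its input lazily through createInput instead of
// pointing at a preallocated buffer. The helper name and the zero-fill payload
// are hypothetical, chosen only to show the callback contract.
inline InferenceInOut makeLazyZeroInput(size_t inputSize) {
  InferenceInOut inOut = {};
  inOut.input = nullptr;  // No eagerly allocated input buffer.
  inOut.input_size = inputSize;
  inOut.createInput = [](uint8_t* buffer, size_t size) {
    // Fill the provided buffer in place; a real caller would read or generate
    // actual model input data here.
    for (size_t i = 0; i < size; ++i) buffer[i] = 0;
    return true;  // Report success to the benchmark loop.
  };
  return inOut;
}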

// Inputs and expected outputs for an inference sequence.
using InferenceInOutSequence = std::vector<InferenceInOut>;

// Result of a single inference
struct InferenceResult {
  float computeTimeSec;
  // MSE for each output
  std::vector<float> meanSquareErrors;
  // Max single error for each output
  std::vector<float> maxSingleErrors;
  // Outputs
  std::vector<std::vector<uint8_t>> inferenceOutputs;
  int inputOutputSequenceIndex;
  int inputOutputIndex;
};

struct CompilationBenchmarkResult {
  std::vector<float> compileWithoutCacheTimeSec;
  // The following optional fields have no value if compilation caching is not supported.
  std::optional<std::vector<float>> saveToCacheTimeSec;
  std::optional<std::vector<float>> prepareFromCacheTimeSec;
  // The total size of cache files. It is zero if compilation caching is not supported.
  int cacheSizeBytes = 0;
};

/** Discard inference output in inference results. */
const int FLAG_DISCARD_INFERENCE_OUTPUT = 1 << 0;
/** Do not expect golden output for inference inputs. */
const int FLAG_IGNORE_GOLDEN_OUTPUT = 1 << 1;
/** Collect only one benchmark result every INFERENCE_OUT_SAMPLE_RATE inferences. */
const int FLAG_SAMPLE_BENCHMARK_RESULTS = 1 << 2;

const int INFERENCE_OUT_SAMPLE_RATE = 10;

enum class CompilationBenchmarkType {
  // Benchmark without cache
  WITHOUT_CACHE,
  // Benchmark cache miss
  SAVE_TO_CACHE,
  // Benchmark cache hit
  PREPARE_FROM_CACHE,
};

/** TFLite backend. */
constexpr int TFLITE_CPU = 0;
constexpr int TFLITE_NNAPI = 1;
constexpr int TFLITE_GPU = 2;

class BenchmarkModel {
 public:
  ~BenchmarkModel();

  static BenchmarkModel* create(const char* modelfile, int tfliteBackend,
                                bool enable_intermediate_tensors_dump,
                                int* nnapiErrno, const char* nnapi_device_name,
                                bool mmapModel, const char* nnapi_cache_dir,
                                const tflite::nnapi::NnApiSupportLibrary* nnApiSl = nullptr);

  bool resizeInputTensors(std::vector<int> shape);
  bool setInput(const uint8_t* dataPtr, size_t length);
  bool runInference();
  // Resets TFLite state (e.g. RNN/LSTM states).
  bool resetStates();

  bool benchmark(const std::vector<InferenceInOutSequence>& inOutData,
                 int seqInferencesMaxCount, float timeout, int flags,
                 std::vector<InferenceResult>* result);

  bool benchmarkCompilation(int maxNumIterations,
                            float warmupTimeout,
                            float runTimeout,
                            bool useNnapiSl,
                            CompilationBenchmarkResult* result);

  bool dumpAllLayers(const char* path,
                     const std::vector<InferenceInOutSequence>& inOutData);

 private:
  BenchmarkModel() = default;
  bool init(const char* modelfile, int tfliteBackend,
            bool enable_intermediate_tensors_dump,
            int* nnapiErrno, const char* nnapi_device_name,
            /* flag to choose between memory-mapping the model and
               initializing the model from the program's memory */
            bool mmapModel,
            const char* nnapi_cache_dir,
            const tflite::nnapi::NnApiSupportLibrary* nnApiSl = nullptr);

  void getOutputError(const uint8_t* dataPtr, size_t length,
                      InferenceResult* result, int output_index);
  void saveInferenceOutput(InferenceResult* result, int output_index);

  bool runCompilation(const char* cacheDir, bool useNnapiSl);
  bool benchmarkSingleTypeOfCompilation(CompilationBenchmarkType type,
                                        int maxNumIterations,
                                        float timeout,
                                        bool useNnapiSl,
                                        std::vector<float>* results);
  bool benchmarkSingleTypeOfCompilationWithWarmup(CompilationBenchmarkType type,
                                                  int maxNumIterations,
                                                  float warmupTimeout,
                                                  float runTimeout,
                                                  bool useNnapiSl,
                                                  std::vector<float>* results);
  bool getCompilationCacheSize(int* cacheSizeBytes, bool useNnapiSl);

  std::string mModelBuffer;
  std::unique_ptr<tflite::FlatBufferModel> mTfliteModel;
  std::unique_ptr<tflite::StatefulNnApiDelegate> mTfliteNnapiDelegate;
  std::unique_ptr<tflite::Interpreter> mTfliteInterpreter;
  // Indices of output tensors, used to dump intermediate tensors.
  std::vector<int> outputs;

  // Parameters for compilation
  std::string mModelFile;
  std::optional<std::string> mCacheDir;
  std::string mNnApiDeviceName;
  const tflite::nnapi::NnApiSupportLibrary* mNnApiSl = nullptr;
#if defined(NN_BENCHMARK_ENABLE_GPU)
  TfLiteDelegate* mGpuDelegate;
#endif  // defined(NN_BENCHMARK_ENABLE_GPU)
  int mTfliteBackend;
};
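
// Illustrative usage sketch (not part of the original header): creating a
// CPU-backed BenchmarkModel and running the inference benchmark while
// discarding raw outputs. The helper name, iteration count, and timeout are
// hypothetical; error reporting is reduced to a bool.
inline bool exampleRunCpuBenchmark(const char* modelPath,
                                   const std::vector<InferenceInOutSequence>& inOutData,
                                   std::vector<InferenceResult>* results) {
  int nnapiErrno = 0;
  std::unique_ptr<BenchmarkModel> model(BenchmarkModel::create(
      modelPath, TFLITE_CPU,
      /*enable_intermediate_tensors_dump=*/false, &nnapiErrno,
      /*nnapi_device_name=*/nullptr,
      /*mmapModel=*/true,
      /*nnapi_cache_dir=*/nullptr));
  if (model == nullptr) {
    return false;
  }
  // Run at most 100 sequences or 60 seconds, whichever comes first, keeping
  // only timing and error statistics rather than full output tensors.
  return model->benchmark(inOutData, /*seqInferencesMaxCount=*/100,
                          /*timeout=*/60.0f, FLAG_DISCARD_INFERENCE_OUTPUT,
                          results);
}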

#endif  // COM_EXAMPLE_ANDROID_NN_BENCHMARK_RUN_TFLITE_H