/**
 * Copyright 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef COM_EXAMPLE_ANDROID_NN_BENCHMARK_RUN_TFLITE_H
#define COM_EXAMPLE_ANDROID_NN_BENCHMARK_RUN_TFLITE_H

#include "tensorflow/lite/delegates/gpu/delegate.h"
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/model.h"
#include "tensorflow/lite/nnapi/sl/include/SupportLibrary.h"

#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <optional>
#include <string>
#include <unistd.h>
#include <vector>

struct InferenceOutput {
  uint8_t* ptr;
  size_t size;
};

// Inputs and expected outputs for inference.
struct InferenceInOut {
  // Input can either be directly specified as a pointer or indirectly with
  // the createInput callback. This is needed for large datasets where
  // allocating memory for all inputs at once is not feasible.
  uint8_t* input;
  size_t input_size;

  std::vector<InferenceOutput> outputs;
  std::function<bool(uint8_t*, size_t)> createInput;
};

// Inputs and expected outputs for an inference sequence.
using InferenceInOutSequence = std::vector<InferenceInOut>;

// Result of a single inference.
struct InferenceResult {
  float computeTimeSec;
  // MSE for each output.
  std::vector<float> meanSquareErrors;
  // Max single error for each output.
  std::vector<float> maxSingleErrors;
  // Outputs.
  std::vector<std::vector<uint8_t>> inferenceOutputs;
  int inputOutputSequenceIndex;
  int inputOutputIndex;
};

struct CompilationBenchmarkResult {
  std::vector<float> compileWithoutCacheTimeSec;
  // The following optional fields have no value if compilation caching is not
  // supported.
  std::optional<std::vector<float>> saveToCacheTimeSec;
  std::optional<std::vector<float>> prepareFromCacheTimeSec;
  // The total size of the cache files. Zero if compilation caching is not
  // supported.
  int cacheSizeBytes = 0;
};

/** Discard inference output in inference results. */
const int FLAG_DISCARD_INFERENCE_OUTPUT = 1 << 0;
/** Do not expect golden outputs for inference inputs. */
const int FLAG_IGNORE_GOLDEN_OUTPUT = 1 << 1;
/** Collect only one benchmark result every INFERENCE_OUT_SAMPLE_RATE inferences. */
const int FLAG_SAMPLE_BENCHMARK_RESULTS = 1 << 2;

const int INFERENCE_OUT_SAMPLE_RATE = 10;

enum class CompilationBenchmarkType {
  // Benchmark without a cache.
  WITHOUT_CACHE,
  // Benchmark a cache miss.
  SAVE_TO_CACHE,
  // Benchmark a cache hit.
  PREPARE_FROM_CACHE,
};

/** TFLite backend. */
constexpr int TFLITE_CPU = 0;
constexpr int TFLITE_NNAPI = 1;
constexpr int TFLITE_GPU = 2;

class BenchmarkModel {
 public:
  ~BenchmarkModel();

  static BenchmarkModel* create(const char* modelfile, int tfliteBackend,
                                bool enable_intermediate_tensors_dump,
                                int* nnapiErrno, const char* nnapi_device_name,
                                bool mmapModel, const char* nnapi_cache_dir,
                                const tflite::nnapi::NnApiSupportLibrary* nnApiSl = nullptr);

  bool resizeInputTensors(std::vector<int> shape);
  bool setInput(const uint8_t* dataPtr, size_t length);
  bool runInference();
  // Resets TFLite states (RNN/LSTM states, etc.).
  bool resetStates();

  bool benchmark(const std::vector<InferenceInOutSequence>& inOutData,
                 int seqInferencesMaxCount, float timeout, int flags,
                 std::vector<InferenceResult>* result);

  bool benchmarkCompilation(int maxNumIterations,
                            float warmupTimeout,
                            float runTimeout,
                            bool useNnapiSl,
                            CompilationBenchmarkResult* result);

  bool dumpAllLayers(const char* path,
                     const std::vector<InferenceInOutSequence>& inOutData);

 private:
  BenchmarkModel() = default;
  bool init(const char* modelfile, int tfliteBackend,
            bool enable_intermediate_tensors_dump,
            int* nnapiErrno, const char* nnapi_device_name,
            /* flag to choose between memory-mapping the model and initializing
               the model from the program's memory */
            bool mmapModel,
            const char* nnapi_cache_dir,
            const tflite::nnapi::NnApiSupportLibrary* nnApiSl = nullptr);

  void getOutputError(const uint8_t* dataPtr, size_t length,
                      InferenceResult* result, int output_index);
  void saveInferenceOutput(InferenceResult* result, int output_index);

  bool runCompilation(const char* cacheDir, bool useNnapiSl);
  bool benchmarkSingleTypeOfCompilation(CompilationBenchmarkType type,
                                        int maxNumIterations,
                                        float timeout,
                                        bool useNnapiSl,
                                        std::vector<float>* results);
  bool benchmarkSingleTypeOfCompilationWithWarmup(CompilationBenchmarkType type,
                                                  int maxNumIterations,
                                                  float warmupTimeout,
                                                  float runTimeout,
                                                  bool useNnapiSl,
                                                  std::vector<float>* results);
  bool getCompilationCacheSize(int* cacheSizeBytes, bool useNnapiSl);

  std::string mModelBuffer;
  std::unique_ptr<tflite::FlatBufferModel> mTfliteModel;
  std::unique_ptr<tflite::StatefulNnApiDelegate> mTfliteNnapiDelegate;
  std::unique_ptr<tflite::Interpreter> mTfliteInterpreter;
  // Indices of output tensors, used to dump intermediate tensors.
  std::vector<int> outputs;

  // Parameters for compilation.
  std::string mModelFile;
  std::optional<std::string> mCacheDir;
  std::string mNnApiDeviceName;
  const tflite::nnapi::NnApiSupportLibrary* mNnApiSl = nullptr;
#if defined(NN_BENCHMARK_ENABLE_GPU)
  TfLiteDelegate* mGpuDelegate;
#endif  // defined(NN_BENCHMARK_ENABLE_GPU)
  int mTfliteBackend;
};
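
// Illustrative usage sketch (not part of the original benchmark API): the two
// inline helpers below show one plausible way a client could build an input
// sequence and drive BenchmarkModel. The zero-filled createInput callback, the
// model path, the NNAPI backend choice, and the flag/timeout values are
// assumptions made for this example only.

// Builds a single-element sequence whose input is produced lazily through
// createInput; no golden outputs are attached, so it pairs with
// FLAG_IGNORE_GOLDEN_OUTPUT.
inline InferenceInOutSequence exampleInputSequence(size_t inputSize) {
  InferenceInOut inOut;
  inOut.input = nullptr;  // Data comes from the callback, not a raw pointer.
  inOut.input_size = inputSize;
  inOut.createInput = [](uint8_t* buffer, size_t size) {
    // Hypothetical input generator: fill the buffer with zeros.
    for (size_t i = 0; i < size; ++i) buffer[i] = 0;
    return true;
  };
  return {inOut};
}

// Creates a model on the NNAPI backend and runs a bounded benchmark,
// discarding raw outputs and skipping golden-output comparison.
inline bool exampleRunBenchmark(const std::vector<InferenceInOutSequence>& data,
                                std::vector<InferenceResult>* results) {
  int nnapiErrno = 0;
  std::unique_ptr<BenchmarkModel> model(BenchmarkModel::create(
      "/data/local/tmp/model.tflite",  // Hypothetical model path.
      TFLITE_NNAPI,
      /*enable_intermediate_tensors_dump=*/false, &nnapiErrno,
      /*nnapi_device_name=*/nullptr,
      /*mmapModel=*/true,
      /*nnapi_cache_dir=*/nullptr));
  if (model == nullptr) {
    return false;
  }
  return model->benchmark(data, /*seqInferencesMaxCount=*/100,
                          /*timeout=*/60.0f,
                          FLAG_DISCARD_INFERENCE_OUTPUT | FLAG_IGNORE_GOLDEN_OUTPUT,
                          results);
}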

#endif  // COM_EXAMPLE_ANDROID_NN_BENCHMARK_RUN_TFLITE_H