1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "Operations"
18 
19 #include "Pooling.h"
20 
21 #include <vector>
22 
23 #include "OperationResolver.h"
24 #include "Tracing.h"
25 #include "nnapi/Validation.h"
26 
27 #ifdef NN_INCLUDE_CPU_IMPLEMENTATION
28 #pragma clang diagnostic push
29 #pragma clang diagnostic ignored "-Wunused-parameter"
30 #pragma clang diagnostic ignored "-Wsign-compare"
31 #pragma clang diagnostic ignored "-Winvalid-partial-specialization"
32 #include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
33 #include <tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h>
34 #pragma clang diagnostic pop
35 
36 #include "CpuOperationUtils.h"
37 #endif  // NN_INCLUDE_CPU_IMPLEMENTATION
38 
39 namespace android {
40 namespace nn {
41 
42 namespace pooling {
43 
44 #ifdef NN_INCLUDE_CPU_IMPLEMENTATION
45 namespace {
46 
// Pooling hyperparameters shared by AVERAGE_POOL_2D, L2_POOL_2D and
// MAX_POOL_2D, parsed from the operation's scalar inputs.
struct PoolingParam {
    int32_t padding_left, padding_right;   // explicit padding along the width axis
    int32_t padding_top, padding_bottom;   // explicit padding along the height axis
    int32_t stride_width, stride_height;
    int32_t filter_width, filter_height;
    int32_t activation;  // fused activation code; used below to compute the output clamp range
    bool useNchw = false;  // data layout flag; NHWC when false (the default)

    // Reads and validates the operation's scalar inputs.
    //
    // Two input signatures are supported:
    //  - >= 10 inputs: explicit padding (left/right/top/bottom at inputs 1-4),
    //    strides (5-6), filter dims (7-8), activation (9), and an optional
    //    NCHW layout flag at input 10 (present when there are exactly 11 inputs);
    //  - otherwise: an implicit padding scheme at input 1, strides (2-3),
    //    filter dims (4-5), activation (6), and an optional NCHW layout flag
    //    at input 7 (present when there are exactly 8 inputs); the explicit
    //    paddings are then derived from the input tensor's spatial dimensions.
    //
    // Returns false (via NN_RET_CHECK) if any parameter is out of range.
    bool initialize(const IOperationExecutionContext* context) {
        uint32_t inCount = context->getNumInputs();
        int32_t padding_implicit = 0;
        if (inCount >= 10) {
            padding_left = context->getInputValue<int32_t>(1);
            padding_right = context->getInputValue<int32_t>(2);
            padding_top = context->getInputValue<int32_t>(3);
            padding_bottom = context->getInputValue<int32_t>(4);
            stride_width = context->getInputValue<int32_t>(5);
            stride_height = context->getInputValue<int32_t>(6);
            filter_width = context->getInputValue<int32_t>(7);
            filter_height = context->getInputValue<int32_t>(8);
            activation = context->getInputValue<int32_t>(9);
            if (inCount == 11) {
                useNchw = context->getInputValue<bool>(10);
            }
        } else {
            padding_implicit = context->getInputValue<int32_t>(1);
            stride_width = context->getInputValue<int32_t>(2);
            stride_height = context->getInputValue<int32_t>(3);
            filter_width = context->getInputValue<int32_t>(4);
            filter_height = context->getInputValue<int32_t>(5);
            activation = context->getInputValue<int32_t>(6);
            if (inCount == 8) {
                useNchw = context->getInputValue<bool>(7);
            }
        }
        if (inCount <= 8) {
            // Implicit padding: derive the explicit values from the input's
            // spatial extent (dims 2/3 for NCHW, dims 1/2 for NHWC).
            Shape inputShape = context->getInputShape(kInputTensor);
            int32_t input_height = getSizeOfDimension(inputShape, useNchw ? 2 : 1);
            int32_t input_width = getSizeOfDimension(inputShape, useNchw ? 3 : 2);
            calculateExplicitPadding(input_width, stride_width, filter_width, padding_implicit,
                                     &padding_left, &padding_right);
            calculateExplicitPadding(input_height, stride_height, filter_height, padding_implicit,
                                     &padding_top, &padding_bottom);
        }
        NN_RET_CHECK_GE(padding_left, 0);
        NN_RET_CHECK_GE(padding_right, 0);
        NN_RET_CHECK_GE(padding_top, 0);
        NN_RET_CHECK_GE(padding_bottom, 0);
        NN_RET_CHECK_GT(stride_width, 0);
        NN_RET_CHECK_GT(stride_height, 0);
        NN_RET_CHECK_GT(filter_width, 0);
        NN_RET_CHECK_GT(filter_height, 0);
        NN_RET_CHECK_GE(activation, 0);
        // Each filter dimension must strictly exceed the padding on either
        // side so every pooling window overlaps the input by >= 1 element.
        NN_RET_CHECK_GT(filter_width, padding_left);
        NN_RET_CHECK_GT(filter_width, padding_right);
        NN_RET_CHECK_GT(filter_height, padding_top);
        NN_RET_CHECK_GT(filter_height, padding_bottom);
        return true;
    }

    // Converts these parameters into TFLite's PoolParams for |output|,
    // filling in the activation clamp range appropriate to the output operand
    // type (uint8 asymm, int8 asymm signed, or float).
    tflite::PoolParams toTfliteParam(const Shape& output) const {
        tflite::PoolParams params = {
                .padding_values = {.width = static_cast<int16_t>(padding_left),
                                   .height = static_cast<int16_t>(padding_top),
                                   .width_offset = 0,
                                   .height_offset = 0},
                .stride_height = stride_height,
                .stride_width = stride_width,
                .filter_height = filter_height,
                .filter_width = filter_width,
        };
        if (output.type == OperandType::TENSOR_QUANT8_ASYMM) {
            int32_t output_activation_min = 0;
            int32_t output_activation_max = 0;
            CalculateActivationRangeUint8(activation, output, &output_activation_min,
                                          &output_activation_max);
            params.quantized_activation_min = output_activation_min;
            params.quantized_activation_max = output_activation_max;
        } else if (output.type == OperandType::TENSOR_QUANT8_ASYMM_SIGNED) {
            int32_t output_activation_min = 0;
            int32_t output_activation_max = 0;
            CalculateActivationRangeInt8(activation, output, &output_activation_min,
                                         &output_activation_max);
            params.quantized_activation_min = output_activation_min;
            params.quantized_activation_max = output_activation_max;
        } else {
            float output_activation_min, output_activation_max;
            CalculateActivationRangeFloat(activation, &output_activation_min,
                                          &output_activation_max);
            params.float_activation_min = output_activation_min;
            params.float_activation_max = output_activation_max;
        }
        return params;
    }
};
142 
averagePoolNhwc(const float * inputData,const Shape & inputShape,const PoolingParam & param,float * outputData,const Shape & outputShape)143 bool averagePoolNhwc(const float* inputData, const Shape& inputShape, const PoolingParam& param,
144                      float* outputData, const Shape& outputShape) {
145     NNTRACE_TRANS("averagePoolFloat32");
146     auto op_params = param.toTfliteParam(outputShape);
147     NNTRACE_COMP_SWITCH("optimized_ops::AveragePool");
148     tflite::optimized_ops::AveragePool(op_params, convertShapeToTflshape(inputShape), inputData,
149                                        convertShapeToTflshape(outputShape), outputData);
150     return true;
151 }
152 
averagePoolNhwc(const _Float16 * inputData,const Shape & inputShape,const PoolingParam & param,_Float16 * outputData,const Shape & outputShape)153 bool averagePoolNhwc(const _Float16* inputData, const Shape& inputShape, const PoolingParam& param,
154                      _Float16* outputData, const Shape& outputShape) {
155     NNTRACE_TRANS("averagePoolFloat16");
156     std::vector<float> inputDataFloat32(getNumberOfElements(inputShape));
157     std::vector<float> outputDataFloat32(getNumberOfElements(outputShape));
158 
159     convertFloat16ToFloat32(inputData, &inputDataFloat32);
160     averagePoolNhwc(inputDataFloat32.data(), inputShape, param, outputDataFloat32.data(),
161                     outputShape);
162     convertFloat32ToFloat16(outputDataFloat32, outputData);
163     return true;
164 }
165 
averagePoolNhwc(const uint8_t * inputData,const Shape & inputShape,const PoolingParam & param,uint8_t * outputData,const Shape & outputShape)166 bool averagePoolNhwc(const uint8_t* inputData, const Shape& inputShape, const PoolingParam& param,
167                      uint8_t* outputData, const Shape& outputShape) {
168     NNTRACE_TRANS("averagePoolQuant8");
169     auto op_params = param.toTfliteParam(outputShape);
170     NNTRACE_COMP_SWITCH("optimized_ops::AveragePool");
171     tflite::optimized_ops::AveragePool(op_params, convertShapeToTflshape(inputShape), inputData,
172                                        convertShapeToTflshape(outputShape), outputData);
173     return true;
174 }
175 
averagePoolNhwc(const int8_t * inputData,const Shape & inputShape,const PoolingParam & param,int8_t * outputData,const Shape & outputShape)176 bool averagePoolNhwc(const int8_t* inputData, const Shape& inputShape, const PoolingParam& param,
177                      int8_t* outputData, const Shape& outputShape) {
178     NNTRACE_TRANS("averagePoolQuant8Signed");
179     auto op_params = param.toTfliteParam(outputShape);
180     NNTRACE_COMP_SWITCH("optimized_integer_ops::AveragePool");
181     // We are using reference implementation of the AveragePool op because the
182     // optimized version fails to pass some of the quantization coupling tests.
183     tflite::reference_integer_ops::AveragePool(op_params, convertShapeToTflshape(inputShape),
184                                                inputData, convertShapeToTflshape(outputShape),
185                                                outputData);
186     return true;
187 }
188 
l2PoolNhwc(const float * inputData,const Shape & inputShape,const PoolingParam & param,float * outputData,const Shape & outputShape)189 bool l2PoolNhwc(const float* inputData, const Shape& inputShape, const PoolingParam& param,
190                 float* outputData, const Shape& outputShape) {
191     NNTRACE_TRANS("l2PoolFloat32");
192     auto op_params = param.toTfliteParam(outputShape);
193     NNTRACE_COMP_SWITCH("optimized_ops::L2Pool");
194     tflite::optimized_ops::L2Pool(op_params, convertShapeToTflshape(inputShape), inputData,
195                                   convertShapeToTflshape(outputShape), outputData);
196     return true;
197 }
198 
l2PoolNhwc(const _Float16 * inputData,const Shape & inputShape,const PoolingParam & param,_Float16 * outputData,const Shape & outputShape)199 bool l2PoolNhwc(const _Float16* inputData, const Shape& inputShape, const PoolingParam& param,
200                 _Float16* outputData, const Shape& outputShape) {
201     NNTRACE_TRANS("l2PoolFloat16");
202     std::vector<float> inputDataFloat32(getNumberOfElements(inputShape));
203     std::vector<float> outputDataFloat32(getNumberOfElements(outputShape));
204 
205     convertFloat16ToFloat32(inputData, &inputDataFloat32);
206     l2PoolNhwc(inputDataFloat32.data(), inputShape, param, outputDataFloat32.data(), outputShape);
207     convertFloat32ToFloat16(outputDataFloat32, outputData);
208     return true;
209 }
210 
maxPoolNhwc(const float * inputData,const Shape & inputShape,const PoolingParam & param,float * outputData,const Shape & outputShape)211 bool maxPoolNhwc(const float* inputData, const Shape& inputShape, const PoolingParam& param,
212                  float* outputData, const Shape& outputShape) {
213     NNTRACE_TRANS("maxPoolFloat32");
214     auto op_params = param.toTfliteParam(outputShape);
215     NNTRACE_COMP_SWITCH("optimized_ops::MaxPool");
216     tflite::optimized_ops::MaxPool(op_params, convertShapeToTflshape(inputShape), inputData,
217                                    convertShapeToTflshape(outputShape), outputData);
218     return true;
219 }
220 
maxPoolNhwc(const uint8_t * inputData,const Shape & inputShape,const PoolingParam & param,uint8_t * outputData,const Shape & outputShape)221 bool maxPoolNhwc(const uint8_t* inputData, const Shape& inputShape, const PoolingParam& param,
222                  uint8_t* outputData, const Shape& outputShape) {
223     NNTRACE_TRANS("maxPoolQuant8");
224     auto op_params = param.toTfliteParam(outputShape);
225     NNTRACE_COMP_SWITCH("optimized_ops::MaxPool");
226     tflite::optimized_ops::MaxPool(op_params, convertShapeToTflshape(inputShape), inputData,
227                                    convertShapeToTflshape(outputShape), outputData);
228     return true;
229 }
230 
maxPoolNhwc(const int8_t * inputData,const Shape & inputShape,const PoolingParam & param,int8_t * outputData,const Shape & outputShape)231 bool maxPoolNhwc(const int8_t* inputData, const Shape& inputShape, const PoolingParam& param,
232                  int8_t* outputData, const Shape& outputShape) {
233     NNTRACE_TRANS("maxPoolQuant8Signed");
234     auto op_params = param.toTfliteParam(outputShape);
235     NNTRACE_COMP_SWITCH("optimized_integer_ops::MaxPool");
236     // We are using reference implementation of the MaxPool op because the
237     // optimized version fails to pass some of the quantization coupling tests.
238     tflite::reference_integer_ops::MaxPool(op_params, convertShapeToTflshape(inputShape), inputData,
239                                            convertShapeToTflshape(outputShape), outputData);
240     return true;
241 }
242 
maxPoolNhwc(const _Float16 * inputData,const Shape & inputShape,const PoolingParam & param,_Float16 * outputData,const Shape & outputShape)243 bool maxPoolNhwc(const _Float16* inputData, const Shape& inputShape, const PoolingParam& param,
244                  _Float16* outputData, const Shape& outputShape) {
245     NNTRACE_TRANS("maxPoolFloat16");
246     std::vector<float> inputData_float32(getNumberOfElements(inputShape));
247     std::vector<float> outputData_float32(getNumberOfElements(outputShape));
248 
249     convertFloat16ToFloat32(inputData, &inputData_float32);
250     maxPoolNhwc(inputData_float32.data(), inputShape, param, outputData_float32.data(),
251                 outputShape);
252     convertFloat32ToFloat16(outputData_float32, outputData);
253     return true;
254 }
255 
256 template <typename T>
averagePool(const T * inputData,const Shape & inputShape,const PoolingParam & param,T * outputData,const Shape & outputShape)257 bool averagePool(const T* inputData, const Shape& inputShape, const PoolingParam& param,
258                  T* outputData, const Shape& outputShape) {
259     InputWithLayout<T> input(param.useNchw);
260     OutputWithLayout<T> output(param.useNchw);
261     NN_RET_CHECK(input.initialize(inputData, inputShape));
262     NN_RET_CHECK(output.initialize(outputData, outputShape));
263     NN_RET_CHECK(averagePoolNhwc(input.getNhwcBuffer(), input.getNhwcShape(), param,
264                                  output.getNhwcBuffer(), output.getNhwcShape()));
265     NN_RET_CHECK(output.commit());
266     return true;
267 }
268 
269 template <typename T>
l2Pool(const T * inputData,const Shape & inputShape,const PoolingParam & param,T * outputData,const Shape & outputShape)270 bool l2Pool(const T* inputData, const Shape& inputShape, const PoolingParam& param, T* outputData,
271             const Shape& outputShape) {
272     InputWithLayout<T> input(param.useNchw);
273     OutputWithLayout<T> output(param.useNchw);
274     NN_RET_CHECK(input.initialize(inputData, inputShape));
275     NN_RET_CHECK(output.initialize(outputData, outputShape));
276     NN_RET_CHECK(l2PoolNhwc(input.getNhwcBuffer(), input.getNhwcShape(), param,
277                             output.getNhwcBuffer(), output.getNhwcShape()));
278     NN_RET_CHECK(output.commit());
279     return true;
280 }
281 
282 template <typename T>
maxPool(const T * inputData,const Shape & inputShape,const PoolingParam & param,T * outputData,const Shape & outputShape)283 bool maxPool(const T* inputData, const Shape& inputShape, const PoolingParam& param, T* outputData,
284              const Shape& outputShape) {
285     InputWithLayout<T> input(param.useNchw);
286     OutputWithLayout<T> output(param.useNchw);
287     NN_RET_CHECK(input.initialize(inputData, inputShape));
288     NN_RET_CHECK(output.initialize(outputData, outputShape));
289     NN_RET_CHECK(maxPoolNhwc(input.getNhwcBuffer(), input.getNhwcShape(), param,
290                              output.getNhwcBuffer(), output.getNhwcShape()));
291     NN_RET_CHECK(output.commit());
292     return true;
293 }
294 
295 }  // namespace
296 
prepare(IOperationExecutionContext * context)297 bool prepare(IOperationExecutionContext* context) {
298     Shape input = context->getInputShape(kInputTensor);
299     NN_RET_CHECK_EQ(getNumberOfDimensions(input), 4u);
300 
301     PoolingParam param;
302     NN_RET_CHECK(param.initialize(context));
303 
304     // Only batches can be zero.
305     uint32_t batches = getSizeOfDimension(input, 0);
306     uint32_t height = getSizeOfDimension(input, param.useNchw ? 2 : 1);
307     uint32_t width = getSizeOfDimension(input, param.useNchw ? 3 : 2);
308     uint32_t channels = getSizeOfDimension(input, param.useNchw ? 1 : 3);
309     NN_RET_CHECK_GT(height, 0u);
310     NN_RET_CHECK_GT(width, 0u);
311     NN_RET_CHECK_GT(channels, 0u);
312 
313     uint32_t outWidth = computeOutSize(width, param.filter_width, param.stride_width,
314                                        param.padding_left, param.padding_right);
315     uint32_t outHeight = computeOutSize(height, param.filter_height, param.stride_height,
316                                         param.padding_top, param.padding_bottom);
317 
318     Shape output = input;
319     if (param.useNchw) {
320         output.dimensions = {batches, channels, outHeight, outWidth};
321     } else {
322         output.dimensions = {batches, outHeight, outWidth, channels};
323     }
324     return context->setOutputShape(kOutputTensor, output);
325 }
326 
// Expands to one switch case that fetches the typed input/output buffers and
// shapes from `context` and forwards them (with `param`) to the given pooling
// implementation. Used by the execute* functions below, which must have
// `context` and `param` in scope.
#define POOLING_DISPATCH_INPUT_TYPE(name, type, cppType)              \
    case OperandType::type:                                           \
        return name(context->getInputBuffer<cppType>(kInputTensor),   \
                    context->getInputShape(kInputTensor), param,      \
                    context->getOutputBuffer<cppType>(kOutputTensor), \
                    context->getOutputShape(kOutputTensor))
333 
// Executes AVERAGE_POOL_2D: validates the parameters, then dispatches on the
// input operand type (float32, float16, quant8 asymm, quant8 asymm signed).
bool executeAveragePool(IOperationExecutionContext* context) {
    // Bypass execution in the case of zero-sized input.
    if (getNumberOfElements(context->getOutputShape(kOutputTensor)) == 0) return true;
    PoolingParam param;
    NN_RET_CHECK(param.initialize(context));
    switch (context->getInputType(kInputTensor)) {
        POOLING_DISPATCH_INPUT_TYPE(averagePool, TENSOR_FLOAT32, float);
        POOLING_DISPATCH_INPUT_TYPE(averagePool, TENSOR_FLOAT16, _Float16);
        POOLING_DISPATCH_INPUT_TYPE(averagePool, TENSOR_QUANT8_ASYMM, uint8_t);
        POOLING_DISPATCH_INPUT_TYPE(averagePool, TENSOR_QUANT8_ASYMM_SIGNED, int8_t);
        default:
            NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation AVERAGE_POOL_2D";
    }
}
348 
// Executes L2_POOL_2D: validates the parameters, then dispatches on the input
// operand type (float32 or float16 only; no quantized variant is defined).
bool executeL2Pool(IOperationExecutionContext* context) {
    // Bypass execution in the case of zero-sized input.
    if (getNumberOfElements(context->getOutputShape(kOutputTensor)) == 0) return true;
    PoolingParam param;
    NN_RET_CHECK(param.initialize(context));
    switch (context->getInputType(kInputTensor)) {
        POOLING_DISPATCH_INPUT_TYPE(l2Pool, TENSOR_FLOAT32, float);
        POOLING_DISPATCH_INPUT_TYPE(l2Pool, TENSOR_FLOAT16, _Float16);
        default:
            NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation L2_POOL_2D";
    }
}
361 
// Executes MAX_POOL_2D: validates the parameters, then dispatches on the
// input operand type (float32, float16, quant8 asymm, quant8 asymm signed).
bool executeMaxPool(IOperationExecutionContext* context) {
    // Bypass execution in the case of zero-sized input.
    if (getNumberOfElements(context->getOutputShape(kOutputTensor)) == 0) return true;
    PoolingParam param;
    NN_RET_CHECK(param.initialize(context));
    switch (context->getInputType(kInputTensor)) {
        POOLING_DISPATCH_INPUT_TYPE(maxPool, TENSOR_FLOAT32, float);
        POOLING_DISPATCH_INPUT_TYPE(maxPool, TENSOR_FLOAT16, _Float16);
        POOLING_DISPATCH_INPUT_TYPE(maxPool, TENSOR_QUANT8_ASYMM, uint8_t);
        POOLING_DISPATCH_INPUT_TYPE(maxPool, TENSOR_QUANT8_ASYMM_SIGNED, int8_t);
        default:
            NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation MAX_POOL_2D";
    }
}
376 #endif  // NN_INCLUDE_CPU_IMPLEMENTATION
377 
378 #undef POOLING_DISPATCH_INPUT_TYPE
379 
380 }  // namespace pooling
381 
// Register the three pooling operations with the operation resolver. All of
// them share pooling::prepare for shape inference, and all allow zero-sized
// (zero-batch) inputs, which the execute* functions short-circuit at runtime.
NN_REGISTER_OPERATION_DEFAULT_VALIDATION(AVERAGE_POOL_2D, pooling::prepare,
                                         pooling::executeAveragePool, .allowZeroSizedInput = true);
NN_REGISTER_OPERATION_DEFAULT_VALIDATION(L2_POOL_2D, pooling::prepare, pooling::executeL2Pool,
                                         .allowZeroSizedInput = true);
NN_REGISTER_OPERATION_DEFAULT_VALIDATION(MAX_POOL_2D, pooling::prepare, pooling::executeMaxPool,
                                         .allowZeroSizedInput = true);
388 
389 }  // namespace nn
390 }  // namespace android
391