1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ANDROID_PACKAGES_MODULES_NEURALNETWORKS_COMMON_OPERATIONS_EXECUTION_UTILS_H
18 #define ANDROID_PACKAGES_MODULES_NEURALNETWORKS_COMMON_OPERATIONS_EXECUTION_UTILS_H
19 
20 #include <algorithm>
21 #include <cstdint>
22 #include <string>
23 #include <vector>
24 
25 #include "OperationsUtils.h"
26 #include "nnapi/TypeUtils.h"
27 #include "nnapi/Types.h"
28 
29 namespace android {
30 namespace nn {
31 
// Implicit padding schemes for convolution/pooling-style operations.
// getPaddingScheme() below maps explicit padding amounts back onto these
// values: all-zero explicit padding is treated as kPaddingValid, and padding
// that matches calculateExplicitPadding(..., kPaddingSame, ...) is treated as
// kPaddingSame; anything else is kPaddingUnknown.
enum PaddingScheme {
    kPaddingUnknown = 0,
    kPaddingSame = 1,
    kPaddingValid = 2,
};
37 
// Provides inputs and outputs during operation execution.
// Implemented elsewhere; operations use this interface to read operand
// metadata and data buffers without knowing how they are stored.
class IOperationExecutionContext {
   public:
    virtual ~IOperationExecutionContext() {}

    // Input accessors. |index| is the operation-relative input index.
    virtual uint32_t getNumInputs() const = 0;
    virtual OperandType getInputType(uint32_t index) const = 0;
    virtual Shape getInputShape(uint32_t index) const = 0;
    virtual const void* getInputBuffer(uint32_t index) const = 0;
    virtual const Operand::ExtraParams& getInputExtraParams(uint32_t index) const = 0;

    // Output accessors. |index| is the operation-relative output index.
    virtual uint32_t getNumOutputs() const = 0;
    virtual OperandType getOutputType(uint32_t index) const = 0;
    virtual Shape getOutputShape(uint32_t index) const = 0;
    virtual void* getOutputBuffer(uint32_t index) = 0;

    // Updates the output shape, allocating the buffer if necessary.
    virtual bool setOutputShape(uint32_t index, const Shape& shape) = 0;

    // Whether the optional input/output at |index| is omitted.
    virtual bool isOmittedInput(uint32_t index) const = 0;
    virtual bool isOmittedOutput(uint32_t index) const = 0;

    // Typed view of an input buffer. The caller must pick a T that matches
    // the operand's actual data type; no checking is performed here.
    template <typename T>
    const T* getInputBuffer(uint32_t index) const {
        return reinterpret_cast<const T*>(getInputBuffer(index));
    }

    // Typed view of an output buffer (same caveat as getInputBuffer<T>).
    template <typename T>
    T* getOutputBuffer(uint32_t index) {
        return reinterpret_cast<T*>(getOutputBuffer(index));
    }

    // Reads the first element of input |index| as a scalar of type T.
    template <typename T>
    T getInputValue(uint32_t index) const {
        return getInputBuffer<T>(index)[0];
    }
};
75 
76 // Converts an axis index from the range [-dims, dims) into the range [0, dims).
77 bool handleNegativeAxis(int32_t numberOfDimensions, int32_t* axis);
78 
// Overload of handleNegativeAxis() that derives the dimension count from
// |shape|.
inline bool handleNegativeAxis(const Shape& shape, int32_t* axis) {
    return handleNegativeAxis(getNumberOfDimensions(shape), axis);
}
82 
// Computes the spatial output size of one convolution/pooling dimension,
// taking dilation into account. The effective filter size covers
// (filterSize - 1) * dilationRate + 1 input elements.
inline int32_t computeOutSize(int32_t imageSize, int32_t filterSize, int32_t stride,
                              int32_t dilationRate, int32_t paddingHead, int32_t paddingTail) {
    const int32_t effectiveFilterSize = (filterSize - 1) * dilationRate + 1;
    return (imageSize - effectiveFilterSize + stride + paddingHead + paddingTail) / stride;
}

// Non-dilated variant. Delegates to the dilated overload with a dilation
// rate of 1, which leaves the effective filter size equal to |filterSize|,
// so the two overloads cannot drift apart.
inline int32_t computeOutSize(int32_t imageSize, int32_t filterSize, int32_t stride,
                              int32_t paddingHead, int32_t paddingTail) {
    return computeOutSize(imageSize, filterSize, stride, /*dilationRate=*/1, paddingHead,
                          paddingTail);
}
93 
// Output size of one transposed-convolution dimension: input elements are
// placed |stride| apart, the filter extends past the last placement, and the
// explicit padding is trimmed from both ends.
inline int32_t computeOutSizeTransposeConv(int32_t imageSize, int32_t filterSize, int32_t stride,
                                           int32_t paddingHead, int32_t paddingTail) {
    const int32_t expandedSize = (imageSize - 1) * stride + filterSize;
    return expandedSize - paddingHead - paddingTail;
}
98 
99 [[nodiscard]] bool QuantizeMultiplier(double double_multiplier, int32_t* quantized_multiplier,
100                                       int32_t* shift);
101 
102 [[nodiscard]] bool QuantizeMultiplierSmallerThanOne(double double_multiplier,
103                                                     int32_t* quantized_multiplier,
104                                                     int32_t* right_shift);
105 
106 // Same as QuantizeMultiplierSmallerThanOne but returns left shift (i.e. negated
107 // right shift), so that it has the same interface as
108 // QuantizeMultiplierGreaterThanOne and QuantizeMultiplier functions.
109 [[nodiscard]] bool QuantizeMultiplierSmallerThanOneExp(double double_multiplier,
110                                                        int32_t* quantized_multiplier,
111                                                        int32_t* left_shift);
112 
113 [[nodiscard]] bool QuantizeMultiplierGreaterThanOne(double double_multiplier,
114                                                     int32_t* quantized_multiplier, int* left_shift);
115 
116 [[nodiscard]] bool GetQuantizedConvolutionMultiplier(const Shape& inputShape,
117                                                      const Shape& filterShape,
118                                                      const Shape& biasShape,
119                                                      const Shape& outputShape, double* multiplier);
120 
121 [[nodiscard]] bool GetQuantizedConvolutionMultiplier(const Shape& inputShape,
122                                                      const Shape& filterShape,
123                                                      const Shape& outputShape, double* multiplier);
124 
125 void CalculateActivationRangeUint8(int32_t activation, const Shape& outputShape, int32_t* act_min,
126                                    int32_t* act_max);
127 
128 void CalculateActivationRangeInt8(int32_t activation, const Shape& outputShape, int32_t* act_min,
129                                   int32_t* act_max);
130 
131 void CalculateActivationRangeFloat(int32_t activation, float* activation_min,
132                                    float* activation_max);
133 
134 int32_t CalculateInputRadius(int input_integer_bits, int input_left_shift);
135 
136 void calculateExplicitPaddingImpl(int32_t in_size, int32_t stride, int32_t dilation_factor,
137                                   int32_t filter_size, int32_t padding_implicit,
138                                   bool isTransposeConv, int32_t* padding_head,
139                                   int32_t* padding_tail);
140 
// Computes explicit head/tail padding from an implicit padding scheme for a
// regular (non-transpose) convolution with the given dilation factor.
// Delegates to calculateExplicitPaddingImpl with isTransposeConv = false.
inline void calculateExplicitPadding(int32_t in_size, int32_t stride, int32_t dilation_factor,
                                     int32_t filter_size, int32_t padding_implicit,
                                     int32_t* padding_head, int32_t* padding_tail) {
    calculateExplicitPaddingImpl(in_size, stride, dilation_factor, filter_size, padding_implicit,
                                 /*isTransposeConv=*/false, padding_head, padding_tail);
}
147 
// Convenience overload without dilation: forwards to the dilated overload
// with a dilation factor of 1.
inline void calculateExplicitPadding(int32_t in_size, int32_t stride, int32_t filter_size,
                                     int32_t padding_implicit, int32_t* padding_head,
                                     int32_t* padding_tail) {
    calculateExplicitPadding(in_size, stride, 1, filter_size, padding_implicit, padding_head,
                             padding_tail);
}
154 
// Computes explicit head/tail padding for a transposed convolution (no
// dilation). Delegates to calculateExplicitPaddingImpl with
// isTransposeConv = true.
inline void calculateExplicitPaddingTransposeConv(int32_t in_size, int32_t stride,
                                                  int32_t filter_size, int32_t padding_implicit,
                                                  int32_t* padding_head, int32_t* padding_tail) {
    calculateExplicitPaddingImpl(in_size, stride, /*dilation_factor=*/1, filter_size,
                                 padding_implicit, /*isTransposeConv=*/true, padding_head,
                                 padding_tail);
}
162 
getPaddingScheme(int32_t inWidth,int32_t inHeight,int32_t strideWidth,int32_t strideHeight,int32_t filterWidth,int32_t filterHeight,int32_t paddingLeft,int32_t paddingRight,int32_t paddingTop,int32_t paddingBottom)163 inline PaddingScheme getPaddingScheme(int32_t inWidth, int32_t inHeight, int32_t strideWidth,
164                                       int32_t strideHeight, int32_t filterWidth,
165                                       int32_t filterHeight, int32_t paddingLeft,
166                                       int32_t paddingRight, int32_t paddingTop,
167                                       int32_t paddingBottom) {
168     if (paddingLeft == 0 && paddingRight == 0 && paddingTop == 0 && paddingBottom == 0) {
169         return kPaddingValid;
170     }
171 
172     int32_t expectedPaddingLeft, expectedPaddingRight;
173     int32_t expectedPaddingTop, expectedPaddingBottom;
174 
175     calculateExplicitPadding(inWidth, strideWidth, filterWidth, kPaddingSame, &expectedPaddingLeft,
176                              &expectedPaddingRight);
177     calculateExplicitPadding(inHeight, strideHeight, filterHeight, kPaddingSame,
178                              &expectedPaddingTop, &expectedPaddingBottom);
179     if (expectedPaddingLeft == paddingLeft && expectedPaddingRight == paddingRight &&
180         expectedPaddingTop == paddingTop && expectedPaddingBottom == paddingBottom) {
181         return kPaddingSame;
182     } else {
183         return kPaddingUnknown;
184     }
185 }
186 
// Reverse order of bits in the mask to match the expected order in kernel.
// Only the low |num_dimensions| bits of |mask| participate; bit b of the
// input lands at bit (num_dimensions - 1 - b) of the result.
inline int ReverseMaskBits(int mask, int num_dimensions) {
    int reversed = 0;
    for (int bit = 0; bit < num_dimensions; ++bit) {
        reversed |= ((mask >> bit) & 1) << (num_dimensions - 1 - bit);
    }
    return reversed;
}
197 
// Compute the positive remainder of dividend / divisor. C++ '%' can yield a
// negative result for a negative dividend; adding |divisor| before the final
// '%' shifts it into the non-negative range.
inline int32_t PositiveRemainder(int32_t dividend, int32_t divisor) {
    const int32_t shifted = dividend % divisor + divisor;
    return shifted % divisor;
}
202 
203 // Compute clamped index.
ClampedIndex(int32_t index,int dim,bool pos_stride)204 inline int32_t ClampedIndex(int32_t index, int dim, bool pos_stride) {
205     return pos_stride
206                    ? (index >= dim ? dim
207                                    : PositiveRemainder(std::min(std::max(index, -dim), dim), dim))
208                    : (index < -dim
209                               ? -1
210                               : PositiveRemainder(std::min(std::max(index, -dim), dim - 1), dim));
211 }
212 
213 // Broadcasts input shape against one another and puts the result into output
214 // shape. Returns true on success and false on error.
215 bool calculateBroadcastedShape(const Shape& in1, const Shape& in2, Shape* out);
216 
217 // Dequantizes a value and quantizes it back using new scale and offset.
218 template <typename T>
219 T requantize(T value, const Shape& oldShape, const Shape& newShape);
220 
221 // Preparation functions for the corresponding ops
222 bool floorPrepare(const Shape& input, Shape* output);
223 
224 bool depthwiseConvPrepare(const Shape& input, const Shape& filter, const Shape& bias,
225                           int32_t padding_left, int32_t padding_right, int32_t padding_top,
226                           int32_t padding_bottom, int32_t stride_width, int32_t stride_height,
227                           int32_t depth_multiplier, int32_t dilation_width_factor,
228                           int32_t dilation_height_factor, Shape* output);
229 
230 bool genericActivationPrepare(const Shape& input, Shape* output);
231 
232 bool reshapePrepare(const Shape& input, const int32_t* targetDims, const int32_t targetDimsSize,
233                     Shape* output);
234 
235 bool depthToSpacePrepare(const Shape& input, int32_t blockSize, Shape* output);
236 
237 bool spaceToDepthPrepare(const Shape& input, int32_t blockSize, Shape* output);
238 
239 bool embeddingLookupPrepare(const Shape& valueShape, const Shape& lookupShape, Shape* outputShape);
240 
241 bool hashtableLookupPrepare(const Shape& lookupShape, const Shape& keyShape,
242                             const Shape& valueShape, Shape* outputShape, Shape* hitShape);
243 
244 bool padPrepare(const Shape& input, const int32_t* paddingsData, const Shape& paddingsShape,
245                 Shape* output);
246 
247 bool batchToSpacePrepare(const Shape& input, const int32_t* blockSizeData,
248                          const Shape& blockSizeShape, Shape* output);
249 
250 bool spaceToBatchPrepare(const Shape& input, const int32_t* blockSizeData,
251                          const Shape& blockSizeShape, const int32_t* paddingsData,
252                          const Shape& paddingsShape, Shape* output);
253 
254 bool meanPrepare(const Shape& input, const int32_t* axisData, const Shape& axisShape, bool keepDims,
255                  Shape* output);
256 
257 bool argMinMaxPrepare(const Shape& input, int32_t axis, Shape* output);
258 
259 bool splitPrepare(const Shape& input, int32_t axis, int32_t numOutputs, std::vector<Shape>* output);
260 
261 bool groupedConvPrepare(const Shape& input, const Shape& filter, const Shape& bias,
262                         int32_t padding_left, int32_t padding_right, int32_t padding_top,
263                         int32_t padding_bottom, int32_t stride_width, int32_t stride_height,
264                         int32_t numGroups, Shape* output);
265 
266 // Transposes the first two dimensions.
267 template <typename T>
transposeFirstTwoDimensions(const T * buffer,const Shape & shape,T * transposedBuffer)268 inline bool transposeFirstTwoDimensions(const T* buffer, const Shape& shape, T* transposedBuffer) {
269     const int numDims = getNumberOfDimensions(shape);
270     NN_RET_CHECK(numDims >= 2);
271     const int firstDim = getSizeOfDimension(shape, 0);
272     const int secondDim = getSizeOfDimension(shape, 1);
273     int blockSize = 1;
274     for (int i = 2; i < numDims; ++i) {
275         blockSize *= getSizeOfDimension(shape, i);
276     }
277 
278     for (int i = 0; i < firstDim; ++i) {
279         for (int j = 0; j < secondDim; ++j) {
280             for (int k = 0; k < blockSize; ++k) {
281                 transposedBuffer[(j * firstDim + i) * blockSize + k] =
282                         buffer[(i * secondDim + j) * blockSize + k];
283             }
284         }
285     }
286     return true;
287 }
288 
transposeFirstTwoDimensions(const Shape & shape,Shape * transposedShape)289 inline bool transposeFirstTwoDimensions(const Shape& shape, Shape* transposedShape) {
290     NN_RET_CHECK(getNumberOfDimensions(shape) >= 2);
291     *transposedShape = shape;
292     transposedShape->dimensions[0] = shape.dimensions[1];
293     transposedShape->dimensions[1] = shape.dimensions[0];
294     return true;
295 }
296 
297 // Given two 3-dimensional tensors, merge them into one 3-dimensional tensor
298 // at the third dimension. The merged tensor's third dimension size will be
299 // sum of that of the two inputs.
300 template <typename T>
mergeThirdDimension(const T * bufferA,const std::vector<uint32_t> & dimsA,const T * bufferB,const std::vector<uint32_t> & dimsB,T * merged)301 inline bool mergeThirdDimension(const T* bufferA, const std::vector<uint32_t>& dimsA,
302                                 const T* bufferB, const std::vector<uint32_t>& dimsB, T* merged) {
303     NN_RET_CHECK_EQ(dimsA.size(), 3u);
304     NN_RET_CHECK_EQ(dimsB.size(), 3u);
305 
306     NN_RET_CHECK_EQ(dimsA[0], dimsB[0]);
307     NN_RET_CHECK_EQ(dimsA[1], dimsB[1]);
308 
309     for (unsigned int i = 0; i < dimsA[0]; ++i) {
310         for (unsigned int j = 0; j < dimsA[1]; ++j) {
311             for (unsigned int k = 0; k < dimsA[2]; ++k) {
312                 merged[(i * dimsA[1] + j) * (dimsA[2] + dimsB[2]) + k] =
313                         bufferA[(i * dimsA[1] + j) * dimsA[2] + k];
314             }
315             for (unsigned int k = 0; k < dimsB[2]; ++k) {
316                 merged[(i * dimsA[1] + j) * (dimsA[2] + dimsB[2]) + dimsA[2] + k] =
317                         bufferB[(i * dimsB[1] + j) * dimsB[2] + k];
318             }
319         }
320     }
321     return true;
322 }
323 
// Saturating cast of an int32_t value to the narrower integral type T:
// values outside T's range clamp to T's minimum/maximum.
template <typename T>
inline T saturateCast(int32_t val);

template <>
inline uint8_t saturateCast<uint8_t>(int32_t val) {
    // Fix: the clamped value was previously cast with static_cast<int8_t>,
    // which only produced the right uint8_t result via modular wraparound.
    return static_cast<uint8_t>(std::max(0, std::min(255, val)));
}

template <>
inline int8_t saturateCast<int8_t>(int32_t val) {
    return static_cast<int8_t>(std::max(-128, std::min(127, val)));
}
336 
337 }  // namespace nn
338 }  // namespace android
339 
340 #endif  // ANDROID_PACKAGES_MODULES_NEURALNETWORKS_COMMON_OPERATIONS_EXECUTION_UTILS_H
341