1 /*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef ANDROID_PACKAGES_MODULES_NEURALNETWORKS_COMMON_OPERATIONS_EXECUTION_UTILS_H
18 #define ANDROID_PACKAGES_MODULES_NEURALNETWORKS_COMMON_OPERATIONS_EXECUTION_UTILS_H
19
20 #include <algorithm>
21 #include <cstdint>
22 #include <string>
23 #include <vector>
24
25 #include "OperationsUtils.h"
26 #include "nnapi/TypeUtils.h"
27 #include "nnapi/Types.h"
28
29 namespace android {
30 namespace nn {
31
// Implicit padding schemes for convolution/pooling style operations.
enum PaddingScheme {
    kPaddingUnknown = 0,  // explicit padding that matches neither SAME nor VALID
    kPaddingSame = 1,     // implicit "SAME" padding
    kPaddingValid = 2,    // implicit "VALID" padding (all-zero explicit padding)
};
37
38 // Provides inputs and outputs during operation execution.
39 class IOperationExecutionContext {
40 public:
~IOperationExecutionContext()41 virtual ~IOperationExecutionContext() {}
42
43 virtual uint32_t getNumInputs() const = 0;
44 virtual OperandType getInputType(uint32_t index) const = 0;
45 virtual Shape getInputShape(uint32_t index) const = 0;
46 virtual const void* getInputBuffer(uint32_t index) const = 0;
47 virtual const Operand::ExtraParams& getInputExtraParams(uint32_t index) const = 0;
48
49 virtual uint32_t getNumOutputs() const = 0;
50 virtual OperandType getOutputType(uint32_t index) const = 0;
51 virtual Shape getOutputShape(uint32_t index) const = 0;
52 virtual void* getOutputBuffer(uint32_t index) = 0;
53
54 // Updates the output shape, allocating the buffer if necessary.
55 virtual bool setOutputShape(uint32_t index, const Shape& shape) = 0;
56
57 virtual bool isOmittedInput(uint32_t index) const = 0;
58 virtual bool isOmittedOutput(uint32_t index) const = 0;
59
60 template <typename T>
getInputBuffer(uint32_t index)61 const T* getInputBuffer(uint32_t index) const {
62 return reinterpret_cast<const T*>(getInputBuffer(index));
63 }
64
65 template <typename T>
getOutputBuffer(uint32_t index)66 T* getOutputBuffer(uint32_t index) {
67 return reinterpret_cast<T*>(getOutputBuffer(index));
68 }
69
70 template <typename T>
getInputValue(uint32_t index)71 T getInputValue(uint32_t index) const {
72 return getInputBuffer<T>(index)[0];
73 }
74 };
75
76 // Converts an axis index from the range [-dims, dims) into the range [0, dims).
77 bool handleNegativeAxis(int32_t numberOfDimensions, int32_t* axis);
78
handleNegativeAxis(const Shape & shape,int32_t * axis)79 inline bool handleNegativeAxis(const Shape& shape, int32_t* axis) {
80 return handleNegativeAxis(getNumberOfDimensions(shape), axis);
81 }
82
// Computes the output spatial extent of a sliding-window op (conv/pool):
// the number of filter positions that fit in the padded input at the given
// stride.
inline int32_t computeOutSize(int32_t imageSize, int32_t filterSize, int32_t stride,
                              int32_t paddingHead, int32_t paddingTail) {
    const int32_t numerator = imageSize - filterSize + stride + paddingHead + paddingTail;
    return numerator / stride;
}
87
// Overload accounting for dilation: a filter of size f with dilation rate d
// spans (f - 1) * d + 1 input elements.
inline int32_t computeOutSize(int32_t imageSize, int32_t filterSize, int32_t stride,
                              int32_t dilationRate, int32_t paddingHead, int32_t paddingTail) {
    const int32_t effectiveFilterSize = (filterSize - 1) * dilationRate + 1;
    const int32_t numerator = imageSize - effectiveFilterSize + stride + paddingHead + paddingTail;
    return numerator / stride;
}
93
// Inverse of computeOutSize: given the (smaller) input extent of a transposed
// convolution, computes the expanded output extent.
inline int32_t computeOutSizeTransposeConv(int32_t imageSize, int32_t filterSize, int32_t stride,
                                           int32_t paddingHead, int32_t paddingTail) {
    const int32_t expanded = imageSize * stride;
    return expanded + filterSize - stride - paddingHead - paddingTail;
}
98
// Decomposes a real multiplier into a 32-bit fixed-point mantissa plus a
// power-of-two shift (TFLite-style quantized multiplier encoding). Returns
// false if the multiplier cannot be encoded.
[[nodiscard]] bool QuantizeMultiplier(double double_multiplier, int32_t* quantized_multiplier,
                                      int32_t* shift);

// Variant for multipliers smaller than one; produces a right shift.
[[nodiscard]] bool QuantizeMultiplierSmallerThanOne(double double_multiplier,
                                                    int32_t* quantized_multiplier,
                                                    int32_t* right_shift);

// Same as QuantizeMultiplierSmallerThanOne but returns left shift (i.e. negated
// right shift), so that it has the same interface as
// QuantizeMultiplierGreaterThanOne and QuantizeMultiplier functions.
[[nodiscard]] bool QuantizeMultiplierSmallerThanOneExp(double double_multiplier,
                                                       int32_t* quantized_multiplier,
                                                       int32_t* left_shift);

// Variant for multipliers greater than one; produces a left shift.
[[nodiscard]] bool QuantizeMultiplierGreaterThanOne(double double_multiplier,
                                                    int32_t* quantized_multiplier, int* left_shift);

// Computes the real output multiplier for a quantized convolution from the
// operand scales; the biasShape overload additionally takes the bias scale
// into account. NOTE(review): exact scale formula lives in the .cpp — confirm
// there before relying on it.
[[nodiscard]] bool GetQuantizedConvolutionMultiplier(const Shape& inputShape,
                                                     const Shape& filterShape,
                                                     const Shape& biasShape,
                                                     const Shape& outputShape, double* multiplier);

[[nodiscard]] bool GetQuantizedConvolutionMultiplier(const Shape& inputShape,
                                                     const Shape& filterShape,
                                                     const Shape& outputShape, double* multiplier);

// Computes the [act_min, act_max] clamp bounds implied by a fused activation
// code, expressed in the output's quantized (uint8) domain.
void CalculateActivationRangeUint8(int32_t activation, const Shape& outputShape, int32_t* act_min,
                                   int32_t* act_max);

// Same as above, in the signed (int8) quantized domain.
void CalculateActivationRangeInt8(int32_t activation, const Shape& outputShape, int32_t* act_min,
                                  int32_t* act_max);

// Float-domain counterpart of the activation-range helpers above.
void CalculateActivationRangeFloat(int32_t activation, float* activation_min,
                                   float* activation_max);

int32_t CalculateInputRadius(int input_integer_bits, int input_left_shift);

// Shared implementation behind the calculateExplicitPadding* wrappers below.
void calculateExplicitPaddingImpl(int32_t in_size, int32_t stride, int32_t dilation_factor,
                                  int32_t filter_size, int32_t padding_implicit,
                                  bool isTransposeConv, int32_t* padding_head,
                                  int32_t* padding_tail);
140
calculateExplicitPadding(int32_t in_size,int32_t stride,int32_t dilation_factor,int32_t filter_size,int32_t padding_implicit,int32_t * padding_head,int32_t * padding_tail)141 inline void calculateExplicitPadding(int32_t in_size, int32_t stride, int32_t dilation_factor,
142 int32_t filter_size, int32_t padding_implicit,
143 int32_t* padding_head, int32_t* padding_tail) {
144 calculateExplicitPaddingImpl(in_size, stride, dilation_factor, filter_size, padding_implicit,
145 /*isTransposeConv=*/false, padding_head, padding_tail);
146 }
147
calculateExplicitPadding(int32_t in_size,int32_t stride,int32_t filter_size,int32_t padding_implicit,int32_t * padding_head,int32_t * padding_tail)148 inline void calculateExplicitPadding(int32_t in_size, int32_t stride, int32_t filter_size,
149 int32_t padding_implicit, int32_t* padding_head,
150 int32_t* padding_tail) {
151 calculateExplicitPadding(in_size, stride, 1, filter_size, padding_implicit, padding_head,
152 padding_tail);
153 }
154
calculateExplicitPaddingTransposeConv(int32_t in_size,int32_t stride,int32_t filter_size,int32_t padding_implicit,int32_t * padding_head,int32_t * padding_tail)155 inline void calculateExplicitPaddingTransposeConv(int32_t in_size, int32_t stride,
156 int32_t filter_size, int32_t padding_implicit,
157 int32_t* padding_head, int32_t* padding_tail) {
158 calculateExplicitPaddingImpl(in_size, stride, /*dilation_factor=*/1, filter_size,
159 padding_implicit, /*isTransposeConv=*/true, padding_head,
160 padding_tail);
161 }
162
getPaddingScheme(int32_t inWidth,int32_t inHeight,int32_t strideWidth,int32_t strideHeight,int32_t filterWidth,int32_t filterHeight,int32_t paddingLeft,int32_t paddingRight,int32_t paddingTop,int32_t paddingBottom)163 inline PaddingScheme getPaddingScheme(int32_t inWidth, int32_t inHeight, int32_t strideWidth,
164 int32_t strideHeight, int32_t filterWidth,
165 int32_t filterHeight, int32_t paddingLeft,
166 int32_t paddingRight, int32_t paddingTop,
167 int32_t paddingBottom) {
168 if (paddingLeft == 0 && paddingRight == 0 && paddingTop == 0 && paddingBottom == 0) {
169 return kPaddingValid;
170 }
171
172 int32_t expectedPaddingLeft, expectedPaddingRight;
173 int32_t expectedPaddingTop, expectedPaddingBottom;
174
175 calculateExplicitPadding(inWidth, strideWidth, filterWidth, kPaddingSame, &expectedPaddingLeft,
176 &expectedPaddingRight);
177 calculateExplicitPadding(inHeight, strideHeight, filterHeight, kPaddingSame,
178 &expectedPaddingTop, &expectedPaddingBottom);
179 if (expectedPaddingLeft == paddingLeft && expectedPaddingRight == paddingRight &&
180 expectedPaddingTop == paddingTop && expectedPaddingBottom == paddingBottom) {
181 return kPaddingSame;
182 } else {
183 return kPaddingUnknown;
184 }
185 }
186
// Reverse order of bits in the mask to match the expected order in kernel.
// Only the low 'num_dimensions' bits participate; higher bits are dropped.
inline int ReverseMaskBits(int mask, int num_dimensions) {
    int reversed = 0;
    for (int i = 0; i < num_dimensions; ++i) {
        reversed = (reversed << 1) | (mask & 1);
        mask >>= 1;
    }
    return reversed;
}
197
// Compute the positive remainder (result is always in [0, divisor)).
inline int32_t PositiveRemainder(int32_t dividend, int32_t divisor) {
    const int32_t rem = dividend % divisor;
    return (rem + divisor) % divisor;
}

// Compute clamped index (strided-slice style): indices are clamped into the
// valid range for the given dimension, with out-of-range values saturating to
// 'dim' (positive stride) or -1 (negative stride).
inline int32_t ClampedIndex(int32_t index, int dim, bool pos_stride) {
    if (pos_stride) {
        if (index >= dim) {
            return dim;
        }
        return PositiveRemainder(std::min(std::max(index, -dim), dim), dim);
    }
    if (index < -dim) {
        return -1;
    }
    return PositiveRemainder(std::min(std::max(index, -dim), dim - 1), dim);
}
212
// Broadcasts input shape against one another and puts the result into output
// shape. Returns true on success and false on error.
bool calculateBroadcastedShape(const Shape& in1, const Shape& in2, Shape* out);

// Dequantizes a value and quantizes it back using new scale and offset.
template <typename T>
T requantize(T value, const Shape& oldShape, const Shape& newShape);

// Preparation functions for the corresponding ops: each validates the input
// shape(s) and computes the output shape(s), returning true on success.
bool floorPrepare(const Shape& input, Shape* output);

bool depthwiseConvPrepare(const Shape& input, const Shape& filter, const Shape& bias,
                          int32_t padding_left, int32_t padding_right, int32_t padding_top,
                          int32_t padding_bottom, int32_t stride_width, int32_t stride_height,
                          int32_t depth_multiplier, int32_t dilation_width_factor,
                          int32_t dilation_height_factor, Shape* output);

// Activations (RELU family, logistic, tanh, ...) are shape-preserving, so
// they share one prepare function.
bool genericActivationPrepare(const Shape& input, Shape* output);

bool reshapePrepare(const Shape& input, const int32_t* targetDims, const int32_t targetDimsSize,
                    Shape* output);

bool depthToSpacePrepare(const Shape& input, int32_t blockSize, Shape* output);

bool spaceToDepthPrepare(const Shape& input, int32_t blockSize, Shape* output);

bool embeddingLookupPrepare(const Shape& valueShape, const Shape& lookupShape, Shape* outputShape);

// Produces both the output shape and the per-row hit-indicator shape.
bool hashtableLookupPrepare(const Shape& lookupShape, const Shape& keyShape,
                            const Shape& valueShape, Shape* outputShape, Shape* hitShape);

bool padPrepare(const Shape& input, const int32_t* paddingsData, const Shape& paddingsShape,
                Shape* output);

bool batchToSpacePrepare(const Shape& input, const int32_t* blockSizeData,
                         const Shape& blockSizeShape, Shape* output);

bool spaceToBatchPrepare(const Shape& input, const int32_t* blockSizeData,
                         const Shape& blockSizeShape, const int32_t* paddingsData,
                         const Shape& paddingsShape, Shape* output);

bool meanPrepare(const Shape& input, const int32_t* axisData, const Shape& axisShape, bool keepDims,
                 Shape* output);

bool argMinMaxPrepare(const Shape& input, int32_t axis, Shape* output);

// SPLIT produces 'numOutputs' shapes along 'axis'.
bool splitPrepare(const Shape& input, int32_t axis, int32_t numOutputs, std::vector<Shape>* output);

bool groupedConvPrepare(const Shape& input, const Shape& filter, const Shape& bias,
                        int32_t padding_left, int32_t padding_right, int32_t padding_top,
                        int32_t padding_bottom, int32_t stride_width, int32_t stride_height,
                        int32_t numGroups, Shape* output);
265
266 // Transposes the first two dimensions.
267 template <typename T>
transposeFirstTwoDimensions(const T * buffer,const Shape & shape,T * transposedBuffer)268 inline bool transposeFirstTwoDimensions(const T* buffer, const Shape& shape, T* transposedBuffer) {
269 const int numDims = getNumberOfDimensions(shape);
270 NN_RET_CHECK(numDims >= 2);
271 const int firstDim = getSizeOfDimension(shape, 0);
272 const int secondDim = getSizeOfDimension(shape, 1);
273 int blockSize = 1;
274 for (int i = 2; i < numDims; ++i) {
275 blockSize *= getSizeOfDimension(shape, i);
276 }
277
278 for (int i = 0; i < firstDim; ++i) {
279 for (int j = 0; j < secondDim; ++j) {
280 for (int k = 0; k < blockSize; ++k) {
281 transposedBuffer[(j * firstDim + i) * blockSize + k] =
282 buffer[(i * secondDim + j) * blockSize + k];
283 }
284 }
285 }
286 return true;
287 }
288
transposeFirstTwoDimensions(const Shape & shape,Shape * transposedShape)289 inline bool transposeFirstTwoDimensions(const Shape& shape, Shape* transposedShape) {
290 NN_RET_CHECK(getNumberOfDimensions(shape) >= 2);
291 *transposedShape = shape;
292 transposedShape->dimensions[0] = shape.dimensions[1];
293 transposedShape->dimensions[1] = shape.dimensions[0];
294 return true;
295 }
296
297 // Given two 3-dimensional tensors, merge them into one 3-dimensional tensor
298 // at the third dimension. The merged tensor's third dimension size will be
299 // sum of that of the two inputs.
300 template <typename T>
mergeThirdDimension(const T * bufferA,const std::vector<uint32_t> & dimsA,const T * bufferB,const std::vector<uint32_t> & dimsB,T * merged)301 inline bool mergeThirdDimension(const T* bufferA, const std::vector<uint32_t>& dimsA,
302 const T* bufferB, const std::vector<uint32_t>& dimsB, T* merged) {
303 NN_RET_CHECK_EQ(dimsA.size(), 3u);
304 NN_RET_CHECK_EQ(dimsB.size(), 3u);
305
306 NN_RET_CHECK_EQ(dimsA[0], dimsB[0]);
307 NN_RET_CHECK_EQ(dimsA[1], dimsB[1]);
308
309 for (unsigned int i = 0; i < dimsA[0]; ++i) {
310 for (unsigned int j = 0; j < dimsA[1]; ++j) {
311 for (unsigned int k = 0; k < dimsA[2]; ++k) {
312 merged[(i * dimsA[1] + j) * (dimsA[2] + dimsB[2]) + k] =
313 bufferA[(i * dimsA[1] + j) * dimsA[2] + k];
314 }
315 for (unsigned int k = 0; k < dimsB[2]; ++k) {
316 merged[(i * dimsA[1] + j) * (dimsA[2] + dimsB[2]) + dimsA[2] + k] =
317 bufferB[(i * dimsB[1] + j) * dimsB[2] + k];
318 }
319 }
320 }
321 return true;
322 }
323
// Converts 'val' to T, saturating at T's representable bounds.
template <typename T>
inline T saturateCast(int32_t val);

template <>
inline uint8_t saturateCast<uint8_t>(int32_t val) {
    // Fix: the previous implementation cast the clamped value through int8_t,
    // which for values in (127, 255] relied on implementation-defined
    // narrowing (pre-C++20) before the implicit conversion back to uint8_t.
    // Clamp and cast directly to the unsigned target type instead.
    return static_cast<uint8_t>(std::clamp(val, 0, 255));
}

template <>
inline int8_t saturateCast<int8_t>(int32_t val) {
    return static_cast<int8_t>(std::clamp(val, -128, 127));
}
336
337 } // namespace nn
338 } // namespace android
339
340 #endif // ANDROID_PACKAGES_MODULES_NEURALNETWORKS_COMMON_OPERATIONS_EXECUTION_UTILS_H
341