/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define LOG_TAG "OperationsUtils"

#include "OperationsExecutionUtils.h"

#include <android-base/logging.h>

#include <algorithm>
#include <cmath>
#include <limits>
#include <sstream>
#include <vector>

#include "ActivationFunctor.h"
#include "nnapi/Validation.h"

namespace android {
namespace nn {

namespace {

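// Computes the [*act_min, *act_max] clamping bounds for a fused activation, expressed in
// the output's quantized domain. Illustration with hypothetical quantization parameters:
// with scale = 0.5, offset = 10, qmin = 0 and qmax = 255, kActivationRelu6 yields
// *act_min = 10 (quantize(0.0f)) and *act_max = 22 (quantize(6.0f)).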
void CalculateActivationRangeImpl(int32_t activation, const Shape& outputShape, int32_t qmin,
                                  int32_t qmax, int32_t* act_min, int32_t* act_max) {
    const auto scale = outputShape.scale;
    const auto zero_point = outputShape.offset;

    auto quantize = [scale, zero_point](float f) {
        return zero_point + static_cast<int32_t>(std::round(f / scale));
    };

    if (activation == kActivationRelu) {
        *act_min = std::max(qmin, quantize(0.0));
        *act_max = qmax;
    } else if (activation == kActivationRelu6) {
        *act_min = std::max(qmin, quantize(0.0));
        *act_max = std::min(qmax, quantize(6.0));
    } else if (activation == kActivationRelu1) {
        *act_min = std::max(qmin, quantize(-1.0));
        *act_max = std::min(qmax, quantize(1.0));
    } else if (activation == kActivationNone) {
        *act_min = qmin;
        *act_max = qmax;
    } else {
        LOG(ERROR) << "Unsupported fused activation function.";
    }
}

}  // namespace

bool handleNegativeAxis(int32_t numberOfDimensions, int32_t* axis) {
    NN_CHECK(-numberOfDimensions <= *axis && *axis < numberOfDimensions);
    if (*axis < 0) {
        *axis += numberOfDimensions;
    }
    return true;
}

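// Decomposes double_multiplier into a 32-bit fixed-point multiplier and a base-two
// exponent such that double_multiplier ~= quantized_multiplier * 2^(shift - 31).
// Worked example: for double_multiplier = 3.0, std::frexp() returns 0.75 with an
// exponent of 2, so quantized_multiplier = round(0.75 * 2^31) = 1610612736 and shift = 2.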
bool QuantizeMultiplier(double double_multiplier, int32_t* quantized_multiplier, int32_t* shift) {
    if (double_multiplier == 0.) {
        *quantized_multiplier = 0;
        *shift = 0;
        return true;
    }
    const double q = std::frexp(double_multiplier, shift);
    auto q_fixed = static_cast<int64_t>(std::round(q * (1LL << 31)));
    NN_RET_CHECK(q_fixed <= (1LL << 31));
    if (q_fixed == (1LL << 31)) {
        q_fixed /= 2;
        ++*shift;
    }
    NN_RET_CHECK_LE(q_fixed, std::numeric_limits<int32_t>::max());
    // A shift amount smaller than -31 would cause all bits to be shifted out
    // and thus all results would be zero. We implement that instead with
    // q_fixed==0, so as to avoid hitting issues with right-shift
    // operations with shift amounts greater than 31. Note that this happens
    // roughly when abs(double_multiplier) < 2^-31 and the present handling means
    // that we're effectively flushing tiny double_multiplier's to zero.
    // We could conceivably handle values in the range (roughly) [32, 63]
    // as 'denormals' i.e. (shift==0, q_fixed < 2^30). From that point of view
    // the present handling is just doing 'flush denormals to zero'. We could
    // reconsider and actually generate nonzero denormals if a need arises.
    if (*shift < -31) {
        *shift = 0;
        q_fixed = 0;
    }
    *quantized_multiplier = static_cast<int32_t>(q_fixed);
    return true;
}

bool QuantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t* quantized_multiplier,
                                         int32_t* left_shift) {
    NN_RET_CHECK(double_multiplier > 0.);
    NN_RET_CHECK(double_multiplier < 1.);
    NN_RET_CHECK(QuantizeMultiplier(double_multiplier, quantized_multiplier, left_shift));
    NN_RET_CHECK(*left_shift <= 0);
    return true;
}

bool QuantizeMultiplierSmallerThanOne(double double_multiplier, int32_t* quantized_multiplier,
                                      int32_t* right_shift) {
    NN_OPS_CHECK(double_multiplier >= 0.);
    NN_OPS_CHECK(double_multiplier < 1.);
    if (double_multiplier == 0.) {
        *quantized_multiplier = 0;
        *right_shift = 0;
        return true;
    }
    NN_OPS_CHECK(double_multiplier > 0.);
    const double q = std::frexp(double_multiplier, right_shift);
    *right_shift *= -1;
    int64_t q_fixed = static_cast<int64_t>(std::round(q * (1LL << 31)));
    NN_OPS_CHECK(q_fixed <= (1LL << 31));
    if (q_fixed == (1LL << 31)) {
        q_fixed /= 2;
        --*right_shift;
    }
    NN_OPS_CHECK(*right_shift >= 0);
    NN_OPS_CHECK(q_fixed <= std::numeric_limits<int32_t>::max());
    *quantized_multiplier = static_cast<int32_t>(q_fixed);
    return true;
}

bool QuantizeMultiplierGreaterThanOne(double double_multiplier, int32_t* quantized_multiplier,
                                      int* left_shift) {
    NN_OPS_CHECK(double_multiplier > 1.);
    const double q = std::frexp(double_multiplier, left_shift);
    int64_t q_fixed = static_cast<int64_t>(std::round(q * (1LL << 31)));
    NN_OPS_CHECK(q_fixed <= (1LL << 31));
    if (q_fixed == (1LL << 31)) {
        q_fixed /= 2;
        ++*left_shift;
    }
    NN_OPS_CHECK(*left_shift >= 0);
    NN_OPS_CHECK(q_fixed <= std::numeric_limits<int32_t>::max());
    *quantized_multiplier = static_cast<int32_t>(q_fixed);
    return true;
}

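// The two overloads below compute the real multiplier that rescales the int32 accumulator
// of a quantized convolution: input_scale * filter_scale / output_scale. Illustration with
// hypothetical scales: inputShape.scale = 0.5, filterShape.scale = 0.25 and
// outputShape.scale = 0.1 give *multiplier = 0.125 / 0.1 = 1.25.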
bool GetQuantizedConvolutionMultiplier(const Shape& inputShape, const Shape& filterShape,
                                       const Shape& biasShape, const Shape& outputShape,
                                       double* multiplier) {
    // Upcast bias and input_product to double
    const double input_product_scale = inputShape.scale * filterShape.scale;
    const double bias_scale = biasShape.scale;

    // The following conditions must be guaranteed by the training pipeline.
    NN_OPS_CHECK(std::abs(input_product_scale - bias_scale) <=
                 1e-6 * std::min(input_product_scale, bias_scale));
    NN_OPS_CHECK(input_product_scale >= 0);
    *multiplier = input_product_scale / outputShape.scale;
    return true;
}

bool GetQuantizedConvolutionMultiplier(const Shape& inputShape, const Shape& filterShape,
                                       const Shape& outputShape, double* multiplier) {
    // Upcast input_product to double
    const double input_product_scale = inputShape.scale * filterShape.scale;

    // The following conditions must be guaranteed by the training pipeline.
    NN_OPS_CHECK(input_product_scale >= 0);
    *multiplier = input_product_scale / outputShape.scale;
    return true;
}

void CalculateActivationRangeUint8(int32_t activation, const Shape& outputShape, int32_t* act_min,
                                   int32_t* act_max) {
    const int32_t qmin = std::numeric_limits<uint8_t>::min();
    const int32_t qmax = std::numeric_limits<uint8_t>::max();

    CalculateActivationRangeImpl(activation, outputShape, qmin, qmax, act_min, act_max);
}

void CalculateActivationRangeInt8(int32_t activation, const Shape& outputShape, int32_t* act_min,
                                  int32_t* act_max) {
    const int32_t qmin = std::numeric_limits<int8_t>::min();
    const int32_t qmax = std::numeric_limits<int8_t>::max();

    CalculateActivationRangeImpl(activation, outputShape, qmin, qmax, act_min, act_max);
}

void CalculateActivationRangeFloat(int32_t activation, float* activation_min,
                                   float* activation_max) {
    if (activation == kActivationRelu) {
        *activation_min = 0.f;
        *activation_max = std::numeric_limits<float>::max();
    } else if (activation == kActivationRelu6) {
        *activation_min = 0.f;
        *activation_max = 6.f;
    } else if (activation == kActivationRelu1) {
        *activation_min = -1.f;
        *activation_max = 1.f;
    } else if (activation == kActivationNone) {
        *activation_min = std::numeric_limits<float>::lowest();
        *activation_max = std::numeric_limits<float>::max();
    } else {
        LOG(ERROR) << "Unsupported fused activation function.";
    }
}

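// Computes floor(((2^input_integer_bits) - 1) * 2^(31 - input_integer_bits) /
// 2^input_left_shift). Worked example: input_integer_bits = 4 and input_left_shift = 20
// give floor(15 * 2^27 / 2^20) = 15 * 128 = 1920.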
int32_t CalculateInputRadius(int input_integer_bits, int input_left_shift) {
    const double max_input_rescaled = 1.0 * ((1 << input_integer_bits) - 1) *
                                      (1LL << (31 - input_integer_bits)) /
                                      (1LL << input_left_shift);
    // Tighten bound using floor.  Suppose that we could use the exact value.
    // After scaling the difference, the result would be at the maximum.  Thus we
    // must ensure that our value has lower magnitude.
    return static_cast<int32_t>(std::floor(max_input_rescaled));
}

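// Worked example for the implicit SAME padding case below (hypothetical sizes):
// in_size = 224, stride = 2, dilation_factor = 1 and filter_size = 3 give out_size = 112,
// tmp = 111 * 2 + 3 = 225, so padding_head = 0 and padding_tail = 1.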
void calculateExplicitPaddingImpl(int32_t in_size, int32_t stride, int32_t dilation_factor,
                                  int32_t filter_size, int32_t padding_implicit,
                                  bool isTransposeConv, int32_t* padding_head,
                                  int32_t* padding_tail) {
    *padding_head = 0;
    *padding_tail = 0;

    int32_t effective_filter_size = (filter_size - 1) * dilation_factor + 1;

    if (padding_implicit == kPaddingSame) {
        int32_t out_size = (in_size + stride - 1) / stride;
        int32_t tmp = (out_size - 1) * stride + effective_filter_size;
        if (tmp > in_size) {
            *padding_head = (tmp - in_size) / 2;
            *padding_tail = (tmp - in_size) - *padding_head;
        }
        // For transpose conv, make padding tail fit tightly to the end of the last stride.
        if (isTransposeConv) {
            *padding_tail = (tmp - in_size) - *padding_head;
        }
    }
}

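// Computes the NumPy-style broadcast of two shapes, aligning dimensions from the right and
// requiring each pair of dimensions to either match or contain a 1. Illustration with
// hypothetical dimensions: {2, 1, 3} and {4, 3} broadcast to {2, 4, 3}.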
bool calculateBroadcastedShape(const Shape& in1, const Shape& in2, Shape* out) {
    NN_RET_CHECK(in1.type == in2.type);
    uint32_t numberOfDims1 = getNumberOfDimensions(in1);
    uint32_t numberOfDims2 = getNumberOfDimensions(in2);
    uint32_t maxDims = std::max(numberOfDims1, numberOfDims2);
    out->dimensions = std::vector<uint32_t>(maxDims);
    for (uint32_t i = 1; i <= maxDims; i++) {
        uint32_t dim1 = 1;
        if (i <= numberOfDims1) {
            dim1 = getSizeOfDimension(in1, numberOfDims1 - i);
        }
        uint32_t dim2 = 1;
        if (i <= numberOfDims2) {
            dim2 = getSizeOfDimension(in2, numberOfDims2 - i);
        }
        if (dim1 != dim2 && dim1 != 1 && dim2 != 1) {
            LOG(ERROR) << "Dimensions mismatch for broadcast:\n"
                       << "First tensor: dimension " << numberOfDims1 - i << " of size " << dim1
                       << "\nSecond tensor: dimension " << numberOfDims2 - i << " of size " << dim2;
            return false;
        }
        out->dimensions[maxDims - i] = (dim1 == 1) ? dim2 : dim1;
    }
    return true;
}

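// requantize() maps a quantized value back to the real domain with the old scale/offset,
// re-quantizes it with the new scale/offset, and clamps to the range of the output type.
// Illustration with hypothetical parameters: value = 200, oldShape.scale = 0.5,
// oldShape.offset = 0, newShape.scale = 1.0 and newShape.offset = 0 yield 100.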
template <>
uint8_t requantize<uint8_t>(uint8_t value, const Shape& oldShape, const Shape& newShape) {
    double doubleValue = (value - oldShape.offset) * oldShape.scale;
    double doubleRet = doubleValue / newShape.scale + newShape.offset;
    if (doubleRet < 0) return 0;
    if (doubleRet > 255) return 255;
    return static_cast<uint8_t>(std::round(doubleRet));
}

template <>
int8_t requantize<int8_t>(int8_t value, const Shape& oldShape, const Shape& newShape) {
    double doubleValue = (value - oldShape.offset) * oldShape.scale;
    double doubleRet = doubleValue / newShape.scale + newShape.offset;
    if (doubleRet < -128) return -128;
    if (doubleRet > 127) return 127;
    return static_cast<int8_t>(std::round(doubleRet));
}

bool reshapePrepare(const Shape& input, const int32_t* targetDims, const int32_t targetDimsSize,
                    Shape* output) {
    // Reshape allows one of the targetDims components to have the
    // special -1 value, meaning it will be calculated automatically based on the
    // input. Here we calculate what that dimension should be so that the number
    // of output elements is the same as the number of input elements.
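    // Illustration with hypothetical dimensions: an input of 24 elements reshaped with
    // targetDims = {2, -1, 4} produces output dimensions {2, 3, 4}.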
    int32_t numInputElements = (int32_t)getNumberOfElements(input);

    std::vector<uint32_t> outDims(targetDimsSize);
    int32_t numOutputElements = 1;
    int32_t stretchDim = -1;
    for (int32_t i = 0; i < targetDimsSize; ++i) {
        int32_t value = targetDims[i];
        if (value == -1) {
            NN_OPS_CHECK(stretchDim == -1);
            stretchDim = i;
        } else {
            numOutputElements *= value;
            outDims[i] = (uint32_t)value;
        }
    }
    if (stretchDim != -1) {
        int32_t stretchValue = numInputElements / numOutputElements;
        outDims[stretchDim] = (uint32_t)stretchValue;
        numOutputElements *= stretchValue;
    }

    NN_OPS_CHECK(numInputElements == numOutputElements);

    output->type = input.type;
    output->dimensions = outDims;
    output->offset = input.offset;
    output->scale = input.scale;

    return true;
}

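// DEPTH_TO_SPACE moves data from the channel dimension into spatial blocks; SPACE_TO_DEPTH
// below is the inverse. Illustration with hypothetical dimensions: an NHWC input of
// {1, 2, 2, 12} with blockSize = 2 becomes {1, 4, 4, 3}.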
bool depthToSpacePrepare(const Shape& input, int32_t blockSize, Shape* output) {
    NN_OPS_CHECK(getNumberOfDimensions(input) == 4);
    NN_OPS_CHECK(blockSize > 0);

    uint32_t batches = getSizeOfDimension(input, 0);
    uint32_t height = getSizeOfDimension(input, 1);
    uint32_t width = getSizeOfDimension(input, 2);
    uint32_t channels = getSizeOfDimension(input, 3);

    NN_OPS_CHECK(channels % (blockSize * blockSize) == 0);
    output->type = input.type;
    output->dimensions = {batches, height * blockSize, width * blockSize,
                          channels / (blockSize * blockSize)};
    output->offset = input.offset;
    output->scale = input.scale;

    return true;
}

bool spaceToDepthPrepare(const Shape& input, int32_t blockSize, Shape* output) {
    NN_OPS_CHECK(getNumberOfDimensions(input) == 4);
    NN_OPS_CHECK(blockSize > 0);

    uint32_t batches = getSizeOfDimension(input, 0);
    uint32_t height = getSizeOfDimension(input, 1);
    uint32_t width = getSizeOfDimension(input, 2);
    uint32_t channels = getSizeOfDimension(input, 3);

    NN_OPS_CHECK(height % blockSize == 0);
    NN_OPS_CHECK(width % blockSize == 0);

    output->type = input.type;
    output->dimensions = {batches, height / blockSize, width / blockSize,
                          channels * (blockSize * blockSize)};
    output->offset = input.offset;
    output->scale = input.scale;

    return true;
}

bool embeddingLookupPrepare(const Shape& valueShape, const Shape& lookupShape, Shape* outputShape) {
    NN_OPS_CHECK(getNumberOfDimensions(valueShape) >= 2);
    NN_OPS_CHECK(getNumberOfDimensions(lookupShape) == 1);

    const uint32_t columns = getSizeOfDimension(valueShape, 1);
    const uint32_t lookups = getSizeOfDimension(lookupShape, 0);

    outputShape->type = valueShape.type;
    outputShape->dimensions = {lookups, columns};
    for (uint32_t i = 2; i < getNumberOfDimensions(valueShape); i++) {
        outputShape->dimensions.push_back(getSizeOfDimension(valueShape, i));
    }
    outputShape->offset = valueShape.offset;
    outputShape->scale = valueShape.scale;

    return true;
}

bool hashtableLookupPrepare(const Shape& lookupShape, const Shape& keyShape,
                            const Shape& valueShape, Shape* outputShape, Shape* hitShape) {
    NN_OPS_CHECK(getNumberOfDimensions(lookupShape) == 1);
    NN_OPS_CHECK(getNumberOfDimensions(keyShape) == 1);
    NN_OPS_CHECK(getNumberOfDimensions(valueShape) >= 1);

    const uint32_t lookups = getSizeOfDimension(lookupShape, 0);
    outputShape->type = valueShape.type;
    outputShape->dimensions = {lookups};
    for (uint32_t i = 1; i < getNumberOfDimensions(valueShape); i++) {
        outputShape->dimensions.push_back(getSizeOfDimension(valueShape, i));
    }
    outputShape->offset = valueShape.offset;
    outputShape->scale = valueShape.scale;

    hitShape->type = OperandType::TENSOR_QUANT8_ASYMM;
    hitShape->dimensions = {lookups};
    hitShape->offset = 0;
    hitShape->scale = 1.f;

    return true;
}

bool padPrepare(const Shape& input, const int32_t* paddingsData, const Shape& paddingsShape,
                Shape* output) {
    uint32_t numInputDims = getNumberOfDimensions(input);

    // paddings need to be provided as a 2-D int32 tensor.
    NN_OPS_CHECK(paddingsShape.type == OperandType::TENSOR_INT32);
    NN_OPS_CHECK(getNumberOfDimensions(paddingsShape) == 2);
    NN_OPS_CHECK(getSizeOfDimension(paddingsShape, 0) == numInputDims);
    NN_OPS_CHECK(getSizeOfDimension(paddingsShape, 1) == 2);

    std::vector<uint32_t> outDims(numInputDims);
    for (uint32_t i = 0; i < numInputDims; ++i) {
        int32_t beforePadding = *paddingsData++;
        int32_t afterPadding = *paddingsData++;
        // Pad values have to be greater than or equal to 0.
        NN_OPS_CHECK(beforePadding >= 0 && afterPadding >= 0);
        outDims[i] = beforePadding + getSizeOfDimension(input, i) + afterPadding;
    }
    output->type = input.type;
    output->dimensions = outDims;
    output->offset = input.offset;
    output->scale = input.scale;

    return true;
}

bool batchToSpacePrepare(const Shape& input, const int32_t* blockSizeData,
                         const Shape& blockSizeShape, Shape* output) {
    // Only 4D NHWC tensors are supported.
    NN_OPS_CHECK(getNumberOfDimensions(input) == 4);

    // blockSize needs to be provided as a 1-D int32 tensor.
    NN_OPS_CHECK(blockSizeShape.type == OperandType::TENSOR_INT32);
    NN_OPS_CHECK(getNumberOfDimensions(blockSizeShape) == 1);
    // Only applies to spatial dimensions.
    NN_OPS_CHECK(getSizeOfDimension(blockSizeShape, 0) == 2);

    uint32_t batches = getSizeOfDimension(input, 0);
    uint32_t height = getSizeOfDimension(input, 1);
    uint32_t width = getSizeOfDimension(input, 2);
    uint32_t channels = getSizeOfDimension(input, 3);

    NN_OPS_CHECK(batches % (blockSizeData[0] * blockSizeData[1]) == 0);
    output->type = input.type;
    output->dimensions = {batches / (blockSizeData[0] * blockSizeData[1]),
                          height * blockSizeData[0], width * blockSizeData[1], channels};
    output->offset = input.offset;
    output->scale = input.scale;

    return true;
}

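// Illustration with hypothetical dimensions: an NHWC input of {1, 4, 4, 1} with
// blockSize = {2, 2} and zero paddings produces output dimensions {4, 2, 2, 1}.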
bool spaceToBatchPrepare(const Shape& input, const int32_t* blockSizeData,
                         const Shape& blockSizeShape, const int32_t* paddingsData,
                         const Shape& paddingsShape, Shape* output) {
    // Only 4D NHWC tensors are supported.
    NN_OPS_CHECK(getNumberOfDimensions(input) == 4);

    // blockSize needs to be provided as a 1-D int32 tensor.
    NN_OPS_CHECK(blockSizeShape.type == OperandType::TENSOR_INT32);
    NN_OPS_CHECK(getNumberOfDimensions(blockSizeShape) == 1);
    // Only applies to spatial dimensions.
    NN_OPS_CHECK(getSizeOfDimension(blockSizeShape, 0) == 2);

    // paddings need to be provided as a 2-D int32 tensor.
    NN_OPS_CHECK(paddingsShape.type == OperandType::TENSOR_INT32);
    NN_OPS_CHECK(getNumberOfDimensions(paddingsShape) == 2);
    NN_OPS_CHECK(getSizeOfDimension(paddingsShape, 0) == 2);
    NN_OPS_CHECK(getSizeOfDimension(paddingsShape, 1) == 2);

    uint32_t batches = getSizeOfDimension(input, 0);
    uint32_t height = getSizeOfDimension(input, 1);
    uint32_t width = getSizeOfDimension(input, 2);
    uint32_t channels = getSizeOfDimension(input, 3);

    uint32_t paddedHeight = paddingsData[0] + height + paddingsData[1];
    uint32_t paddedWidth = paddingsData[2] + width + paddingsData[3];

    NN_OPS_CHECK(paddedHeight % blockSizeData[0] == 0);
    NN_OPS_CHECK(paddedWidth % blockSizeData[1] == 0);

    output->type = input.type;
    output->dimensions = {batches * (blockSizeData[0] * blockSizeData[1]),
                          paddedHeight / blockSizeData[0], paddedWidth / blockSizeData[1],
                          channels};
    output->offset = input.offset;
    output->scale = input.scale;

    return true;
}

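// Illustration with hypothetical dimensions: an input of {2, 3, 4} reduced over axis {1}
// produces {2, 4} when keepDims is false and {2, 1, 4} when keepDims is true.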
bool meanPrepare(const Shape& input, const int32_t* axisData, const Shape& axisShape, bool keepDims,
                 Shape* output) {
    // axis needs to be provided as a 1-D int32 tensor.
    NN_OPS_CHECK(axisShape.type == OperandType::TENSOR_INT32);
    NN_OPS_CHECK(getNumberOfDimensions(axisShape) == 1);

    int32_t numInputDims = static_cast<int32_t>(getNumberOfDimensions(input));
    int32_t axisSize = static_cast<int32_t>(getSizeOfDimension(axisShape, 0));

    // Determines size of output tensor.
    if (keepDims) {
        std::vector<uint32_t> outDims(numInputDims);
        for (int32_t idx = 0; idx < numInputDims; ++idx) {
            bool isAxis = false;
            for (int32_t axisIdx = 0; axisIdx < axisSize; ++axisIdx) {
                if (axisData[axisIdx] == idx || axisData[axisIdx] + numInputDims == idx) {
                    isAxis = true;
                    break;
                }
            }
            if (isAxis) {
                outDims[idx] = 1;
            } else {
                outDims[idx] = getSizeOfDimension(input, idx);
            }
        }
        output->dimensions = outDims;
    } else {
        // Calculates size of reducing axis.
        int32_t numReduceAxis = axisSize;
        for (int32_t i = 0; i < axisSize; ++i) {
            int32_t current = axisData[i];
            if (current < 0) {
                current += numInputDims;
            }
            NN_OPS_CHECK(current >= 0 && current < numInputDims);
            for (int32_t j = 0; j < i; ++j) {
                int32_t previous = axisData[j];
                if (previous < 0) {
                    previous += numInputDims;
                }
                if (current == previous) {
                    --numReduceAxis;
                    break;
                }
            }
        }
        // Determines output dimensions.
        std::vector<uint32_t> outDims(numInputDims - numReduceAxis);
        int32_t numSkipAxis = 0;
        for (int32_t idx = 0; idx < numInputDims; ++idx) {
            bool isAxis = false;
            for (int32_t axisIdx = 0; axisIdx < axisSize; ++axisIdx) {
                if (axisData[axisIdx] == idx || axisData[axisIdx] + numInputDims == idx) {
                    ++numSkipAxis;
                    isAxis = true;
                    break;
                }
            }
            if (!isAxis) {
                outDims[idx - numSkipAxis] = getSizeOfDimension(input, idx);
            }
        }
        // Handle the case when all dimensions are removed.
        if (outDims.empty()) {
            outDims.push_back(1);
        }
        output->dimensions = outDims;
    }

    output->type = input.type;
    output->offset = input.offset;
    output->scale = input.scale;

    return true;
}

bool argMinMaxPrepare(const Shape& input, int32_t axis, Shape* output) {
    NN_CHECK(handleNegativeAxis(input, &axis));

    output->type = OperandType::TENSOR_INT32;

    // Copy the input dimensions, omitting the axis dimension.
    output->dimensions.clear();
    if (getNumberOfDimensions(input) > 1) {
        output->dimensions.reserve(getNumberOfDimensions(input) - 1);
        output->dimensions.insert(output->dimensions.end(), input.dimensions.begin(),
                                  input.dimensions.begin() + axis);
        output->dimensions.insert(output->dimensions.end(), input.dimensions.begin() + axis + 1,
                                  input.dimensions.end());
    } else {
        output->dimensions.push_back(1);
    }

    return true;
}

bool splitPrepare(const Shape& input, int32_t axis, int32_t numOutputs,
                  std::vector<Shape>* output) {
    NN_CHECK(handleNegativeAxis(input, &axis));

    const int32_t sizeOfAxisToSplit = input.dimensions[axis];
    NN_OPS_CHECK(sizeOfAxisToSplit % numOutputs == 0);
    const int32_t sliceSize = sizeOfAxisToSplit / numOutputs;

    for (int i = 0; i < numOutputs; ++i) {
        output->at(i).type = input.type;
        output->at(i).dimensions = input.dimensions;
        output->at(i).dimensions[axis] = sliceSize;
        output->at(i).offset = input.offset;
        output->at(i).scale = input.scale;
    }
    return true;
}

bool groupedConvPrepare(const Shape& input, const Shape& filter, const Shape& bias,
                        int32_t padding_left, int32_t padding_right, int32_t padding_top,
                        int32_t padding_bottom, int32_t stride_width, int32_t stride_height,
                        int32_t numGroups, Shape* output) {
    if (filter.type == OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL) {
        NN_OPS_CHECK(input.type == OperandType::TENSOR_QUANT8_ASYMM ||
                     input.type == OperandType::TENSOR_QUANT8_ASYMM_SIGNED);
    } else {
        NN_OPS_CHECK(input.type == filter.type);
    }
    if (input.type == OperandType::TENSOR_QUANT8_ASYMM ||
        input.type == OperandType::TENSOR_QUANT8_ASYMM_SIGNED) {
        NN_OPS_CHECK(bias.type == OperandType::TENSOR_INT32);
    } else {
        NN_OPS_CHECK(input.type == bias.type);
    }
    NN_OPS_CHECK(getNumberOfDimensions(input) == 4);
    NN_OPS_CHECK(getNumberOfDimensions(filter) == 4);
    NN_OPS_CHECK(getNumberOfDimensions(bias) == 1);

    NN_OPS_CHECK(getSizeOfDimension(filter, 0) == getSizeOfDimension(bias, 0));

    NN_OPS_CHECK(getSizeOfDimension(filter, 3) * numGroups == getSizeOfDimension(input, 3));
    NN_OPS_CHECK(getSizeOfDimension(filter, 0) % numGroups == 0);

    uint32_t channels_out = getSizeOfDimension(filter, 0);
    uint32_t width = getSizeOfDimension(input, 2);
    uint32_t height = getSizeOfDimension(input, 1);
    uint32_t filterWidth = getSizeOfDimension(filter, 2);
    uint32_t filterHeight = getSizeOfDimension(filter, 1);
    uint32_t batches = getSizeOfDimension(input, 0);

    NN_RET_CHECK_GT(static_cast<int32_t>(filterWidth), padding_left);
    NN_RET_CHECK_GT(static_cast<int32_t>(filterWidth), padding_right);
    NN_RET_CHECK_GT(static_cast<int32_t>(filterHeight), padding_top);
    NN_RET_CHECK_GT(static_cast<int32_t>(filterHeight), padding_bottom);

    uint32_t outWidth =
            computeOutSize(width, filterWidth, stride_width, padding_left, padding_right);
    uint32_t outHeight =
            computeOutSize(height, filterHeight, stride_height, padding_top, padding_bottom);

    output->type = input.type;
    output->dimensions = {batches, outHeight, outWidth, channels_out};
    return true;
}

}  // namespace nn
}  // namespace android