/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define LOG_TAG "OperationsUtils"

#include "OperationsExecutionUtils.h"

#include <android-base/logging.h>

#include <algorithm>
#include <cmath>
#include <limits>
#include <sstream>
#include <vector>

#include "ActivationFunctor.h"
#include "nnapi/Validation.h"

namespace android {
namespace nn {

namespace {

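// Computes the [act_min, act_max] clamping range for a fused activation function in the
// quantized domain of outputShape, bounded by the storage limits [qmin, qmax].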
void CalculateActivationRangeImpl(int32_t activation, const Shape& outputShape, int32_t qmin,
                                  int32_t qmax, int32_t* act_min, int32_t* act_max) {
    const auto scale = outputShape.scale;
    const auto zero_point = outputShape.offset;

    auto quantize = [scale, zero_point](float f) {
        return zero_point + static_cast<int32_t>(std::round(f / scale));
    };

    if (activation == kActivationRelu) {
        *act_min = std::max(qmin, quantize(0.0));
        *act_max = qmax;
    } else if (activation == kActivationRelu6) {
        *act_min = std::max(qmin, quantize(0.0));
        *act_max = std::min(qmax, quantize(6.0));
    } else if (activation == kActivationRelu1) {
        *act_min = std::max(qmin, quantize(-1.0));
        *act_max = std::min(qmax, quantize(1.0));
    } else if (activation == kActivationNone) {
        *act_min = qmin;
        *act_max = qmax;
    } else {
        LOG(ERROR) << "Unsupported fused activation function.";
    }
}

}  // namespace

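// Maps a possibly negative axis index into [0, numberOfDimensions) and fails if it is out of
// range.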
bool handleNegativeAxis(int32_t numberOfDimensions, int32_t* axis) {
    NN_CHECK(-numberOfDimensions <= *axis && *axis < numberOfDimensions);
    if (*axis < 0) {
        *axis += numberOfDimensions;
    }
    return true;
}

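// Decomposes a multiplier into a Q31 fixed-point value and a power-of-two shift such that
// double_multiplier ~= quantized_multiplier * 2^(shift - 31).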
bool QuantizeMultiplier(double double_multiplier, int32_t* quantized_multiplier, int32_t* shift) {
    if (double_multiplier == 0.) {
        *quantized_multiplier = 0;
        *shift = 0;
        return true;
    }
    const double q = std::frexp(double_multiplier, shift);
    auto q_fixed = static_cast<int64_t>(std::round(q * (1LL << 31)));
    NN_RET_CHECK(q_fixed <= (1LL << 31));
    if (q_fixed == (1LL << 31)) {
        q_fixed /= 2;
        ++*shift;
    }
    NN_RET_CHECK_LE(q_fixed, std::numeric_limits<int32_t>::max());
    // A shift amount smaller than -31 would cause all bits to be shifted out
    // and thus all results would be zero. We implement that instead with
    // q_fixed==0, so as to avoid hitting issues with right-shift
    // operations with shift amounts greater than 31. Note that this happens
    // roughly when abs(double_multiplier) < 2^-31 and the present handling means
    // that we're effectively flushing tiny double_multiplier's to zero.
    // We could conceivably handle values in the range (roughly) [32, 63]
    // as 'denormals' i.e. (shift==0, q_fixed < 2^30). From that point of view
    // the present handling is just doing 'flush denormals to zero'. We could
    // reconsider and actually generate nonzero denormals if a need arises.
    if (*shift < -31) {
        *shift = 0;
        q_fixed = 0;
    }
    *quantized_multiplier = static_cast<int32_t>(q_fixed);
    return true;
}

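// Same as QuantizeMultiplier, but restricted to multipliers in (0, 1); the resulting shift is
// therefore never positive.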
bool QuantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t* quantized_multiplier,
                                         int32_t* left_shift) {
    NN_RET_CHECK(double_multiplier > 0.);
    NN_RET_CHECK(double_multiplier < 1.);
    NN_RET_CHECK(QuantizeMultiplier(double_multiplier, quantized_multiplier, left_shift));
    NN_RET_CHECK(*left_shift <= 0);
    return true;
}

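// Like QuantizeMultiplierSmallerThanOneExp, but reports the scaling as a non-negative
// right-shift amount instead of a non-positive left shift, and accepts a multiplier of zero.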
bool QuantizeMultiplierSmallerThanOne(double double_multiplier, int32_t* quantized_multiplier,
                                      int32_t* right_shift) {
    NN_OPS_CHECK(double_multiplier >= 0.);
    NN_OPS_CHECK(double_multiplier < 1.);
    if (double_multiplier == 0.) {
        *quantized_multiplier = 0;
        *right_shift = 0;
        return true;
    }
    NN_OPS_CHECK(double_multiplier > 0.);
    const double q = std::frexp(double_multiplier, right_shift);
    *right_shift *= -1;
    int64_t q_fixed = static_cast<int64_t>(std::round(q * (1LL << 31)));
    NN_OPS_CHECK(q_fixed <= (1LL << 31));
    if (q_fixed == (1LL << 31)) {
        q_fixed /= 2;
        --*right_shift;
    }
    NN_OPS_CHECK(*right_shift >= 0);
    NN_OPS_CHECK(q_fixed <= std::numeric_limits<int32_t>::max());
    *quantized_multiplier = static_cast<int32_t>(q_fixed);
    return true;
}

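// Expresses a multiplier strictly greater than one as a Q31 value and a non-negative left-shift
// amount, i.e. double_multiplier ~= quantized_multiplier * 2^(left_shift - 31).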
bool QuantizeMultiplierGreaterThanOne(double double_multiplier, int32_t* quantized_multiplier,
                                      int* left_shift) {
    NN_OPS_CHECK(double_multiplier > 1.);
    const double q = std::frexp(double_multiplier, left_shift);
    int64_t q_fixed = static_cast<int64_t>(std::round(q * (1LL << 31)));
    NN_OPS_CHECK(q_fixed <= (1LL << 31));
    if (q_fixed == (1LL << 31)) {
        q_fixed /= 2;
        ++*left_shift;
    }
    NN_OPS_CHECK(*left_shift >= 0);
    NN_OPS_CHECK(q_fixed <= std::numeric_limits<int32_t>::max());
    *quantized_multiplier = static_cast<int32_t>(q_fixed);
    return true;
}

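// Computes the real multiplier that rescales the accumulator of a quantized convolution
// (input_scale * filter_scale) into the output scale. The overload taking a bias shape also
// checks that the bias scale matches the accumulator scale.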
bool GetQuantizedConvolutionMultiplier(const Shape& inputShape, const Shape& filterShape,
                                       const Shape& biasShape, const Shape& outputShape,
                                       double* multiplier) {
    // Upcast bias and input_product to double
    const double input_product_scale = inputShape.scale * filterShape.scale;
    const double bias_scale = biasShape.scale;

    // The following conditions must be guaranteed by the training pipeline.
    NN_OPS_CHECK(std::abs(input_product_scale - bias_scale) <=
                 1e-6 * std::min(input_product_scale, bias_scale));
    NN_OPS_CHECK(input_product_scale >= 0);
    *multiplier = input_product_scale / outputShape.scale;
    return true;
}

bool GetQuantizedConvolutionMultiplier(const Shape& inputShape, const Shape& filterShape,
                                       const Shape& outputShape, double* multiplier) {
    // Upcast input_product to double
    const double input_product_scale = inputShape.scale * filterShape.scale;

    // The following conditions must be guaranteed by the training pipeline.
    NN_OPS_CHECK(input_product_scale >= 0);
    *multiplier = input_product_scale / outputShape.scale;
    return true;
}

void CalculateActivationRangeUint8(int32_t activation, const Shape& outputShape, int32_t* act_min,
                                   int32_t* act_max) {
    const int32_t qmin = std::numeric_limits<uint8_t>::min();
    const int32_t qmax = std::numeric_limits<uint8_t>::max();

    CalculateActivationRangeImpl(activation, outputShape, qmin, qmax, act_min, act_max);
}

void CalculateActivationRangeInt8(int32_t activation, const Shape& outputShape, int32_t* act_min,
                                  int32_t* act_max) {
    const int32_t qmin = std::numeric_limits<int8_t>::min();
    const int32_t qmax = std::numeric_limits<int8_t>::max();

    CalculateActivationRangeImpl(activation, outputShape, qmin, qmax, act_min, act_max);
}

void CalculateActivationRangeFloat(int32_t activation, float* activation_min,
                                   float* activation_max) {
    if (activation == kActivationRelu) {
        *activation_min = 0.f;
        *activation_max = std::numeric_limits<float>::max();
    } else if (activation == kActivationRelu6) {
        *activation_min = 0.f;
        *activation_max = 6.f;
    } else if (activation == kActivationRelu1) {
        *activation_min = -1.f;
        *activation_max = 1.f;
    } else if (activation == kActivationNone) {
        *activation_min = std::numeric_limits<float>::lowest();
        *activation_max = std::numeric_limits<float>::max();
    } else {
        LOG(ERROR) << "Unsupported fused activation function.";
    }
}

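// Computes the largest input magnitude that, after being rescaled by 2^input_left_shift into a
// fixed-point representation with input_integer_bits integer bits, still fits without overflow.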
int32_t CalculateInputRadius(int input_integer_bits, int input_left_shift) {
    const double max_input_rescaled = 1.0 * ((1 << input_integer_bits) - 1) *
                                      (1LL << (31 - input_integer_bits)) /
                                      (1LL << input_left_shift);
    // Tighten bound using floor. Suppose that we could use the exact value.
    // After scaling the difference, the result would be at the maximum. Thus we
    // must ensure that our value has lower magnitude.
    return static_cast<int32_t>(std::floor(max_input_rescaled));
}

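// Converts an implicit padding scheme into explicit head/tail padding for one spatial dimension.
// For SAME padding the total padding is split evenly, with any odd pixel going to the tail; for
// transposed convolution the tail is recomputed so the last window fits tightly against the end
// of the input. Other schemes leave both values at zero.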
void calculateExplicitPaddingImpl(int32_t in_size, int32_t stride, int32_t dilation_factor,
                                  int32_t filter_size, int32_t padding_implicit,
                                  bool isTransposeConv, int32_t* padding_head,
                                  int32_t* padding_tail) {
    *padding_head = 0;
    *padding_tail = 0;

    int32_t effective_filter_size = (filter_size - 1) * dilation_factor + 1;

    if (padding_implicit == kPaddingSame) {
        int32_t out_size = (in_size + stride - 1) / stride;
        int32_t tmp = (out_size - 1) * stride + effective_filter_size;
        if (tmp > in_size) {
            *padding_head = (tmp - in_size) / 2;
            *padding_tail = (tmp - in_size) - *padding_head;
        }
        // For transpose conv, make padding tail fit tightly to the end of the last stride.
        if (isTransposeConv) {
            *padding_tail = (tmp - in_size) - *padding_head;
        }
    }
}

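// Computes the NumPy-style broadcast shape of two tensors of the same type: dimensions are
// aligned from the trailing end, and each pair must either match or have one side equal to 1.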
bool calculateBroadcastedShape(const Shape& in1, const Shape& in2, Shape* out) {
    NN_RET_CHECK(in1.type == in2.type);
    uint32_t numberOfDims1 = getNumberOfDimensions(in1);
    uint32_t numberOfDims2 = getNumberOfDimensions(in2);
    uint32_t maxDims = std::max(numberOfDims1, numberOfDims2);
    out->dimensions = std::vector<uint32_t>(maxDims);
    for (uint32_t i = 1; i <= maxDims; i++) {
        uint32_t dim1 = 1;
        if (i <= numberOfDims1) {
            dim1 = getSizeOfDimension(in1, numberOfDims1 - i);
        }
        uint32_t dim2 = 1;
        if (i <= numberOfDims2) {
            dim2 = getSizeOfDimension(in2, numberOfDims2 - i);
        }
        if (dim1 != dim2 && dim1 != 1 && dim2 != 1) {
            LOG(ERROR) << "Dimensions mismatch for broadcast:\n"
                       << "First tensor: dimension " << numberOfDims1 - i << " of size " << dim1
                       << "\nSecond tensor: dimension " << numberOfDims2 - i << " of size " << dim2;
            return false;
        }
        out->dimensions[maxDims - i] = (dim1 == 1) ? dim2 : dim1;
    }
    return true;
}

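// Requantizes a single value from one (scale, offset) quantization to another by converting
// through double and clamping to the range of the output type.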
template <>
uint8_t requantize<uint8_t>(uint8_t value, const Shape& oldShape, const Shape& newShape) {
    double doubleValue = (value - oldShape.offset) * oldShape.scale;
    double doubleRet = doubleValue / newShape.scale + newShape.offset;
    if (doubleRet < 0) return 0;
    if (doubleRet > 255) return 255;
    return static_cast<uint8_t>(std::round(doubleRet));
}

template <>
int8_t requantize<int8_t>(int8_t value, const Shape& oldShape, const Shape& newShape) {
    double doubleValue = (value - oldShape.offset) * oldShape.scale;
    double doubleRet = doubleValue / newShape.scale + newShape.offset;
    if (doubleRet < -128) return -128;
    if (doubleRet > 127) return 127;
    return static_cast<int8_t>(std::round(doubleRet));
}

bool reshapePrepare(const Shape& input, const int32_t* targetDims, const int32_t targetDimsSize,
                    Shape* output) {
    // Reshape allows one of the targetDims components to have the
    // special -1 value, meaning it will be calculated automatically based on the
    // input. Here we calculate what that dimension should be so that the number
    // of output elements is the same as the number of input elements.
    int32_t numInputElements = (int32_t)getNumberOfElements(input);

    std::vector<uint32_t> outDims(targetDimsSize);
    int32_t numOutputElements = 1;
    int32_t stretchDim = -1;
    for (int32_t i = 0; i < targetDimsSize; ++i) {
        int32_t value = targetDims[i];
        if (value == -1) {
            NN_OPS_CHECK(stretchDim == -1);
            stretchDim = i;
        } else {
            numOutputElements *= value;
            outDims[i] = (uint32_t)value;
        }
    }
    if (stretchDim != -1) {
        int32_t stretchValue = numInputElements / numOutputElements;
        outDims[stretchDim] = (uint32_t)stretchValue;
        numOutputElements *= stretchValue;
    }

    NN_OPS_CHECK(numInputElements == numOutputElements);

    output->type = input.type;
    output->dimensions = outDims;
    output->offset = input.offset;
    output->scale = input.scale;

    return true;
}

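// DEPTH_TO_SPACE on a 4-D NHWC tensor: height and width grow by blockSize while the channel
// count shrinks by blockSize * blockSize, which must divide the input channel count.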
bool depthToSpacePrepare(const Shape& input, int32_t blockSize, Shape* output) {
    NN_OPS_CHECK(getNumberOfDimensions(input) == 4);
    NN_OPS_CHECK(blockSize > 0);

    uint32_t batches = getSizeOfDimension(input, 0);
    uint32_t height = getSizeOfDimension(input, 1);
    uint32_t width = getSizeOfDimension(input, 2);
    uint32_t channels = getSizeOfDimension(input, 3);

    NN_OPS_CHECK(channels % (blockSize * blockSize) == 0);
    output->type = input.type;
    output->dimensions = {batches, height * blockSize, width * blockSize,
                          channels / (blockSize * blockSize)};
    output->offset = input.offset;
    output->scale = input.scale;

    return true;
}

bool spaceToDepthPrepare(const Shape& input, int32_t blockSize, Shape* output) {
    NN_OPS_CHECK(getNumberOfDimensions(input) == 4);
    NN_OPS_CHECK(blockSize > 0);

    uint32_t batches = getSizeOfDimension(input, 0);
    uint32_t height = getSizeOfDimension(input, 1);
    uint32_t width = getSizeOfDimension(input, 2);
    uint32_t channels = getSizeOfDimension(input, 3);

    NN_OPS_CHECK(height % blockSize == 0);
    NN_OPS_CHECK(width % blockSize == 0);

    output->type = input.type;
    output->dimensions = {batches, height / blockSize, width / blockSize,
                          channels * (blockSize * blockSize)};
    output->offset = input.offset;
    output->scale = input.scale;

    return true;
}

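// EMBEDDING_LOOKUP: the output has one row per lookup index; the remaining dimensions and the
// quantization parameters are copied from the value tensor.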
bool embeddingLookupPrepare(const Shape& valueShape, const Shape& lookupShape, Shape* outputShape) {
    NN_OPS_CHECK(getNumberOfDimensions(valueShape) >= 2);
    NN_OPS_CHECK(getNumberOfDimensions(lookupShape) == 1);

    const uint32_t columns = getSizeOfDimension(valueShape, 1);
    const uint32_t lookups = getSizeOfDimension(lookupShape, 0);

    outputShape->type = valueShape.type;
    outputShape->dimensions = {lookups, columns};
    for (uint32_t i = 2; i < getNumberOfDimensions(valueShape); i++) {
        outputShape->dimensions.push_back(getSizeOfDimension(valueShape, i));
    }
    outputShape->offset = valueShape.offset;
    outputShape->scale = valueShape.scale;

    return true;
}

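// HASHTABLE_LOOKUP: produces one value row per lookup key plus a parallel "hit" tensor
// (TENSOR_QUANT8_ASYMM, scale 1, offset 0) indicating whether each key was found.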
bool hashtableLookupPrepare(const Shape& lookupShape, const Shape& keyShape,
                            const Shape& valueShape, Shape* outputShape, Shape* hitShape) {
    NN_OPS_CHECK(getNumberOfDimensions(lookupShape) == 1);
    NN_OPS_CHECK(getNumberOfDimensions(keyShape) == 1);
    NN_OPS_CHECK(getNumberOfDimensions(valueShape) >= 1);

    const uint32_t lookups = getSizeOfDimension(lookupShape, 0);
    outputShape->type = valueShape.type;
    outputShape->dimensions = {lookups};
    for (uint32_t i = 1; i < getNumberOfDimensions(valueShape); i++) {
        outputShape->dimensions.push_back(getSizeOfDimension(valueShape, i));
    }
    outputShape->offset = valueShape.offset;
    outputShape->scale = valueShape.scale;

    hitShape->type = OperandType::TENSOR_QUANT8_ASYMM;
    hitShape->dimensions = {lookups};
    hitShape->offset = 0;
    hitShape->scale = 1.f;

    return true;
}

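// PAD: each output dimension is the corresponding input dimension plus the before/after padding
// taken from the numInputDims x 2 paddings tensor; all padding values must be non-negative.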
bool padPrepare(const Shape& input, const int32_t* paddingsData, const Shape& paddingsShape,
                Shape* output) {
    uint32_t numInputDims = getNumberOfDimensions(input);

    // paddings need to be provided as a 2-D int32 tensor.
    NN_OPS_CHECK(paddingsShape.type == OperandType::TENSOR_INT32);
    NN_OPS_CHECK(getNumberOfDimensions(paddingsShape) == 2);
    NN_OPS_CHECK(getSizeOfDimension(paddingsShape, 0) == numInputDims);
    NN_OPS_CHECK(getSizeOfDimension(paddingsShape, 1) == 2);

    std::vector<uint32_t> outDims(numInputDims);
    for (uint32_t i = 0; i < numInputDims; ++i) {
        int32_t beforePadding = *paddingsData++;
        int32_t afterPadding = *paddingsData++;
        // Pad value has to be greater than or equal to 0.
        NN_OPS_CHECK(beforePadding >= 0 && afterPadding >= 0);
        outDims[i] = beforePadding + getSizeOfDimension(input, i) + afterPadding;
    }
    output->type = input.type;
    output->dimensions = outDims;
    output->offset = input.offset;
    output->scale = input.scale;

    return true;
}

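// BATCH_TO_SPACE_ND with a 2-D block: the batch dimension shrinks by blockHeight * blockWidth
// while height and width grow by the corresponding block sizes; only 4-D NHWC inputs are
// supported. SPACE_TO_BATCH_ND below is the inverse, with optional spatial padding applied first.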
bool batchToSpacePrepare(const Shape& input, const int32_t* blockSizeData,
                         const Shape& blockSizeShape, Shape* output) {
    // Only 4D NHWC tensors are supported.
    NN_OPS_CHECK(getNumberOfDimensions(input) == 4);

    // blockSize needs to be provided as a 1-D int32 tensor.
    NN_OPS_CHECK(blockSizeShape.type == OperandType::TENSOR_INT32);
    NN_OPS_CHECK(getNumberOfDimensions(blockSizeShape) == 1);
    // Only applies to spatial dimensions.
    NN_OPS_CHECK(getSizeOfDimension(blockSizeShape, 0) == 2);

    uint32_t batches = getSizeOfDimension(input, 0);
    uint32_t height = getSizeOfDimension(input, 1);
    uint32_t width = getSizeOfDimension(input, 2);
    uint32_t channels = getSizeOfDimension(input, 3);

    NN_OPS_CHECK(batches % (blockSizeData[0] * blockSizeData[1]) == 0);
    output->type = input.type;
    output->dimensions = {batches / (blockSizeData[0] * blockSizeData[1]),
                          height * blockSizeData[0], width * blockSizeData[1], channels};
    output->offset = input.offset;
    output->scale = input.scale;

    return true;
}

bool spaceToBatchPrepare(const Shape& input, const int32_t* blockSizeData,
                         const Shape& blockSizeShape, const int32_t* paddingsData,
                         const Shape& paddingsShape, Shape* output) {
    // Only 4D NHWC tensors are supported.
    NN_OPS_CHECK(getNumberOfDimensions(input) == 4);

    // blockSize needs to be provided as a 1-D int32 tensor.
    NN_OPS_CHECK(blockSizeShape.type == OperandType::TENSOR_INT32);
    NN_OPS_CHECK(getNumberOfDimensions(blockSizeShape) == 1);
    // Only applies to spatial dimensions.
    NN_OPS_CHECK(getSizeOfDimension(blockSizeShape, 0) == 2);

    // paddings need to be provided as a 2-D int32 tensor.
    NN_OPS_CHECK(paddingsShape.type == OperandType::TENSOR_INT32);
    NN_OPS_CHECK(getNumberOfDimensions(paddingsShape) == 2);
    NN_OPS_CHECK(getSizeOfDimension(paddingsShape, 0) == 2);
    NN_OPS_CHECK(getSizeOfDimension(paddingsShape, 1) == 2);

    uint32_t batches = getSizeOfDimension(input, 0);
    uint32_t height = getSizeOfDimension(input, 1);
    uint32_t width = getSizeOfDimension(input, 2);
    uint32_t channels = getSizeOfDimension(input, 3);

    uint32_t paddedHeight = paddingsData[0] + height + paddingsData[1];
    uint32_t paddedWidth = paddingsData[2] + width + paddingsData[3];

    NN_OPS_CHECK(paddedHeight % blockSizeData[0] == 0);
    NN_OPS_CHECK(paddedWidth % blockSizeData[1] == 0);

    output->type = input.type;
    output->dimensions = {batches * (blockSizeData[0] * blockSizeData[1]),
                          paddedHeight / blockSizeData[0], paddedWidth / blockSizeData[1],
                          channels};
    output->offset = input.offset;
    output->scale = input.scale;

    return true;
}

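// MEAN: when keepDims is true every reduced axis is kept with size 1; otherwise reduced axes are
// dropped (duplicate and negative axis indices are tolerated) and an all-axes reduction yields
// shape {1}.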
bool meanPrepare(const Shape& input, const int32_t* axisData, const Shape& axisShape, bool keepDims,
                 Shape* output) {
    // axis needs to be provided as a 1-D int32 tensor.
    NN_OPS_CHECK(axisShape.type == OperandType::TENSOR_INT32);
    NN_OPS_CHECK(getNumberOfDimensions(axisShape) == 1);

    int32_t numInputDims = static_cast<int32_t>(getNumberOfDimensions(input));
    int32_t axisSize = static_cast<int32_t>(getSizeOfDimension(axisShape, 0));

    // Determines size of output tensor.
    if (keepDims) {
        std::vector<uint32_t> outDims(numInputDims);
        for (int32_t idx = 0; idx < numInputDims; ++idx) {
            bool isAxis = false;
            for (int32_t axisIdx = 0; axisIdx < axisSize; ++axisIdx) {
                if (axisData[axisIdx] == idx || axisData[axisIdx] + numInputDims == idx) {
                    isAxis = true;
                    break;
                }
            }
            if (isAxis) {
                outDims[idx] = 1;
            } else {
                outDims[idx] = getSizeOfDimension(input, idx);
            }
        }
        output->dimensions = outDims;
    } else {
        // Calculates size of reducing axis.
        int32_t numReduceAxis = axisSize;
        for (int32_t i = 0; i < axisSize; ++i) {
            int32_t current = axisData[i];
            if (current < 0) {
                current += numInputDims;
            }
            NN_OPS_CHECK(current >= 0 && current < numInputDims);
            for (int32_t j = 0; j < i; ++j) {
                int32_t previous = axisData[j];
                if (previous < 0) {
                    previous += numInputDims;
                }
                if (current == previous) {
                    --numReduceAxis;
                    break;
                }
            }
        }
        // Determines output dimensions.
        std::vector<uint32_t> outDims(numInputDims - numReduceAxis);
        int32_t numSkipAxis = 0;
        for (int32_t idx = 0; idx < numInputDims; ++idx) {
            bool isAxis = false;
            for (int32_t axisIdx = 0; axisIdx < axisSize; ++axisIdx) {
                if (axisData[axisIdx] == idx || axisData[axisIdx] + numInputDims == idx) {
                    ++numSkipAxis;
                    isAxis = true;
                    break;
                }
            }
            if (!isAxis) {
                outDims[idx - numSkipAxis] = getSizeOfDimension(input, idx);
            }
        }
        // Handle the case when all dimensions are removed.
        if (outDims.empty()) {
            outDims.push_back(1);
        }
        output->dimensions = outDims;
    }

    output->type = input.type;
    output->offset = input.offset;
    output->scale = input.scale;

    return true;
}

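// ARGMIN/ARGMAX: the output is an int32 tensor with the reduced axis removed; reducing the only
// axis of a 1-D input produces shape {1}.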
bool argMinMaxPrepare(const Shape& input, int32_t axis, Shape* output) {
    NN_CHECK(handleNegativeAxis(input, &axis));

    output->type = OperandType::TENSOR_INT32;

    // Copy the input dimensions, omitting the axis dimension.
    output->dimensions.clear();
    if (getNumberOfDimensions(input) > 1) {
        output->dimensions.reserve(getNumberOfDimensions(input) - 1);
        output->dimensions.insert(output->dimensions.end(), input.dimensions.begin(),
                                  input.dimensions.begin() + axis);
        output->dimensions.insert(output->dimensions.end(), input.dimensions.begin() + axis + 1,
                                  input.dimensions.end());
    } else {
        output->dimensions.push_back(1);
    }

    return true;
}

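// SPLIT: the size of the split axis must be evenly divisible by numOutputs; every output slice
// keeps the input's type and quantization parameters.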
bool splitPrepare(const Shape& input, int32_t axis, int32_t numOutputs,
                  std::vector<Shape>* output) {
    NN_CHECK(handleNegativeAxis(input, &axis));

    const int32_t sizeOfAxisToSplit = input.dimensions[axis];
    NN_OPS_CHECK(sizeOfAxisToSplit % numOutputs == 0);
    const int32_t sliceSize = sizeOfAxisToSplit / numOutputs;

    for (int i = 0; i < numOutputs; ++i) {
        output->at(i).type = input.type;
        output->at(i).dimensions = input.dimensions;
        output->at(i).dimensions[axis] = sliceSize;
        output->at(i).offset = input.offset;
        output->at(i).scale = input.scale;
    }
    return true;
}

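// GROUPED_CONV_2D: validates the type combinations of input/filter/bias, the per-group channel
// constraints, and the explicit paddings, then derives the NHWC output shape from the strides.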
bool groupedConvPrepare(const Shape& input, const Shape& filter, const Shape& bias,
                        int32_t padding_left, int32_t padding_right, int32_t padding_top,
                        int32_t padding_bottom, int32_t stride_width, int32_t stride_height,
                        int32_t numGroups, Shape* output) {
    if (filter.type == OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL) {
        NN_OPS_CHECK(input.type == OperandType::TENSOR_QUANT8_ASYMM ||
                     input.type == OperandType::TENSOR_QUANT8_ASYMM_SIGNED);
    } else {
        NN_OPS_CHECK(input.type == filter.type);
    }
    if (input.type == OperandType::TENSOR_QUANT8_ASYMM ||
        input.type == OperandType::TENSOR_QUANT8_ASYMM_SIGNED) {
        NN_OPS_CHECK(bias.type == OperandType::TENSOR_INT32);
    } else {
        NN_OPS_CHECK(input.type == bias.type);
    }
    NN_OPS_CHECK(getNumberOfDimensions(input) == 4);
    NN_OPS_CHECK(getNumberOfDimensions(filter) == 4);
    NN_OPS_CHECK(getNumberOfDimensions(bias) == 1);

    NN_OPS_CHECK(getSizeOfDimension(filter, 0) == getSizeOfDimension(bias, 0));

    NN_OPS_CHECK(getSizeOfDimension(filter, 3) * numGroups == getSizeOfDimension(input, 3));
    NN_OPS_CHECK(getSizeOfDimension(filter, 0) % numGroups == 0);

    uint32_t channels_out = getSizeOfDimension(filter, 0);
    uint32_t width = getSizeOfDimension(input, 2);
    uint32_t height = getSizeOfDimension(input, 1);
    uint32_t filterWidth = getSizeOfDimension(filter, 2);
    uint32_t filterHeight = getSizeOfDimension(filter, 1);
    uint32_t batches = getSizeOfDimension(input, 0);

    NN_RET_CHECK_GT(static_cast<int32_t>(filterWidth), padding_left);
    NN_RET_CHECK_GT(static_cast<int32_t>(filterWidth), padding_right);
    NN_RET_CHECK_GT(static_cast<int32_t>(filterHeight), padding_top);
    NN_RET_CHECK_GT(static_cast<int32_t>(filterHeight), padding_bottom);

    uint32_t outWidth =
            computeOutSize(width, filterWidth, stride_width, padding_left, padding_right);
    uint32_t outHeight =
            computeOutSize(height, filterHeight, stride_height, padding_top, padding_bottom);

    output->type = input.type;
    output->dimensions = {batches, outHeight, outWidth, channels_out};
    return true;
}

}  // namespace nn
}  // namespace android