/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Contains the implementation of the operations.

#define LOG_TAG "Operations"

#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunused-parameter"
#pragma clang diagnostic ignored "-Wsign-compare"
#pragma clang diagnostic ignored "-Winvalid-partial-specialization"
#include <tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h>
#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
#pragma clang diagnostic pop

#include <cstring>  // memcpy
#include <vector>

#include "CpuOperationUtils.h"
#include "LegacyUtils.h"
#include "Operations.h"
#include "Reshape.h"
#include "Tracing.h"

namespace android {
namespace nn {

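// copyData: bitwise copy of one operand into another. The byte count is
// derived from the input's type and dimensions; the output shape parameter is
// unused and is assumed to describe a buffer of at least that size.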
bool copyData(const void* inputData, const Shape& inputShape, void* outputData,
              const Shape& /*outputShape*/) {
    NNTRACE_COMP("copyData");
    size_t count = nonExtensionOperandSizeOfData(inputShape.type, inputShape.dimensions);
    memcpy(outputData, inputData, count);
    return true;
}

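// depthToSpaceGeneric: rearranges values from the channel dimension into
// blockSize x blockSize spatial blocks (DEPTH_TO_SPACE), delegating to the
// TFLite optimized kernel. Instantiated below for the float, half-float, and
// quantized element types handled here.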
template <typename T>
bool depthToSpaceGeneric(const T* inputData, const Shape& inputShape, int32_t blockSize,
                         T* outputData, const Shape& outputShape) {
    NNTRACE_COMP("optimized_ops::DepthToSpace");
    tflite::optimized_ops::DepthToSpace(inputData, convertShapeToDims(inputShape), blockSize,
                                        outputData, convertShapeToDims(outputShape));
    return true;
}
template bool depthToSpaceGeneric<float>(const float* inputData, const Shape& inputShape,
                                         int32_t blockSize, float* outputData,
                                         const Shape& outputShape);
template bool depthToSpaceGeneric<_Float16>(const _Float16* inputData, const Shape& inputShape,
                                            int32_t blockSize, _Float16* outputData,
                                            const Shape& outputShape);
template bool depthToSpaceGeneric<uint8_t>(const uint8_t* inputData, const Shape& inputShape,
                                           int32_t blockSize, uint8_t* outputData,
                                           const Shape& outputShape);
template bool depthToSpaceGeneric<int8_t>(const int8_t* inputData, const Shape& inputShape,
                                          int32_t blockSize, int8_t* outputData,
                                          const Shape& outputShape);

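// spaceToDepthGeneric: the inverse of DEPTH_TO_SPACE; folds each
// blockSize x blockSize spatial block into the channel dimension, again
// delegating to the TFLite optimized kernel.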
template <typename T>
bool spaceToDepthGeneric(const T* inputData, const Shape& inputShape, int32_t blockSize,
                         T* outputData, const Shape& outputShape) {
    NNTRACE_COMP("optimized_ops::SpaceToDepth");
    tflite::optimized_ops::SpaceToDepth(inputData, convertShapeToDims(inputShape), blockSize,
                                        outputData, convertShapeToDims(outputShape));
    return true;
}
template bool spaceToDepthGeneric<float>(const float* inputData, const Shape& inputShape,
                                         int32_t blockSize, float* outputData,
                                         const Shape& outputShape);
template bool spaceToDepthGeneric<_Float16>(const _Float16* inputData, const Shape& inputShape,
                                            int32_t blockSize, _Float16* outputData,
                                            const Shape& outputShape);
template bool spaceToDepthGeneric<uint8_t>(const uint8_t* inputData, const Shape& inputShape,
                                           int32_t blockSize, uint8_t* outputData,
                                           const Shape& outputShape);
template bool spaceToDepthGeneric<int8_t>(const int8_t* inputData, const Shape& inputShape,
                                          int32_t blockSize, int8_t* outputData,
                                          const Shape& outputShape);

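// padGeneric: pads a tensor of up to 4 dimensions with a constant value.
// paddings holds {before, after} pairs per dimension, flattened. The output
// is filled in NHWC order, memset-ing padded regions and memcpy-ing each
// input row into place.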
template <typename T>
bool padGeneric(const T* inputData, const Shape& inputShape, const int32_t* paddings, T padValue,
                T* outputData, const Shape& outputShape) {
    NNTRACE_TRANS("padGeneric");

    // Based on
    // http://google3/third_party/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h?l=6194&rcl=213557260

    // TFLite runtime calls are currently fixed at 4 dimensions. Copy inputs so
    // we can pad them to 4 dims (yes, we are "padding the padding").
    int32_t numInputDims = static_cast<int32_t>(getNumberOfDimensions(inputShape));
    NN_OPS_CHECK(numInputDims <= 4);
    std::vector<int> leftPaddings(4 - numInputDims, 0);
    std::vector<int> rightPaddings(4 - numInputDims, 0);
    for (int32_t i = 0; i < numInputDims; ++i) {
        leftPaddings.push_back(paddings[i * 2]);
        rightPaddings.push_back(paddings[i * 2 + 1]);
    }
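    // For example (illustrative values): a 2-D input with
    // paddings = {1, 2, 3, 4} (pad 1 before / 2 after dim 0, 3 before /
    // 4 after dim 1) yields leftPaddings = {0, 0, 1, 3} and
    // rightPaddings = {0, 0, 2, 4}, so the two synthetic leading dims get
    // zero padding.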
    const int leftBPadding = leftPaddings[0];
    const int leftHPadding = leftPaddings[1];
    const int leftWPadding = leftPaddings[2];
    const int leftDPadding = leftPaddings[3];
    const int rightBPadding = rightPaddings[0];
    const int rightHPadding = rightPaddings[1];
    const int rightWPadding = rightPaddings[2];
    const int rightDPadding = rightPaddings[3];

    const auto extInputShape =
            tflite::RuntimeShape::ExtendedShape(4, convertShapeToTflshape(inputShape));
    const auto extOutputShape =
            tflite::RuntimeShape::ExtendedShape(4, convertShapeToTflshape(outputShape));

    const int outputBatch = extOutputShape.Dims(0);
    const int outputHeight = extOutputShape.Dims(1);
    const int outputWidth = extOutputShape.Dims(2);
    const int outputDepth = extOutputShape.Dims(3);

    const int inputDepth = extInputShape.Dims(3);

    NNTRACE_COMP_SWITCH("padGeneric");

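    // The fill proceeds outermost-to-innermost: whole padded batches first,
    // then padded rows within each batch, padded pixels within each row, and
    // finally per-pixel channel padding around a memcpy of the input row.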
    if (leftBPadding != 0) {
        tflite::optimized_ops::TypedMemset<T>(
                outputData, padValue, leftBPadding * outputHeight * outputWidth * outputDepth);
    }
    for (int outB = leftBPadding; outB < outputBatch - rightBPadding; ++outB) {
        if (leftHPadding != 0) {
            tflite::optimized_ops::TypedMemset<T>(
                    outputData + tflite::Offset(extOutputShape, outB, 0, 0, 0), padValue,
                    leftHPadding * outputWidth * outputDepth);
        }
        for (int outH = leftHPadding; outH < outputHeight - rightHPadding; ++outH) {
            if (leftWPadding != 0) {
                tflite::optimized_ops::TypedMemset<T>(
                        outputData + tflite::Offset(extOutputShape, outB, outH, 0, 0), padValue,
                        leftWPadding * outputDepth);
            }
            for (int outW = leftWPadding; outW < outputWidth - rightWPadding; ++outW) {
                if (leftDPadding != 0) {
                    tflite::optimized_ops::TypedMemset<T>(
                            outputData + tflite::Offset(extOutputShape, outB, outH, outW, 0),
                            padValue, leftDPadding);
                }

                T* out =
                        outputData + tflite::Offset(extOutputShape, outB, outH, outW, leftDPadding);
                const T* in =
                        inputData + tflite::Offset(extInputShape, outB - leftBPadding,
                                                   outH - leftHPadding, outW - leftWPadding, 0);
                memcpy(out, in, inputDepth * sizeof(T));

                if (rightDPadding != 0) {
                    tflite::optimized_ops::TypedMemset<T>(
                            outputData + tflite::Offset(extOutputShape, outB, outH, outW,
                                                        outputDepth - rightDPadding),
                            padValue, rightDPadding);
                }
            }
            if (rightWPadding != 0) {
                tflite::optimized_ops::TypedMemset<T>(
                        outputData + tflite::Offset(extOutputShape, outB, outH,
                                                    outputWidth - rightWPadding, 0),
                        padValue, rightWPadding * outputDepth);
            }
        }
        if (rightHPadding != 0) {
            tflite::optimized_ops::TypedMemset<T>(
                    outputData + tflite::Offset(extOutputShape, outB, outputHeight - rightHPadding,
                                                0, 0),
                    padValue, rightHPadding * outputWidth * outputDepth);
        }
    }
    if (rightBPadding != 0) {
        tflite::optimized_ops::TypedMemset<T>(
                outputData + tflite::Offset(extOutputShape, outputBatch - rightBPadding, 0, 0, 0),
                padValue, rightBPadding * outputHeight * outputWidth * outputDepth);
    }

    return true;
}
template bool padGeneric<float>(const float* inputData, const Shape& inputShape,
                                const int32_t* paddings, float padValue, float* outputData,
                                const Shape& outputShape);
template bool padGeneric<_Float16>(const _Float16* inputData, const Shape& inputShape,
                                   const int32_t* paddings, _Float16 padValue, _Float16* outputData,
                                   const Shape& outputShape);
template bool padGeneric<uint8_t>(const uint8_t* inputData, const Shape& inputShape,
                                  const int32_t* paddings, uint8_t padValue, uint8_t* outputData,
                                  const Shape& outputShape);
template bool padGeneric<int8_t>(const int8_t* inputData, const Shape& inputShape,
                                 const int32_t* paddings, int8_t padValue, int8_t* outputData,
                                 const Shape& outputShape);

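// batchToSpaceGeneric: moves data from the batch dimension back into spatial
// blocks (the inverse of SPACE_TO_BATCH_ND), delegating to the TFLite
// optimized kernel. NNAPI's BATCH_TO_SPACE_ND has no crops input, so a fixed
// all-zero crops array is passed.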
template <typename T>
bool batchToSpaceGeneric(const T* inputData, const Shape& inputShape, const int32_t* blockSize,
                         T* outputData, const Shape& outputShape) {
    // Needed by the low-level implementation, but not really used.
    tflite::Dims<4> blockSizeDim, cropsDim;
    const int32_t crops[4] = {0, 0, 0, 0};
    NNTRACE_COMP("optimized_ops::BatchToSpaceND");
    tflite::optimized_ops::BatchToSpaceND(inputData, convertShapeToDims(inputShape), blockSize,
                                          blockSizeDim, crops, cropsDim, outputData,
                                          convertShapeToDims(outputShape));
    return true;
}
template bool batchToSpaceGeneric<float>(const float* inputData, const Shape& inputShape,
                                         const int32_t* blockSize, float* outputData,
                                         const Shape& outputShape);
template bool batchToSpaceGeneric<_Float16>(const _Float16* inputData, const Shape& inputShape,
                                            const int32_t* blockSize, _Float16* outputData,
                                            const Shape& outputShape);
template bool batchToSpaceGeneric<uint8_t>(const uint8_t* inputData, const Shape& inputShape,
                                           const int32_t* blockSize, uint8_t* outputData,
                                           const Shape& outputShape);
template bool batchToSpaceGeneric<int8_t>(const int8_t* inputData, const Shape& inputShape,
                                          const int32_t* blockSize, int8_t* outputData,
                                          const Shape& outputShape);

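// spaceToBatchGeneric: the inverse of BATCH_TO_SPACE_ND; tiles spatial
// blocks into the batch dimension. The output zero point is forwarded as
// output_offset so the TFLite kernel fills padded regions of quantized
// tensors with the zero point rather than with literal zero.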
template <typename T>
bool spaceToBatchGeneric(const T* inputData, const Shape& inputShape, const int32_t* blockSize,
                         const int32_t* padding, const Shape& paddingShape, T* outputData,
                         const Shape& outputShape) {
    // Needed by the low-level implementation, but not really used.
    tflite::RuntimeShape blockSizeDim;
    NNTRACE_COMP("optimized_ops::SpaceToBatchND");
    tflite::optimized_ops::SpaceToBatchND(
            {.output_offset = outputShape.offset}, convertShapeToTflshape(inputShape), inputData,
            blockSizeDim, blockSize, convertShapeToTflshape(paddingShape), padding,
            convertShapeToTflshape(outputShape), outputData);
    return true;
}
template bool spaceToBatchGeneric<float>(const float* inputData, const Shape& inputShape,
                                         const int32_t* blockSize, const int32_t* padding,
                                         const Shape& paddingShape, float* outputData,
                                         const Shape& outputShape);
template bool spaceToBatchGeneric<_Float16>(const _Float16* inputData, const Shape& inputShape,
                                            const int32_t* blockSize, const int32_t* padding,
                                            const Shape& paddingShape, _Float16* outputData,
                                            const Shape& outputShape);
template bool spaceToBatchGeneric<uint8_t>(const uint8_t* inputData, const Shape& inputShape,
                                           const int32_t* blockSize, const int32_t* padding,
                                           const Shape& paddingShape, uint8_t* outputData,
                                           const Shape& outputShape);
template bool spaceToBatchGeneric<int8_t>(const int8_t* inputData, const Shape& inputShape,
                                          const int32_t* blockSize, const int32_t* padding,
                                          const Shape& paddingShape, int8_t* outputData,
                                          const Shape& outputShape);

}  // namespace nn
}  // namespace android