/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Contains the implementation of the operations.

#define LOG_TAG "Operations"

#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunused-parameter"
#pragma clang diagnostic ignored "-Wsign-compare"
#pragma clang diagnostic ignored "-Winvalid-partial-specialization"
#include <tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h>
#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
#pragma clang diagnostic pop

#include <cstring>
#include <vector>

#include "CpuOperationUtils.h"
#include "LegacyUtils.h"
#include "Operations.h"
#include "Reshape.h"
#include "Tracing.h"

namespace android {
namespace nn {
bool copyData(const void* inputData, const Shape& inputShape, void* outputData,
              const Shape& /*outputShape*/) {
    NNTRACE_COMP("copyData");
    size_t count = nonExtensionOperandSizeOfData(inputShape.type, inputShape.dimensions);
    memcpy(outputData, inputData, count);
    return true;
}

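// Rearranges data from the depth (channel) dimension into blockSize x blockSize spatial
// blocks, delegating to the TFLite optimized DepthToSpace kernel.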
template <typename T>
bool depthToSpaceGeneric(const T* inputData, const Shape& inputShape, int32_t blockSize,
                         T* outputData, const Shape& outputShape) {
    NNTRACE_COMP("optimized_ops::DepthToSpace");
    tflite::optimized_ops::DepthToSpace(inputData, convertShapeToDims(inputShape), blockSize,
                                        outputData, convertShapeToDims(outputShape));
    return true;
}
template bool depthToSpaceGeneric<float>(const float* inputData, const Shape& inputShape,
                                         int32_t blockSize, float* outputData,
                                         const Shape& outputShape);
template bool depthToSpaceGeneric<_Float16>(const _Float16* inputData, const Shape& inputShape,
                                            int32_t blockSize, _Float16* outputData,
                                            const Shape& outputShape);
template bool depthToSpaceGeneric<uint8_t>(const uint8_t* inputData, const Shape& inputShape,
                                           int32_t blockSize, uint8_t* outputData,
                                           const Shape& outputShape);
template bool depthToSpaceGeneric<int8_t>(const int8_t* inputData, const Shape& inputShape,
                                          int32_t blockSize, int8_t* outputData,
                                          const Shape& outputShape);

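// Rearranges blockSize x blockSize spatial blocks into the depth (channel) dimension; the
// inverse of depthToSpaceGeneric, delegating to the TFLite optimized SpaceToDepth kernel.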
template <typename T>
bool spaceToDepthGeneric(const T* inputData, const Shape& inputShape, int32_t blockSize,
                         T* outputData, const Shape& outputShape) {
    NNTRACE_COMP("optimized_ops::SpaceToDepth");
    tflite::optimized_ops::SpaceToDepth(inputData, convertShapeToDims(inputShape), blockSize,
                                        outputData, convertShapeToDims(outputShape));
    return true;
}
template bool spaceToDepthGeneric<float>(const float* inputData, const Shape& inputShape,
                                         int32_t blockSize, float* outputData,
                                         const Shape& outputShape);
template bool spaceToDepthGeneric<_Float16>(const _Float16* inputData, const Shape& inputShape,
                                            int32_t blockSize, _Float16* outputData,
                                            const Shape& outputShape);
template bool spaceToDepthGeneric<uint8_t>(const uint8_t* inputData, const Shape& inputShape,
                                           int32_t blockSize, uint8_t* outputData,
                                           const Shape& outputShape);
template bool spaceToDepthGeneric<int8_t>(const int8_t* inputData, const Shape& inputShape,
                                          int32_t blockSize, int8_t* outputData,
                                          const Shape& outputShape);

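// Pads the input tensor with padValue. `paddings` holds, for each input dimension, a pair of
// counts: the number of values to add before and after that dimension. Inputs with fewer than
// 4 dimensions are extended to 4 (batch, height, width, depth) before padding, since the
// TFLite helpers used below operate on 4-D shapes.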
template <typename T>
bool padGeneric(const T* inputData, const Shape& inputShape, const int32_t* paddings, T padValue,
                T* outputData, const Shape& outputShape) {
    NNTRACE_TRANS("padGeneric");

    // Based on
    // http://google3/third_party/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h?l=6194&rcl=213557260

    // TFLite runtime calls are currently fixed at 4 dimensions. Copy inputs so
    // we can pad them to 4 dims (yes, we are "padding the padding").
    int32_t numInputDims = static_cast<int32_t>(getNumberOfDimensions(inputShape));
    NN_OPS_CHECK(numInputDims <= 4);
    std::vector<int> leftPaddings(4 - numInputDims, 0);
    std::vector<int> rightPaddings(4 - numInputDims, 0);
    for (int32_t i = 0; i < numInputDims; ++i) {
        leftPaddings.push_back(paddings[i * 2]);
        rightPaddings.push_back(paddings[i * 2 + 1]);
    }
    const int leftBPadding = leftPaddings[0];
    const int leftHPadding = leftPaddings[1];
    const int leftWPadding = leftPaddings[2];
    const int leftDPadding = leftPaddings[3];
    const int rightBPadding = rightPaddings[0];
    const int rightHPadding = rightPaddings[1];
    const int rightWPadding = rightPaddings[2];
    const int rightDPadding = rightPaddings[3];

    const auto extInputShape =
            tflite::RuntimeShape::ExtendedShape(4, convertShapeToTflshape(inputShape));
    const auto extOutputShape =
            tflite::RuntimeShape::ExtendedShape(4, convertShapeToTflshape(outputShape));

    const int outputBatch = extOutputShape.Dims(0);
    const int outputHeight = extOutputShape.Dims(1);
    const int outputWidth = extOutputShape.Dims(2);
    const int outputDepth = extOutputShape.Dims(3);

    const int inputDepth = extInputShape.Dims(3);

    NNTRACE_COMP_SWITCH("padGeneric");

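    // Walk the output in batch/height/width/depth order: at each level, fill the left padding
    // region with padValue, process the interior, then fill the right padding region. The
    // innermost step copies one depth row (inputDepth elements) from the input at a time.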
    if (leftBPadding != 0) {
        tflite::optimized_ops::TypedMemset<T>(
                outputData, padValue, leftBPadding * outputHeight * outputWidth * outputDepth);
    }
    for (int outB = leftBPadding; outB < outputBatch - rightBPadding; ++outB) {
        if (leftHPadding != 0) {
            tflite::optimized_ops::TypedMemset<T>(
                    outputData + tflite::Offset(extOutputShape, outB, 0, 0, 0), padValue,
                    leftHPadding * outputWidth * outputDepth);
        }
        for (int outH = leftHPadding; outH < outputHeight - rightHPadding; ++outH) {
            if (leftWPadding != 0) {
                tflite::optimized_ops::TypedMemset<T>(
                        outputData + tflite::Offset(extOutputShape, outB, outH, 0, 0), padValue,
                        leftWPadding * outputDepth);
            }
            for (int outW = leftWPadding; outW < outputWidth - rightWPadding; ++outW) {
                if (leftDPadding != 0) {
                    tflite::optimized_ops::TypedMemset<T>(
                            outputData + tflite::Offset(extOutputShape, outB, outH, outW, 0),
                            padValue, leftDPadding);
                }

                T* out =
                        outputData + tflite::Offset(extOutputShape, outB, outH, outW, leftDPadding);
                const T* in =
                        inputData + tflite::Offset(extInputShape, outB - leftBPadding,
                                                   outH - leftHPadding, outW - leftWPadding, 0);
                memcpy(out, in, inputDepth * sizeof(T));

                if (rightDPadding != 0) {
                    tflite::optimized_ops::TypedMemset<T>(
                            outputData + tflite::Offset(extOutputShape, outB, outH, outW,
                                                        outputDepth - rightDPadding),
                            padValue, rightDPadding);
                }
            }
            if (rightWPadding != 0) {
                tflite::optimized_ops::TypedMemset<T>(
                        outputData + tflite::Offset(extOutputShape, outB, outH,
                                                    outputWidth - rightWPadding, 0),
                        padValue, rightWPadding * outputDepth);
            }
        }
        if (rightHPadding != 0) {
            tflite::optimized_ops::TypedMemset<T>(
                    outputData + tflite::Offset(extOutputShape, outB, outputHeight - rightHPadding,
                                                0, 0),
                    padValue, rightHPadding * outputWidth * outputDepth);
        }
    }
    if (rightBPadding != 0) {
        tflite::optimized_ops::TypedMemset<T>(
                outputData + tflite::Offset(extOutputShape, outputBatch - rightBPadding, 0, 0, 0),
                padValue, rightBPadding * outputHeight * outputWidth * outputDepth);
    }

    return true;
}
template bool padGeneric<float>(const float* inputData, const Shape& inputShape,
                                const int32_t* paddings, float padValue, float* outputData,
                                const Shape& outputShape);
template bool padGeneric<_Float16>(const _Float16* inputData, const Shape& inputShape,
                                   const int32_t* paddings, _Float16 padValue,
                                   _Float16* outputData, const Shape& outputShape);
template bool padGeneric<uint8_t>(const uint8_t* inputData, const Shape& inputShape,
                                  const int32_t* paddings, uint8_t padValue, uint8_t* outputData,
                                  const Shape& outputShape);
template bool padGeneric<int8_t>(const int8_t* inputData, const Shape& inputShape,
                                 const int32_t* paddings, int8_t padValue, int8_t* outputData,
                                 const Shape& outputShape);

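// Rearranges data from the batch dimension back into spatial blocks (the inverse of
// SPACE_TO_BATCH_ND), delegating to the TFLite optimized BatchToSpaceND kernel. Crops are
// fixed to zero here.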
template <typename T>
bool batchToSpaceGeneric(const T* inputData, const Shape& inputShape, const int32_t* blockSize,
                         T* outputData, const Shape& outputShape) {
    // Needed by low level implementation, but not really used.
    tflite::Dims<4> blockSizeDim, cropsDim;
    const int32 crops[4] = {0, 0, 0, 0};
    NNTRACE_COMP("optimized_ops::BatchToSpaceND");
    tflite::optimized_ops::BatchToSpaceND(inputData, convertShapeToDims(inputShape), blockSize,
                                          blockSizeDim, crops, cropsDim, outputData,
                                          convertShapeToDims(outputShape));
    return true;
}
template bool batchToSpaceGeneric<float>(const float* inputData, const Shape& inputShape,
                                         const int32_t* blockSize, float* outputData,
                                         const Shape& outputShape);
template bool batchToSpaceGeneric<_Float16>(const _Float16* inputData, const Shape& inputShape,
                                            const int32_t* blockSize, _Float16* outputData,
                                            const Shape& outputShape);
template bool batchToSpaceGeneric<uint8_t>(const uint8_t* inputData, const Shape& inputShape,
                                           const int32_t* blockSize, uint8_t* outputData,
                                           const Shape& outputShape);
template bool batchToSpaceGeneric<int8_t>(const int8_t* inputData, const Shape& inputShape,
                                          const int32_t* blockSize, int8_t* outputData,
                                          const Shape& outputShape);

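// Divides the spatial dimensions into blocks and interleaves them into the batch dimension,
// delegating to the TFLite optimized SpaceToBatchND kernel. The output offset (zero point) is
// forwarded as output_offset, which TFLite uses as the fill value for padded regions of
// quantized tensors.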
template <typename T>
bool spaceToBatchGeneric(const T* inputData, const Shape& inputShape, const int32_t* blockSize,
                         const int32_t* padding, const Shape& paddingShape, T* outputData,
                         const Shape& outputShape) {
    // Needed by low level implementation, but not really used.
    tflite::RuntimeShape blockSizeDim;
    NNTRACE_COMP("optimized_ops::SpaceToBatchND");
    tflite::optimized_ops::SpaceToBatchND(
            {.output_offset = outputShape.offset}, convertShapeToTflshape(inputShape), inputData,
            blockSizeDim, blockSize, convertShapeToTflshape(paddingShape), padding,
            convertShapeToTflshape(outputShape), outputData);
    return true;
}
template bool spaceToBatchGeneric<float>(const float* inputData, const Shape& inputShape,
                                         const int32_t* blockSize, const int32_t* padding,
                                         const Shape& paddingShape, float* outputData,
                                         const Shape& outputShape);
template bool spaceToBatchGeneric<_Float16>(const _Float16* inputData, const Shape& inputShape,
                                            const int32_t* blockSize, const int32_t* padding,
                                            const Shape& paddingShape, _Float16* outputData,
                                            const Shape& outputShape);
template bool spaceToBatchGeneric<uint8_t>(const uint8_t* inputData, const Shape& inputShape,
                                           const int32_t* blockSize, const int32_t* padding,
                                           const Shape& paddingShape, uint8_t* outputData,
                                           const Shape& outputShape);
template bool spaceToBatchGeneric<int8_t>(const int8_t* inputData, const Shape& inputShape,
                                          const int32_t* blockSize, const int32_t* padding,
                                          const Shape& paddingShape, int8_t* outputData,
                                          const Shape& outputShape);

}  // namespace nn
}  // namespace android