1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "Operations"
18 
19 #include "HeatmapMaxKeypoint.h"
20 
21 #include <algorithm>
22 #include <cfloat>
23 #include <cmath>
24 #include <vector>
25 
26 #include "OperationResolver.h"
27 #include "OperationsExecutionUtils.h"
28 #include "Tracing.h"
29 
30 #ifdef NN_INCLUDE_CPU_IMPLEMENTATION
31 #include "CpuOperationUtils.h"
32 #endif  // NN_INCLUDE_CPU_IMPLEMENTATION
33 
34 namespace android {
35 namespace nn {
36 namespace heatmap_max_keypoint {
37 
38 #ifdef NN_INCLUDE_CPU_IMPLEMENTATION
39 namespace {
40 
// Refines the position of a heatmap maximum to sub-cell precision.
//
// Uses a Taylor expansion up to the quadratic term around the center of a 3x3 neighborhood to
// approximate the bicubic upscaling result.
// 2nd order Taylor expansion: D(x) = D - b'x + 1/2 * x'Ax
// where D = grid[1][1], Taylor expansion center, the original score,
//       x = delta, the correction on max keypoint position,
//       D(x) = deltaScore, the accuracy score after correction
//
// Parameters:
//   grid       - 3x3 neighborhood of scores centered on the maximum. The second index varies
//                along the axis corrected by delta[0], the first index along the axis corrected
//                by delta[1].
//   delta      - in/out, 2 elements. Overwritten with the correction, clipped so that neither
//                component exceeds 1.5 in magnitude.
//   deltaScore - in/out. Overwritten with D(delta).
//   fpAtol     - absolute tolerance of the singularity test on det(A).
//   fpRtol     - relative tolerance of the singularity test on det(A).
//
// If A is judged singular the function returns early and leaves *delta and *deltaScore
// untouched, so callers must pre-fill them with their fallback values.
static void solveForDelta(const float grid[3][3], float* delta, float* deltaScore,
                          float fpAtol = 1e-5f, float fpRtol = 1e-5f) {
    // b: negative 1st order derivative at center (central differences)
    // A: Hessian matrix at center (2nd order derivative)
    float A[2][2], b[2];
    b[0] = -(grid[1][2] - grid[1][0]) / 2.0f;
    b[1] = -(grid[2][1] - grid[0][1]) / 2.0f;
    A[0][0] = grid[1][0] - 2.0f * grid[1][1] + grid[1][2];
    A[0][1] = (grid[2][2] - grid[2][0] - grid[0][2] + grid[0][0]) / 4.0f;
    A[1][0] = A[0][1];
    A[1][1] = grid[0][1] - 2.0f * grid[1][1] + grid[2][1];

    // solve Ax=b, where x=delta -> delta = inv(A) * b
    const float crossProd1 = A[0][0] * A[1][1];
    const float crossProd2 = A[0][1] * A[1][0];
    const float detA = crossProd1 - crossProd2;
    // Check if A is invertible. The relative term is scaled by the magnitude of crossProd1:
    // a signed value would shrink (or even negate) the threshold whenever the product is
    // negative, making the test depend on the sign rather than the scale of the entries.
    if (std::abs(detA) < (fpAtol + fpRtol * std::abs(crossProd1))) return;
    delta[0] = (A[1][1] * b[0] - A[0][1] * b[1]) / detA;
    delta[1] = (A[0][0] * b[1] - A[1][0] * b[0]) / detA;

    // clip out of range delta, i.e. |delta| > 3/2; both components are scaled by the same
    // factor so the direction of the correction is preserved
    if (std::abs(delta[0]) > 1.5f || std::abs(delta[1]) > 1.5f) {
        float scale = 1.5f / std::max(std::abs(delta[0]), std::abs(delta[1]));
        delta[0] *= scale;
        delta[1] *= scale;
    }

    *deltaScore = grid[1][1] - b[0] * delta[0] - b[1] * delta[1] +
                  ((A[0][0] * delta[0] + A[0][1] * delta[1]) * delta[0] +
                   (A[1][0] * delta[0] + A[1][1] * delta[1]) * delta[1]) /
                          2.0f;
}
79 
heatmapMaxKeypointFloat32Nhwc(const float * heatmap,const Shape & heatmapShape,const float * boxes,const Shape & boxesShape,float * outputScoreData,const Shape &,float * outputKeypointData,const Shape &,float fpAtol,float fpRtol)80 inline bool heatmapMaxKeypointFloat32Nhwc(const float* heatmap, const Shape& heatmapShape,
81                                           const float* boxes, const Shape& boxesShape,
82                                           float* outputScoreData, const Shape& /*outputScoreShape*/,
83                                           float* outputKeypointData,
84                                           const Shape& /*outputKeypointShape*/, float fpAtol,
85                                           float fpRtol) {
86     NNTRACE_TRANS("HeatmapMaxKeypoint");
87 
88     uint32_t numBoxes = getSizeOfDimension(heatmapShape, 0);
89     uint32_t heatmapSize = getSizeOfDimension(heatmapShape, 1);
90     uint32_t numKeypoints = getSizeOfDimension(heatmapShape, 3);
91     uint32_t boxInfoLength = getSizeOfDimension(boxesShape, 1);
92 
93     const float* heatmapBase = heatmap;
94     const float* boxInfoBase = boxes;
95     float* outputScoreBase = outputScoreData;
96     float* outputKeypointBase = outputKeypointData;
97     for (uint32_t i = 0; i < numBoxes; i++) {
98         NN_RET_CHECK_LE(boxInfoBase[0], boxInfoBase[2]);
99         NN_RET_CHECK_LE(boxInfoBase[1], boxInfoBase[3]);
100         for (uint32_t j = 0; j < numKeypoints; j++) {
101             // find max score and its index
102             uint32_t maxIndex = 0;
103             float maxScore = -FLT_MAX;
104             for (uint32_t k = 0; k < heatmapSize * heatmapSize; k++) {
105                 float val = heatmapBase[k * numKeypoints + j];
106                 if (maxScore < val) {
107                     maxScore = val;
108                     maxIndex = k;
109                 }
110             }
111 
112             uint32_t maxIndexWidth = maxIndex % heatmapSize;
113             uint32_t maxIndexHeight = maxIndex / heatmapSize;
114 
115             // get local 3x3 grid
116             float localGrid[3][3];
117             for (int32_t dh = -1; dh <= 1; dh++) {
118                 for (int32_t dw = -1; dw <= 1; dw++) {
119                     // cast uint32_t to int32_t
120                     int32_t h = static_cast<int32_t>(maxIndexHeight) + dh;
121                     int32_t w = static_cast<int32_t>(maxIndexWidth) + dw;
122 
123                     // use mirroring for out of bound indexing
124                     // need to ensure heatmapSize >= 2
125                     h = h < 0 ? 1 : (static_cast<uint32_t>(h) >= heatmapSize ? heatmapSize - 2 : h);
126                     w = w < 0 ? 1 : (static_cast<uint32_t>(w) >= heatmapSize ? heatmapSize - 2 : w);
127 
128                     uint32_t heatmapIndex = static_cast<uint32_t>(h) * heatmapSize * numKeypoints +
129                                             static_cast<uint32_t>(w) * numKeypoints + j;
130                     localGrid[dh + 1][dw + 1] = heatmapBase[heatmapIndex];
131                 }
132             }
133 
134             float delta[2] = {0.0f, 0.0f}, deltaScore = maxScore;
135             solveForDelta(localGrid, delta, &deltaScore, fpAtol, fpRtol);
136 
137             float wRoiStart = boxInfoBase[0];
138             float hRoiStart = boxInfoBase[1];
139             float wRoiEnd = boxInfoBase[2];
140             float hRoiEnd = boxInfoBase[3];
141             float roiWidth = wRoiEnd - wRoiStart;
142             float roiHeight = hRoiEnd - hRoiStart;
143             float wRelativePos = (static_cast<float>(maxIndexWidth) + delta[0] + 0.5f) /
144                                  static_cast<float>(heatmapSize);
145             float hRelativePos = (static_cast<float>(maxIndexHeight) + delta[1] + 0.5f) /
146                                  static_cast<float>(heatmapSize);
147             *outputScoreBase++ = deltaScore;
148             outputKeypointBase[0] = wRelativePos * roiWidth + wRoiStart;
149             outputKeypointBase[1] = hRelativePos * roiHeight + hRoiStart;
150             outputKeypointBase += 2;
151         }
152         boxInfoBase += boxInfoLength;
153         heatmapBase += heatmapSize * heatmapSize * numKeypoints;
154     }
155 
156     return true;
157 }
158 
heatmapMaxKeypointFloat32(const float * heatmap,const Shape & heatmapShape,const float * boxes,const Shape & boxesShape,bool layout,float * outputScoreData,const Shape & outputScoreShape,float * outputKeypointData,const Shape & outputKeypointShape,float fpAtol,float fpRtol)159 inline bool heatmapMaxKeypointFloat32(const float* heatmap, const Shape& heatmapShape,
160                                       const float* boxes, const Shape& boxesShape, bool layout,
161                                       float* outputScoreData, const Shape& outputScoreShape,
162                                       float* outputKeypointData, const Shape& outputKeypointShape,
163                                       float fpAtol, float fpRtol) {
164     std::vector<float> heatmap_nhwc;
165     Shape heatmapShape_nhwc;
166     if (layout) {
167         NN_RET_CHECK(convertNchwToNhwc(heatmap, heatmapShape, &heatmap_nhwc, &heatmapShape_nhwc));
168     }
169     const float* heatmap_tmp = layout ? heatmap_nhwc.data() : heatmap;
170     const Shape& heatmapShape_tmp = layout ? heatmapShape_nhwc : heatmapShape;
171     return heatmapMaxKeypointFloat32Nhwc(heatmap_tmp, heatmapShape_tmp, boxes, boxesShape,
172                                          outputScoreData, outputScoreShape, outputKeypointData,
173                                          outputKeypointShape, fpAtol, fpRtol);
174 }
175 
heatmapMaxKeypointQuant(const uint8_t * heatmap,const Shape & heatmapShape,const uint16_t * boxes,const Shape & boxesShape,bool layout,uint8_t * outputScoreData,const Shape & outputScoreShape,uint16_t * outputKeypointData,const Shape & outputKeypointShape,float fpAtol,float fpRtol)176 inline bool heatmapMaxKeypointQuant(const uint8_t* heatmap, const Shape& heatmapShape,
177                                     const uint16_t* boxes, const Shape& boxesShape, bool layout,
178                                     uint8_t* outputScoreData, const Shape& outputScoreShape,
179                                     uint16_t* outputKeypointData, const Shape& outputKeypointShape,
180                                     float fpAtol, float fpRtol) {
181     std::vector<float> heatmap_float32(getNumberOfElements(heatmapShape));
182     convertQuantToFloat32(heatmap, heatmapShape.scale, heatmapShape.offset, &heatmap_float32);
183     std::vector<float> boxes_float32(getNumberOfElements(boxesShape));
184     convertQuantToFloat32(boxes, boxesShape.scale, boxesShape.offset, &boxes_float32);
185     std::vector<float> outputScore_float32(getNumberOfElements(outputScoreShape));
186     std::vector<float> outputKeypoint_float32(getNumberOfElements(outputKeypointShape));
187     NN_RET_CHECK(heatmapMaxKeypointFloat32(
188             heatmap_float32.data(), heatmapShape, boxes_float32.data(), boxesShape, layout,
189             outputScore_float32.data(), outputScoreShape, outputKeypoint_float32.data(),
190             outputKeypointShape, fpAtol, fpRtol));
191     convertFloat32ToQuant(outputScore_float32, outputScoreShape.scale, outputScoreShape.offset,
192                           outputScoreData);
193     convertFloat32ToQuant(outputKeypoint_float32, outputKeypointShape.scale,
194                           outputKeypointShape.offset, outputKeypointData);
195     return true;
196 }
197 
heatmapMaxKeypointQuant(const int8_t * heatmap,const Shape & heatmapShape,const uint16_t * boxes,const Shape & boxesShape,bool layout,int8_t * outputScoreData,const Shape & outputScoreShape,uint16_t * outputKeypointData,const Shape & outputKeypointShape,float fpAtol,float fpRtol)198 inline bool heatmapMaxKeypointQuant(const int8_t* heatmap, const Shape& heatmapShape,
199                                     const uint16_t* boxes, const Shape& boxesShape, bool layout,
200                                     int8_t* outputScoreData, const Shape& outputScoreShape,
201                                     uint16_t* outputKeypointData, const Shape& outputKeypointShape,
202                                     float fpAtol, float fpRtol) {
203     std::vector<float> heatmap_float32(getNumberOfElements(heatmapShape));
204     convertQuantToFloat32(heatmap, heatmapShape.scale, heatmapShape.offset, &heatmap_float32);
205     std::vector<float> boxes_float32(getNumberOfElements(boxesShape));
206     convertQuantToFloat32(boxes, boxesShape.scale, boxesShape.offset, &boxes_float32);
207     std::vector<float> outputScore_float32(getNumberOfElements(outputScoreShape));
208     std::vector<float> outputKeypoint_float32(getNumberOfElements(outputKeypointShape));
209     NN_RET_CHECK(heatmapMaxKeypointFloat32(
210             heatmap_float32.data(), heatmapShape, boxes_float32.data(), boxesShape, layout,
211             outputScore_float32.data(), outputScoreShape, outputKeypoint_float32.data(),
212             outputKeypointShape, fpAtol, fpRtol));
213     convertFloat32ToQuant(outputScore_float32, outputScoreShape.scale, outputScoreShape.offset,
214                           outputScoreData);
215     convertFloat32ToQuant(outputKeypoint_float32, outputKeypointShape.scale,
216                           outputKeypointShape.offset, outputKeypointData);
217     return true;
218 }
219 
220 }  // namespace
221 
prepare(IOperationExecutionContext * context)222 bool prepare(IOperationExecutionContext* context) {
223     bool layout = context->getInputValue<bool>(kLayoutScalar);
224     Shape heatmapShape = context->getInputShape(kHeatmapTensor);
225     Shape boxesShape = context->getInputShape(kBoxesTensor);
226     NN_RET_CHECK_EQ(getNumberOfDimensions(heatmapShape), 4u);
227     NN_RET_CHECK_EQ(getNumberOfDimensions(boxesShape), 2u);
228 
229     uint32_t numBoxes = getSizeOfDimension(heatmapShape, 0);
230     uint32_t heatmapSize = getSizeOfDimension(heatmapShape, 2);
231     uint32_t numKeypoints = getSizeOfDimension(heatmapShape, layout ? 1 : 3);
232     uint32_t boxInfoLength = getSizeOfDimension(boxesShape, 1);
233     NN_RET_CHECK_EQ(getSizeOfDimension(heatmapShape, layout ? 3 : 1), heatmapSize);
234     NN_RET_CHECK_GE(heatmapSize, 2u);
235     NN_RET_CHECK_EQ(getSizeOfDimension(boxesShape, 0), numBoxes);
236     NN_RET_CHECK_EQ(boxInfoLength, 4u);
237 
238     if (heatmapShape.type == OperandType::TENSOR_QUANT8_ASYMM ||
239         heatmapShape.type == OperandType::TENSOR_QUANT8_ASYMM_SIGNED) {
240         NN_RET_CHECK_EQ(boxesShape.scale, 0.125f);
241         NN_RET_CHECK_EQ(boxesShape.offset, 0);
242     }
243 
244     Shape outputScore = context->getOutputShape(kOutputScoreTensor);
245     outputScore.type = heatmapShape.type;
246     outputScore.dimensions = {numBoxes, numKeypoints};
247     NN_RET_CHECK(context->setOutputShape(kOutputScoreTensor, outputScore));
248 
249     Shape outputKeypoint = context->getOutputShape(kOutputKeypointTensor);
250     outputKeypoint.type = boxesShape.type;
251     outputKeypoint.dimensions = {numBoxes, numKeypoints, 2};
252     outputKeypoint.offset = 0;
253     outputKeypoint.scale = 0.f;
254     if (heatmapShape.type == OperandType::TENSOR_QUANT8_ASYMM ||
255         heatmapShape.type == OperandType::TENSOR_QUANT8_ASYMM_SIGNED) {
256         outputKeypoint.scale = 0.125f;
257     }
258     NN_RET_CHECK(context->setOutputShape(kOutputKeypointTensor, outputKeypoint));
259     return true;
260 }
261 
execute(IOperationExecutionContext * context)262 bool execute(IOperationExecutionContext* context) {
263     bool layout = context->getInputValue<bool>(kLayoutScalar);
264     switch (context->getInputType(kHeatmapTensor)) {
265         case OperandType::TENSOR_FLOAT16: {
266             const auto heatmap = context->getInputBuffer<_Float16>(kHeatmapTensor);
267             const auto heatmapShape = context->getInputShape(kHeatmapTensor);
268             const auto boxes = context->getInputBuffer<_Float16>(kBoxesTensor);
269             const auto boxesShape = context->getInputShape(kBoxesTensor);
270             auto outputScoreData = context->getOutputBuffer<_Float16>(kOutputScoreTensor);
271             const auto outputScoreShape = context->getOutputShape(kOutputScoreTensor);
272             auto outputKeypointData = context->getOutputBuffer<_Float16>(kOutputKeypointTensor);
273             const auto outputKeypointShape = context->getOutputShape(kOutputKeypointTensor);
274             std::vector<float> heatmap_float32(getNumberOfElements(heatmapShape));
275             convertFloat16ToFloat32(heatmap, &heatmap_float32);
276             std::vector<float> boxes_float32(getNumberOfElements(boxesShape));
277             convertFloat16ToFloat32(boxes, &boxes_float32);
278             std::vector<float> outputScore_float32(getNumberOfElements(outputScoreShape));
279             std::vector<float> outputKeypoint_float32(getNumberOfElements(outputKeypointShape));
280             NN_RET_CHECK(heatmapMaxKeypointFloat32(
281                     heatmap_float32.data(), heatmapShape, boxes_float32.data(), boxesShape, layout,
282                     outputScore_float32.data(), outputScoreShape, outputKeypoint_float32.data(),
283                     outputKeypointShape, 1e-3f, 1e-3f));
284             convertFloat32ToFloat16(outputScore_float32, outputScoreData);
285             convertFloat32ToFloat16(outputKeypoint_float32, outputKeypointData);
286             return true;
287         }
288         case OperandType::TENSOR_FLOAT32: {
289             return heatmapMaxKeypointFloat32(context->getInputBuffer<float>(kHeatmapTensor),
290                                              context->getInputShape(kHeatmapTensor),
291                                              context->getInputBuffer<float>(kBoxesTensor),
292                                              context->getInputShape(kBoxesTensor), layout,
293                                              context->getOutputBuffer<float>(kOutputScoreTensor),
294                                              context->getOutputShape(kOutputScoreTensor),
295                                              context->getOutputBuffer<float>(kOutputKeypointTensor),
296                                              context->getOutputShape(kOutputKeypointTensor), 1e-5f,
297                                              1e-5f);
298         }
299         case OperandType::TENSOR_QUANT8_ASYMM: {
300             return heatmapMaxKeypointQuant(
301                     context->getInputBuffer<uint8_t>(kHeatmapTensor),
302                     context->getInputShape(kHeatmapTensor),
303                     context->getInputBuffer<uint16_t>(kBoxesTensor),
304                     context->getInputShape(kBoxesTensor), layout,
305                     context->getOutputBuffer<uint8_t>(kOutputScoreTensor),
306                     context->getOutputShape(kOutputScoreTensor),
307                     context->getOutputBuffer<uint16_t>(kOutputKeypointTensor),
308                     context->getOutputShape(kOutputKeypointTensor), 1e-5f, 1e-5f);
309         }
310         case OperandType::TENSOR_QUANT8_ASYMM_SIGNED: {
311             return heatmapMaxKeypointQuant(
312                     context->getInputBuffer<int8_t>(kHeatmapTensor),
313                     context->getInputShape(kHeatmapTensor),
314                     context->getInputBuffer<uint16_t>(kBoxesTensor),
315                     context->getInputShape(kBoxesTensor), layout,
316                     context->getOutputBuffer<int8_t>(kOutputScoreTensor),
317                     context->getOutputShape(kOutputScoreTensor),
318                     context->getOutputBuffer<uint16_t>(kOutputKeypointTensor),
319                     context->getOutputShape(kOutputKeypointTensor), 1e-5f, 1e-5f);
320         }
321         default:
322             NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation " << kOperationName;
323     }
324 }
325 #endif  // NN_INCLUDE_CPU_IMPLEMENTATION
326 
327 }  // namespace heatmap_max_keypoint
328 
// Hooks heatmap_max_keypoint::prepare and heatmap_max_keypoint::execute up as the handlers for
// HEATMAP_MAX_KEYPOINT.
// NOTE(review): both functions are only defined when NN_INCLUDE_CPU_IMPLEMENTATION is set,
// while this invocation sits outside that guard -- presumably the macro drops its function
// arguments in builds without the CPU implementation; confirm against its definition.
NN_REGISTER_OPERATION_DEFAULT_VALIDATION(HEATMAP_MAX_KEYPOINT, heatmap_max_keypoint::prepare,
                                         heatmap_max_keypoint::execute);
331 
332 }  // namespace nn
333 }  // namespace android
334