1 /*
2  * Copyright (C) 2021 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifdef NN_EXPERIMENTAL_FEATURE
18 
19 #include "Densify.h"
20 
21 #include <cstddef>
22 #include <cstdint>
23 #include <functional>
24 #include <iostream>
25 #include <numeric>
26 #include <vector>
27 
28 #include "OperationResolver.h"
29 #include "OperationsExecutionUtils.h"
30 #include "OperationsValidationUtils.h"
31 #include "Tracing.h"
32 #include "nnapi/OperandTypes.h"
33 #include "nnapi/TypeUtils.h"
34 #include "nnapi/Validation.h"
35 
36 #define LOG_TAG "Operations"
37 
38 namespace android {
39 namespace nn {
40 namespace densify_op {
41 
42 /**
43  * getFlattenedIndex:
44  * Gets the index of destData where indices points to. Uses shape and origRank
45  * for calculations.
46  */
getFlattenedIndex(const std::vector<int32_t> & indices,const std::vector<uint32_t> & shape,const int origRank)47 uint64_t getFlattenedIndex(const std::vector<int32_t>& indices, const std::vector<uint32_t>& shape,
48                            const int origRank) {
49     uint64_t index = 0;
50     int subElems = 1;
51     // origRank = size of destDims
52     for (int i = origRank - 1; i >= 0; i--) {
53         index += uint64_t(indices[i] * subElems);
54         subElems *= shape[i];
55     }
56     return index;
57 }
58 
59 /**
60  * populate (Recursive Function):
61  * Used to populate the destData with elements from srcData one value at a time.
62  * Inputs:
63  * * srcData = input data of non-zero values.
64  * * indices = used to determine the index in destData where we write srcData to. Uses block
65  *   dimension.
66  * * level = used to keep track of recursion level. Each recursive instance exits when level == size
67  *   of traversal order.
68  * * prevIdx = used to keep placement in array segments and srcData.
69  * * destData = dense output data. Input being written to.
70  * * destDims = shape of the output tensor. Used to calculate the flattened idx.
71  * * dimFormat = dimension format for each entry in traversal order. The format is either DENSE
72  *   (dimFormat[i] == 0) or SPARSE_CSR (dimFormat[i] == 1). Format is significant to determine how
73  *   recursive iterations will occur and what metadata is stored in dimMetadata.
74  * * traversalOrder = contains n+k elements. The first n elements are a permutation of the dense
75  *   tensor shape. The last k elements are a permutation of the block dimensions. Used to determine
76  *   order of traversal paths.
77  * * blockSize = dense size of blocks. The last k elements of dimensions.
78  * * blockMap = Used to determine how the block dimension maps to the original tensor dimension.
79  * * dimMetadata = metadata varies depending on dimFormat values. If format is DENSE,
80  *   dimMetadata[i*2][0] is the total number of elements in the dense tensor on the ith traversal
81  *   path, and recursive iterations are through a standard for loop from 0 to dimMetadata[i*2][0].
82  *   If format is SPARSE_CSR, dimMetadata[i*2] is a vector of array segments and
83  *   dimMetadata[i*2+1] is a vector of array indices. The next recursive iterations will be
84  *   looping through the array segments vector (since array segments are the same as row pointers in
85  *   CSR format, the ith entry should never be greater than the ith+1 entry) and modifying the input
86  *   indices with elements from the array indices vector.
87  * * origRank = the size of destDims. Used for calculating flattened index of indices.
88  */
89 template <typename T>
populate(const T * srcData,std::vector<int32_t> * indices,uint32_t level,uint32_t prevIdx,T * destData,const std::vector<uint32_t> & destDims,const std::vector<int32_t> & dimFormat,const int32_t * traversalOrder,const std::vector<int32_t> & blockSize,const int32_t * blockMap,const std::vector<std::vector<int32_t>> & dimMetadata,const int origRank)90 void populate(const T* srcData, std::vector<int32_t>* indices, uint32_t level, uint32_t prevIdx,
91               T* destData, const std::vector<uint32_t>& destDims,
92               const std::vector<int32_t>& dimFormat, const int32_t* traversalOrder,
93               const std::vector<int32_t>& blockSize, const int32_t* blockMap,
94               const std::vector<std::vector<int32_t>>& dimMetadata, const int origRank) {
95     if (level == (*indices).size()) {  // level == size of traversal order
96         std::vector<int> origIdx(origRank);
97         size_t i = 0;
98         // Calculating origIdx using dense tensor dimensions
99         for (; i < origIdx.size(); i++) {
100             int origDim = traversalOrder[i];
101             origIdx[origDim] = (*indices)[i];
102         }
103         // Modifying origIdx using block dimensions
104         for (; i < (*indices).size(); i++) {
105             const int blockIdx = traversalOrder[i] - origRank;
106             const int origDim = blockMap[blockIdx];
107             origIdx[origDim] = origIdx[origDim] * blockSize[blockIdx] + (*indices)[i];
108         }
109         // Writing srcData to destData
110         destData[getFlattenedIndex(origIdx, destDims, origRank)] = srcData[prevIdx];
111         return;
112     }
113     const int metadataIdx = 2 * level;
114     if (dimFormat[level] == DENSE) {  // DENSE dimension format
115         const int shapeOfLevel = dimMetadata[metadataIdx].front();
116         for (int i = 0; i < shapeOfLevel; i++) {
117             (*indices)[level] = i;
118             populate(srcData, indices, level + 1, prevIdx * shapeOfLevel + i, destData, destDims,
119                      dimFormat, traversalOrder, blockSize, blockMap, dimMetadata, origRank);
120         }
121     } else {  // SPARSE_CSR dimension format
122         const auto& arraySegments = dimMetadata[metadataIdx];
123         const auto& arrayIndices = dimMetadata[metadataIdx + 1];
124         for (int i = arraySegments[prevIdx]; i < arraySegments[prevIdx + 1]; i++) {
125             (*indices)[level] = arrayIndices[i];
126             populate(srcData, indices, level + 1, i, destData, destDims, dimFormat, traversalOrder,
127                      blockSize, blockMap, dimMetadata, origRank);
128         }
129     }
130 }
131 
132 /**
133  * arrToVector:
134  * Converts a T array into an T vector.
135  */
136 template <typename T>
arrToVector(const T * arr,uint32_t size)137 std::vector<T> arrToVector(const T* arr, uint32_t size) {
138     return arr == nullptr ? std::vector<T>() : std::vector<T>(arr, arr + size);
139 }
140 
/**
 * densify:
 * Execution entry point (per element type T). Fills the dense output tensor
 * with the zero value (the operand's zero point for asymmetric quantized
 * types, T() otherwise), then scatters the stored non-zero values of the
 * sparse input into it via populate(), driven by the traversal-order,
 * block-map, dimension-format, and dimension-metadata operands.
 *
 * Returns true on success.
 */
template <typename T>
inline bool densify(IOperationExecutionContext* context) {
    // Getting all inputs
    std::vector<Shape> inputShapes;
    const uint32_t inputCount = context->getNumInputs();
    inputShapes.reserve(inputCount);
    const T* srcData = context->getInputBuffer<T>(kInputTensor);
    inputShapes.push_back(context->getInputShape(kInputTensor));
    const int32_t* traversalOrder = context->getInputBuffer<int32_t>(kInputTravOrder);
    inputShapes.push_back(context->getInputShape(kInputTravOrder));
    const int32_t* blockMap = context->getInputBuffer<int32_t>(kInputBlockMap);
    inputShapes.push_back(context->getInputShape(kInputBlockMap));
    const int32_t* dimFormatPtr = context->getInputBuffer<int32_t>(kInputDimFormat);
    inputShapes.push_back(context->getInputShape(kInputDimFormat));
    const int32_t* dimensionsPtr = context->getInputBuffer<int32_t>(kInputDimensions);
    inputShapes.push_back(context->getInputShape(kInputDimensions));

    // All remaining inputs (from kInputArrSeg onward) are per-dimension
    // metadata buffers, laid out as (array segments, array indices) pairs.
    // A buffer may be null for a DENSE dimension (arrToVector tolerates
    // nullptr, and the operation registers with allowOmittedOperand).
    std::vector<const int32_t*> dimMetadataPtrs;
    for (uint32_t i = kInputArrSeg; i < inputCount; i++) {
        inputShapes.push_back(context->getInputShape(i));
        const int32_t* metadata = context->getInputBuffer<int32_t>(i);
        dimMetadataPtrs.push_back(metadata);
    }
    Shape destShape = context->getOutputShape(kOutputTensor);

    // Organizing dimFormat, dimensions, dimMetadata into vectors.
    // Note: metadata pairs are indexed i*2 / i*2+1 relative to kInputArrSeg,
    // matching the dimMetadata layout documented above populate().
    std::vector<int32_t> dimFormat(
            inputShapes[kInputDimFormat].dimensions.front());  // size of dimFormatPtr
    std::vector<int32_t> dimensions(dimFormat.size());
    std::vector<std::vector<int32_t>> dimMetadata(2 * dimFormat.size());
    for (size_t i = 0; i < dimFormat.size(); i++) {
        dimFormat[i] = dimFormatPtr[i];
        dimensions[i] = dimensionsPtr[i];
        if (dimFormat[i] == 0) {
            // DENSE: the only metadata needed is the dimension's extent.
            dimMetadata[i * 2] = {dimensions[i]};
        } else {
            dimMetadata[i * 2] =  // array segments
                    arrToVector(dimMetadataPtrs[i * 2],
                                inputShapes[i * 2 + kInputArrSeg].dimensions.front());
            dimMetadata[i * 2 + 1] =  // array indices
                    arrToVector(dimMetadataPtrs[i * 2 + 1],
                                inputShapes[i * 2 + kInputArrIdx].dimensions.front());
        }
    }

    // Creating blockSize vector. The last k entries of traversalOrder name the
    // block dimensions, whose dense extents come from the dimensions operand.
    const int origRank = destShape.dimensions.size();
    std::vector<int32_t> blockSize(
            inputShapes[kInputBlockMap].dimensions.front());  // size of block map
    for (uint32_t i = 0; i < inputShapes[kInputBlockMap].dimensions.front(); i++) {
        const int32_t origDim = traversalOrder[origRank + i];
        blockSize[i] = dimensions[origDim];
    }

    // Calculating the number of output entries
    const size_t denseTotal =
            std::accumulate(destShape.dimensions.begin(), destShape.dimensions.end(),
                            static_cast<size_t>(1), std::multiplies<>{});
    // For asymmetric quantized types the logical "zero" is the operand's zero
    // point, not numeric 0.
    T zeroPoint = T();
    if (const OperandType type = inputShapes.front().type;
        type == OperandType::TENSOR_QUANT8_ASYMM ||
        type == OperandType::TENSOR_QUANT8_ASYMM_SIGNED ||
        type == OperandType::TENSOR_QUANT16_ASYMM) {
        zeroPoint = static_cast<T>(inputShapes.front().offset);
    }

    // Pre-fill the whole dense output with the zero value; populate() then
    // overwrites only the positions that have stored values.
    T* destData = context->getOutputBuffer<T>(kOutputTensor);
    for (size_t i = 0; i < denseTotal; i++) {
        destData[i] = zeroPoint;
    }

    std::vector<int32_t> indices(
            inputShapes[kInputTravOrder].dimensions.front());  // size of traversal order
    populate(srcData, &indices, 0u, 0u, destData, destShape.dimensions, dimFormat, traversalOrder,
             blockSize, blockMap, dimMetadata, origRank);
    return true;
}
218 
prepare(IOperationExecutionContext * context)219 bool prepare(IOperationExecutionContext* context) {
220     // Setting OutputShape
221     Shape destShape = context->getInputShape(kInputTensor);
222 
223     const int32_t* traversalOrder = context->getInputBuffer<int32_t>(kInputTravOrder);
224     const int32_t* blockMap = context->getInputBuffer<int32_t>(kInputBlockMap);
225     const int32_t* dimensions = context->getInputBuffer<int32_t>(kInputDimensions);
226     Shape dimensionsShape = context->getInputShape(kInputDimensions);
227     Shape blockMapShape = context->getInputShape(kInputBlockMap);
228     const uint32_t origRank = dimensionsShape.dimensions.front() - blockMapShape.dimensions.front();
229     std::vector<uint32_t> destDims(origRank);
230 
231     size_t i = 0;
232     for (; i < destDims.size(); i++) {
233         const int32_t origDim = traversalOrder[i];
234         destDims[origDim] = dimensions[i];
235     }
236     for (; i < dimensionsShape.dimensions.front(); i++) {
237         const int32_t traversalIdx = traversalOrder[i] - origRank;
238         const int32_t origDim = blockMap[traversalIdx];
239         destDims[origDim] *= dimensions[i];
240     }
241     destShape.dimensions = destDims;
242     return context->setOutputShape(kOutputTensor, destShape);
243 }
244 
execute(IOperationExecutionContext * context)245 bool execute(IOperationExecutionContext* context) {
246     switch (context->getInputType(kInputTensor)) {
247         case OperandType::TENSOR_BOOL8:
248             return densify<bool8>(context);
249         case OperandType::TENSOR_FLOAT32:
250             return densify<float>(context);
251         case OperandType::TENSOR_FLOAT16:
252             return densify<_Float16>(context);
253         case OperandType::TENSOR_INT32:
254             return densify<int32_t>(context);
255         case OperandType::TENSOR_QUANT8_ASYMM:
256             return densify<uint8_t>(context);
257         case OperandType::TENSOR_QUANT8_ASYMM_SIGNED:
258         case OperandType::TENSOR_QUANT8_SYMM:
259             return densify<int8_t>(context);
260         case OperandType::TENSOR_QUANT16_SYMM:
261             return densify<int16_t>(context);
262         case OperandType::TENSOR_QUANT16_ASYMM:
263             return densify<uint16_t>(context);
264         default:
265             return false;
266     }
267 }
268 
269 }  // namespace densify_op
270 
// Registers the DENSIFY operation with its prepare/execute entry points.
// NOTE(review): allowOmittedOperand is set — presumably because metadata
// operands for DENSE dimensions may be omitted (arrToVector tolerates null
// buffers); confirm against the operation's validation spec.
NN_REGISTER_OPERATION_DEFAULT_VALIDATION(DENSIFY, densify_op::prepare, densify_op::execute,
                                         .allowOmittedOperand = true);
273 
274 }  // namespace nn
275 }  // namespace android
276 
277 #endif  // NN_EXPERIMENTAL_FEATURE