/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define LOG_TAG "Operations"

#include "SVDF.h"

#include <tensorflow/lite/kernels/internal/tensor_utils.h>

#include <algorithm>
#include <cstring>
#include <vector>

#include "CpuExecutor.h"
#include "CpuOperationUtils.h"
#include "Tracing.h"

namespace android {
namespace nn {

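// SVDF (singular value decomposition filter) is a stateful layer: each input
// frame is projected through the feature weights, the result is pushed into a
// rolling per-filter memory of depth memory_size, and that memory is convolved
// with the time weights. A rough call sequence (illustrative only; the real
// wiring lives in CpuExecutor):
//
//   Shape stateShape, outputShape;
//   if (SVDF::Prepare(operation, operands, &stateShape, &outputShape)) {
//       // ... allocate the state and output operands to these shapes ...
//       SVDF(operation, operands).Eval();
//   }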
SVDF::SVDF(const Operation& operation, RunTimeOperandInfo* operands) {
    NNTRACE_TRANS("SVDF::SVDF");
    input_ = GetInput(operation, operands, kInputTensor);
    weights_feature_ = GetInput(operation, operands, kWeightsFeatureTensor);
    weights_time_ = GetInput(operation, operands, kWeightsTimeTensor);
    bias_ = GetInput(operation, operands, kBiasTensor);
    state_in_ = GetInput(operation, operands, kStateInTensor);

    const auto& rankOperand = *GetInput(operation, operands, kRankParam);
    params_.rank_ = getScalarDataWithDefault<int>(rankOperand, 0);
    const auto& activationOperand = *GetInput(operation, operands, kActivationParam);
    params_.activation_ = static_cast<ActivationFn>(getScalarDataWithDefault<int>(
            activationOperand, TfLiteFusedActivation::kTfLiteActNone));

    state_out_ = GetOutput(operation, operands, kStateOutTensor);
    output_ = GetOutput(operation, operands, kOutputTensor);
}

bool SVDF::Prepare(const Operation& operation, RunTimeOperandInfo* operands, Shape* stateShape,
                   Shape* outputShape) {
    NNTRACE_TRANS("SVDF::Prepare");
    // Check that we have all the inputs and outputs we need.
    const int num_inputs = NumInputsWithValues(operation, operands);

    NN_CHECK(num_inputs == 6 || num_inputs == 7);
    constexpr int requiredInputs[] = {
            kInputTensor, kWeightsFeatureTensor, kWeightsTimeTensor, kStateInTensor,
            kRankParam, kActivationParam,
    };
    for (const int requiredInput : requiredInputs) {
        NN_RET_CHECK(!IsNullInput(GetInput(operation, operands, requiredInput)))
                << "required input " << requiredInput << " is omitted";
    }
    NN_CHECK_EQ(NumOutputs(operation), 2);

    // Check that the scalar operands' buffers are large enough.
    const auto& rankOperand = *GetInput(operation, operands, kRankParam);
    NN_RET_CHECK(rankOperand.length >= sizeof(int));
    const auto& activationOperand = *GetInput(operation, operands, kActivationParam);
    NN_RET_CHECK(activationOperand.length >= sizeof(int));

    const RunTimeOperandInfo* input = GetInput(operation, operands, SVDF::kInputTensor);
    const RunTimeOperandInfo* weights_feature =
            GetInput(operation, operands, SVDF::kWeightsFeatureTensor);
    const RunTimeOperandInfo* weights_time =
            GetInput(operation, operands, SVDF::kWeightsTimeTensor);

    // Check that the tensor parameters are consistent with one another and
    // with the input configuration.
    const int rank = getScalarData<int>(*GetInput(operation, operands, kRankParam));
    const uint32_t batch_size = SizeOfDimension(input, 0);
    const uint32_t num_filters = SizeOfDimension(weights_feature, 0);
    NN_CHECK_EQ(num_filters % rank, 0u);
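    // Each output unit is backed by `rank` filters (the rank of the SVD
    // approximation); EvalFloat32 folds them back together with a reduction
    // sum.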
    const uint32_t num_units = num_filters / rank;
    const uint32_t memory_size = SizeOfDimension(weights_time, 1);
    NN_CHECK_EQ(SizeOfDimension(input, 1), SizeOfDimension(weights_feature, 1));
    NN_CHECK_EQ(SizeOfDimension(weights_time, 0), num_filters);

    const RunTimeOperandInfo* bias = GetInput(operation, operands, kBiasTensor);
    if (!IsNullInput(bias)) {
        NN_CHECK_EQ(SizeOfDimension(bias, 0), num_units);
    }

    // Resize state.
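    // The state holds a rolling window of memory_size entries per filter,
    // flattened into one row of memory_size * num_filters values per batch.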
    const Shape& inputShape = input->shape();
    stateShape->type = inputShape.type;
    stateShape->dimensions = {batch_size, memory_size * num_filters};
    stateShape->offset = inputShape.offset;
    stateShape->scale = inputShape.scale;

    // Resize output.
    outputShape->type = inputShape.type;
    outputShape->dimensions = {batch_size, num_units};
    outputShape->offset = inputShape.offset;
    outputShape->scale = inputShape.scale;

    return true;
}

bool SVDF::Eval() {
    NNTRACE_TRANS("SVDF::Eval");
    switch (input_->type) {
        case OperandType::TENSOR_FLOAT16: {
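            // There is no native fp16 kernel: convert every operand to fp32,
            // run the fp32 path, and convert the results back to fp16.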
            std::vector<float> inputDataFloat32(getNumberOfElements(input_->shape()));
            convertFloat16ToFloat32(reinterpret_cast<_Float16*>(input_->buffer), &inputDataFloat32);
            std::vector<float> inputStateDataFloat32(getNumberOfElements(state_in_->shape()));
            convertFloat16ToFloat32(reinterpret_cast<_Float16*>(state_in_->buffer),
                                    &inputStateDataFloat32);
            std::vector<float> biasDataFloat32(getNumberOfElements(bias_->shape()));
            if (!IsNullInput(bias_)) {
                convertFloat16ToFloat32(reinterpret_cast<_Float16*>(bias_->buffer),
                                        &biasDataFloat32);
            }
            std::vector<float> weightsFeatureDataFloat32(
                    getNumberOfElements(weights_feature_->shape()));
            convertFloat16ToFloat32(reinterpret_cast<_Float16*>(weights_feature_->buffer),
                                    &weightsFeatureDataFloat32);
            std::vector<float> weightsTimeDataFloat32(getNumberOfElements(weights_time_->shape()));
            convertFloat16ToFloat32(reinterpret_cast<_Float16*>(weights_time_->buffer),
                                    &weightsTimeDataFloat32);
            std::vector<float> outputDataFloat32(getNumberOfElements(output_->shape()));
            std::vector<float> outputStateDataFloat32(getNumberOfElements(state_out_->shape()));

            EvalFloat32(inputDataFloat32.data(), inputStateDataFloat32.data(),
                        biasDataFloat32.data(), weightsFeatureDataFloat32.data(),
                        weightsTimeDataFloat32.data(), outputDataFloat32.data(),
                        outputStateDataFloat32.data());
            convertFloat32ToFloat16(outputDataFloat32,
                                    reinterpret_cast<_Float16*>(output_->buffer));
            convertFloat32ToFloat16(outputStateDataFloat32,
                                    reinterpret_cast<_Float16*>(state_out_->buffer));
            break;
        }
        case OperandType::TENSOR_FLOAT32: {
            EvalFloat32(reinterpret_cast<float*>(input_->buffer),
                        reinterpret_cast<float*>(state_in_->buffer),
                        reinterpret_cast<float*>(bias_->buffer),
                        reinterpret_cast<float*>(weights_feature_->buffer),
                        reinterpret_cast<float*>(weights_time_->buffer),
                        reinterpret_cast<float*>(output_->buffer),
                        reinterpret_cast<float*>(state_out_->buffer));
            break;
        }
        default: {
            LOG(ERROR) << "Unsupported data type: " << static_cast<int>(input_->type);
            return false;
        }
    }
    return true;
}

void SVDF::EvalFloat32(const float* inputData, const float* inputStateData, const float* biasData,
                       const float* weightsFeatureData, const float* weightsTimeData,
                       float* outputData, float* outputStateData) {
    NNTRACE_COMP("SVDF::EvalFloat32");

    const int rank = params_.rank_;
    const int batch_size = SizeOfDimension(input_, 0);
    const int input_size = SizeOfDimension(input_, 1);
    const int num_filters = SizeOfDimension(weights_feature_, 0);
    const int num_units = num_filters / rank;
    const int memory_size = SizeOfDimension(weights_time_, 1);

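    // Start from the previous state; everything below updates the output
    // state buffer in place, leaving the input state untouched.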
    memcpy(outputStateData, inputStateData, sizeof(float) * batch_size * memory_size * num_filters);
    // Compute conv1d(inputs, weights_feature).
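    // Zero the most recent slot of each filter's memory; the feature matmul
    // below writes its result there.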
    for (int b = 0; b < batch_size; b++) {
        float* state_ptr_batch = outputStateData + b * memory_size * num_filters;
        for (int c = 0; c < num_filters; c++) {
            float* state_ptr = state_ptr_batch + c * memory_size;
            state_ptr[memory_size - 1] = 0.0;
        }
    }

    // The matmul accumulates into scratch, so it starts zero-initialized.
    std::vector<float> scratch(batch_size * num_filters, 0.0f);
    tflite::tensor_utils::MatrixBatchVectorMultiplyAccumulate(
            weightsFeatureData, num_filters, input_size, inputData, batch_size, scratch.data());

    // Copy the latest activation from scratch into the activation state: the
    // last (i.e. (memory_size - 1)-th) entry for each batch and filter.
    for (int i = 0; i < batch_size * num_filters; ++i) {
        outputStateData[i * memory_size + memory_size - 1] = scratch[i];
    }

    // Begin ApplyTimeWeightsBiasAndActivation
    // Compute matmul(state, weights_time).
    for (int b = 0; b < batch_size; b++) {
        float* state_out_ptr_batch = outputStateData + b * memory_size * num_filters;
        float* scratch_ptr_batch = scratch.data() + b * num_filters;
        tflite::tensor_utils::BatchVectorBatchVectorDotProduct(
                weightsTimeData, state_out_ptr_batch, memory_size, num_filters, scratch_ptr_batch);
    }

    // Reduction sum: fold each group of `rank` filter responses into a single
    // unit output.
    tflite::tensor_utils::ReductionSumVector(scratch.data(), outputData, batch_size * num_units,
                                             rank);

    // Add bias if provided.
    if (!IsNullInput(bias_)) {
        tflite::tensor_utils::VectorBatchVectorAdd(biasData, num_units, batch_size, outputData);
    }

    // Apply activation.
    tflite::tensor_utils::ApplyActivationToVector(
            outputData, batch_size * num_units,
            static_cast<TfLiteFusedActivation>(params_.activation_), outputData);
    // Finished ApplyTimeWeightsBiasAndActivation

    // Right shift the state: drop the oldest entry of each filter's memory
    // and zero the newest slot for the next time step.
    for (int b = 0; b < batch_size; b++) {
        float* state_out_ptr_batch = outputStateData + b * memory_size * num_filters;
        for (int f = 0; f < num_filters; f++) {
            std::copy(state_out_ptr_batch + 1, state_out_ptr_batch + memory_size,
                      state_out_ptr_batch);
            state_out_ptr_batch[memory_size - 1] = 0.0;
            state_out_ptr_batch += memory_size;
        }
    }
}

}  // namespace nn
}  // namespace android