1 /*
2 * Copyright (C) 2018 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #define LOG_TAG "Operations"
18
19 #include "Multinomial.h"
20
21 #include <algorithm>
22 #include <limits>
23 #include <vector>
24
25 #include "CpuExecutor.h"
26 #include "Tracing.h"
27
28 #ifdef NN_INCLUDE_CPU_IMPLEMENTATION
29 #include <tensorflow/lite/kernels/internal/tensor_utils.h>
30
31 #pragma clang diagnostic push
32 #pragma clang diagnostic ignored "-Wunused-parameter"
33 #pragma clang diagnostic ignored "-Winvalid-partial-specialization"
34 #include <unsupported/Eigen/CXX11/Tensor>
35 #pragma clang diagnostic pop
36
37 #include "CpuOperationUtils.h"
38 #include "guarded_philox_random.h"
39 #include "philox_random.h"
40 #include "simple_philox.h"
41 #endif // NN_INCLUDE_CPU_IMPLEMENTATION
42
43 namespace android {
44 namespace nn {
45
46 namespace {
47
48 template <typename T>
GetBuffer(RunTimeOperandInfo * operand)49 inline T* GetBuffer(RunTimeOperandInfo* operand) {
50 return reinterpret_cast<T*>(operand->buffer);
51 }
52
53 template <typename T>
GetBuffer(const RunTimeOperandInfo * operand)54 inline const T* GetBuffer(const RunTimeOperandInfo* operand) {
55 return reinterpret_cast<const T*>(operand->buffer);
56 }
57
58 } // namespace
59
Multinomial(const Operation & operation,RunTimeOperandInfo * operands)60 Multinomial::Multinomial(const Operation& operation, RunTimeOperandInfo* operands) {
61 NNTRACE_TRANS("Multinomial::Multinomial");
62 input_ = GetInput(operation, operands, kInputTensor);
63 sample_count_ = getScalarData<int>(*GetInput(operation, operands, kSampleCountParam));
64 random_seeds_ = GetInput(operation, operands, kRandomSeedsTensor);
65
66 output_ = GetOutput(operation, operands, kOutputTensor);
67 }
68
Prepare(const Operation & operation,RunTimeOperandInfo * operands,Shape * outputShape)69 bool Multinomial::Prepare(const Operation& operation, RunTimeOperandInfo* operands,
70 Shape* outputShape) {
71 NNTRACE_TRANS("Multinomial::Prepare");
72 NN_CHECK_EQ(NumInputsWithValues(operation, operands), 3);
73 NN_CHECK_EQ(NumOutputs(operation), 1);
74
75 const RunTimeOperandInfo* input = GetInput(operation, operands, Multinomial::kInputTensor);
76 const Shape& inputShape = input->shape();
77
78 const uint32_t batch_size = SizeOfDimension(input, 0);
79 const uint32_t sample_count =
80 getScalarData<int>(*GetInput(operation, operands, kSampleCountParam));
81
82 outputShape->type = OperandType::TENSOR_INT32;
83 outputShape->dimensions = {batch_size, sample_count};
84 outputShape->offset = inputShape.offset;
85 outputShape->scale = inputShape.scale;
86
87 return true;
88 }
89
Eval()90 bool Multinomial::Eval() {
91 NNTRACE_COMP("Multinomial::Eval");
92 switch (input_->type) {
93 case OperandType::TENSOR_FLOAT16: {
94 std::vector<float> inputDataFloat32(getNumberOfElements(input_->shape()));
95 convertFloat16ToFloat32(GetBuffer<_Float16>(input_), &inputDataFloat32);
96 EvalFloat32(inputDataFloat32.data());
97 break;
98 }
99 case OperandType::TENSOR_FLOAT32: {
100 EvalFloat32(GetBuffer<float>(input_));
101 break;
102 }
103 default: {
104 LOG(ERROR) << "Unsupported data type: " << static_cast<int>(input_->type);
105 return false;
106 }
107 }
108 return true;
109 }
110
// Draws sample_count_ class indices per batch from the categorical
// distribution defined by the (unnormalized) log-probabilities in inputData,
// which is laid out as [batch_size, class_size]. Results are written into
// output_ as int32 class indices.
void Multinomial::EvalFloat32(const float* inputData) {
    const uint32_t batch_size = SizeOfDimension(input_, 0);
    const uint32_t class_size = SizeOfDimension(input_, 1);

    tensorflow::GuardedPhiloxRandom random_generator;
    // NOTE(review): assumes the seeds tensor holds at least two int32 values —
    // confirm this is validated by the operation's signature checks elsewhere.
    int32_t* seeds = GetBuffer<int32_t>(random_seeds_);
    random_generator.Init(seeds[0], seeds[1]);

    // PhiloxRandom produces results as 4 32-bit integers.
    int sample_count_aligned = (sample_count_ + 3) / 4 * 4;
    // The CPU operation uses 64-bit double values, so two results per sample.
    sample_count_aligned *= 2;
    // Reserve the exact amount of random output needed up front so the stream
    // of generated values is deterministic for a given seed pair.
    auto random_generator_reserved =
            random_generator.ReserveRandomOutputs(batch_size * sample_count_aligned, 256);
    tensorflow::random::SimplePhilox simple_philox(&random_generator_reserved);

    for (uint64_t b = 0; b < batch_size; ++b) {
        const float* input_ptr_batch = inputData + b * class_size;
        // Find the largest finite logit so the exponentials below are shifted
        // into a numerically safe range (softmax max-subtraction trick).
        float max = std::numeric_limits<float>::lowest();
        for (uint64_t j = 0; j < class_size; ++j) {
            if (Eigen::numext::isfinite(input_ptr_batch[j])) {
                max = std::max(max, input_ptr_batch[j]);
            }
        }
        const double batch_max = static_cast<double>(max);
        double total = 0;
        // Build an unnormalized cumulative distribution over the classes.
        // Non-finite logits contribute zero probability mass.
        std::vector<double> cdf;
        cdf.resize(class_size);
        for (uint64_t j = 0; j < class_size; ++j) {
            if (Eigen::numext::isfinite(static_cast<float>(input_ptr_batch[j]))) {
                total += exp(static_cast<double>(input_ptr_batch[j]) - batch_max);
            }
            cdf[j] = total;
        }

        auto* output_ptr_batch = GetBuffer<int32_t>(output_) + b * sample_count_;
        for (uint64_t j = 0; j < static_cast<uint64_t>(sample_count_); ++j) {
            // Inverse-CDF sampling: draw a uniform point in [0, total) and
            // binary-search for the first class whose cumulative mass exceeds it.
            const double target = simple_philox.RandDouble() * total;
            auto found_iter = std::upper_bound(cdf.begin(), cdf.end(), target);
            output_ptr_batch[j] = std::distance(cdf.begin(), found_iter);
        }
    }
}
154
155 } // namespace nn
156 } // namespace android
157