1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "Operations"
18 
19 #include "Multinomial.h"
20 
21 #include <algorithm>
22 #include <limits>
23 #include <vector>
24 
25 #include "CpuExecutor.h"
26 #include "Tracing.h"
27 
28 #ifdef NN_INCLUDE_CPU_IMPLEMENTATION
29 #include <tensorflow/lite/kernels/internal/tensor_utils.h>
30 
31 #pragma clang diagnostic push
32 #pragma clang diagnostic ignored "-Wunused-parameter"
33 #pragma clang diagnostic ignored "-Winvalid-partial-specialization"
34 #include <unsupported/Eigen/CXX11/Tensor>
35 #pragma clang diagnostic pop
36 
37 #include "CpuOperationUtils.h"
38 #include "guarded_philox_random.h"
39 #include "philox_random.h"
40 #include "simple_philox.h"
41 #endif  // NN_INCLUDE_CPU_IMPLEMENTATION
42 
43 namespace android {
44 namespace nn {
45 
46 namespace {
47 
48 template <typename T>
GetBuffer(RunTimeOperandInfo * operand)49 inline T* GetBuffer(RunTimeOperandInfo* operand) {
50     return reinterpret_cast<T*>(operand->buffer);
51 }
52 
53 template <typename T>
GetBuffer(const RunTimeOperandInfo * operand)54 inline const T* GetBuffer(const RunTimeOperandInfo* operand) {
55     return reinterpret_cast<const T*>(operand->buffer);
56 }
57 
58 }  // namespace
59 
// Caches pointers to the operation's runtime operands: the input logits
// tensor, the scalar sample count, the random-seed tensor, and the output.
Multinomial::Multinomial(const Operation& operation, RunTimeOperandInfo* operands) {
    NNTRACE_TRANS("Multinomial::Multinomial");
    // Unnormalized log-probabilities; Prepare/EvalFloat32 treat it as
    // [batch_size, class_size].
    input_ = GetInput(operation, operands, kInputTensor);
    // Number of samples to draw per batch row.
    sample_count_ = getScalarData<int>(*GetInput(operation, operands, kSampleCountParam));
    // Tensor whose first two int32 values seed the Philox RNG (see EvalFloat32).
    random_seeds_ = GetInput(operation, operands, kRandomSeedsTensor);

    output_ = GetOutput(operation, operands, kOutputTensor);
}
68 
Prepare(const Operation & operation,RunTimeOperandInfo * operands,Shape * outputShape)69 bool Multinomial::Prepare(const Operation& operation, RunTimeOperandInfo* operands,
70                           Shape* outputShape) {
71     NNTRACE_TRANS("Multinomial::Prepare");
72     NN_CHECK_EQ(NumInputsWithValues(operation, operands), 3);
73     NN_CHECK_EQ(NumOutputs(operation), 1);
74 
75     const RunTimeOperandInfo* input = GetInput(operation, operands, Multinomial::kInputTensor);
76     const Shape& inputShape = input->shape();
77 
78     const uint32_t batch_size = SizeOfDimension(input, 0);
79     const uint32_t sample_count =
80             getScalarData<int>(*GetInput(operation, operands, kSampleCountParam));
81 
82     outputShape->type = OperandType::TENSOR_INT32;
83     outputShape->dimensions = {batch_size, sample_count};
84     outputShape->offset = inputShape.offset;
85     outputShape->scale = inputShape.scale;
86 
87     return true;
88 }
89 
Eval()90 bool Multinomial::Eval() {
91     NNTRACE_COMP("Multinomial::Eval");
92     switch (input_->type) {
93         case OperandType::TENSOR_FLOAT16: {
94             std::vector<float> inputDataFloat32(getNumberOfElements(input_->shape()));
95             convertFloat16ToFloat32(GetBuffer<_Float16>(input_), &inputDataFloat32);
96             EvalFloat32(inputDataFloat32.data());
97             break;
98         }
99         case OperandType::TENSOR_FLOAT32: {
100             EvalFloat32(GetBuffer<float>(input_));
101             break;
102         }
103         default: {
104             LOG(ERROR) << "Unsupported data type: " << static_cast<int>(input_->type);
105             return false;
106         }
107     }
108     return true;
109 }
110 
// Draws sample_count_ class indices per batch row from the categorical
// distribution defined by the unnormalized log-probabilities in `inputData`
// (laid out as [batch_size, class_size]), writing int32 indices to the
// output tensor. Uses a seeded Philox RNG so results are reproducible for
// a given seed pair.
void Multinomial::EvalFloat32(const float* inputData) {
    const uint32_t batch_size = SizeOfDimension(input_, 0);
    const uint32_t class_size = SizeOfDimension(input_, 1);

    // Seed the guarded Philox generator with the two int32 values from the
    // seeds tensor.
    tensorflow::GuardedPhiloxRandom random_generator;
    int32_t* seeds = GetBuffer<int32_t>(random_seeds_);
    random_generator.Init(seeds[0], seeds[1]);

    // PhiloxRandom produces results as 4 32-bit integers.
    int sample_count_aligned = (sample_count_ + 3) / 4 * 4;
    // The CPU operation uses 64-bit double values, so two results per sample.
    sample_count_aligned *= 2;
    auto random_generator_reserved =
            random_generator.ReserveRandomOutputs(batch_size * sample_count_aligned, 256);
    tensorflow::random::SimplePhilox simple_philox(&random_generator_reserved);

    for (uint64_t b = 0; b < batch_size; ++b) {
        const float* input_ptr_batch = inputData + b * class_size;
        // Find the largest finite logit; subtracting it before exp() keeps
        // the exponentials in a numerically safe range (softmax max-trick).
        float max = std::numeric_limits<float>::lowest();
        for (uint64_t j = 0; j < class_size; ++j) {
            if (Eigen::numext::isfinite(input_ptr_batch[j])) {
                max = std::max(max, input_ptr_batch[j]);
            }
        }
        const double batch_max = static_cast<double>(max);
        double total = 0;
        // Build the unnormalized cumulative distribution over classes.
        // Non-finite logits contribute zero probability mass (cdf stays flat).
        std::vector<double> cdf;
        cdf.resize(class_size);
        for (uint64_t j = 0; j < class_size; ++j) {
            if (Eigen::numext::isfinite(static_cast<float>(input_ptr_batch[j]))) {
                total += exp(static_cast<double>(input_ptr_batch[j]) - batch_max);
            }
            cdf[j] = total;
        }

        // Inverse-CDF sampling: draw a uniform point in [0, total) and
        // binary-search for the first class whose cumulative mass exceeds it.
        auto* output_ptr_batch = GetBuffer<int32_t>(output_) + b * sample_count_;
        for (uint64_t j = 0; j < static_cast<uint64_t>(sample_count_); ++j) {
            const double target = simple_philox.RandDouble() * total;
            auto found_iter = std::upper_bound(cdf.begin(), cdf.end(), target);
            output_ptr_batch[j] = std::distance(cdf.begin(), found_iter);
        }
    }
}
154 
155 }  // namespace nn
156 }  // namespace android
157