1 /*
2  * Copyright (C) 2011 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ART_LIBARTBASE_BASE_LEB128_H_
18 #define ART_LIBARTBASE_BASE_LEB128_H_
19 
20 #include <optional>
21 #include <vector>
22 
23 #include <android-base/logging.h>
24 
25 #include "bit_utils.h"
26 #include "globals.h"
27 #include "macros.h"
28 
29 namespace art {
30 
// Common decoding routine behind the signed and unsigned LEB128 readers.
//
// Reads one LEB128-encoded value of type `T` (32 or 64 bits wide) starting at `*data`.
// When `end` holds a value, every byte is checked against it before being read and the
// function returns false (leaving `*data` and `*out` untouched) if the input runs out.
// On success the decoded value is stored in `*out`, `*data` is advanced past the bytes
// that were consumed, and true is returned. At most ceil(bits(T) / 7) bytes are consumed,
// even if the last of them still has its continuation bit set.
template <typename T>
static inline bool DecodeLeb128Helper(const uint8_t** data,
                                      const std::optional<const void*>& end,
                                      T* out) {
  static_assert(sizeof(T) == 8 || sizeof(T) == 4);
  using UnsignedT = std::make_unsigned_t<T>;
  constexpr size_t kBitWidth = sizeof(T) * 8;
  // Every encoded byte carries 7 payload bits, hence ceil(kBitWidth / 7) bytes at most.
  constexpr size_t kMaxBytes = (kBitWidth + 6u) / 7u;

  const uint8_t* cursor = *data;
  T decoded = 0;
  size_t consumed = 0;
  while (consumed < kMaxBytes) {
    if (end.has_value() && cursor >= end.value()) {
      return false;
    }

    const UnsignedT byte = *cursor;
    ++cursor;
    decoded |= ((byte & 0x7f) << (consumed * 7));
    ++consumed;
    if (LIKELY(byte <= 0x7f)) {
      // Continuation bit is clear: this was the last byte of the encoding.
      if constexpr (std::is_signed_v<T>) {
        // Signed values must be sign extended from the highest decoded bit. When the
        // maximum number of bytes was consumed every bit of T has already been written,
        // so there is nothing left to extend.
        if (consumed < kMaxBytes) {
          int shift = kBitWidth - consumed * 7;
          decoded = (decoded << shift) >> shift;
        }
      }
      break;
    }
  }

  *out = decoded;
  *data = cursor;
  return true;
}
66 
// Decodes one unsigned LEB128 value of type `T` starting at `*data` and advances `*data`
// past the bytes that were read. No end-of-buffer check is performed.
template <typename T = uint32_t>
static inline T DecodeUnsignedLeb128(const uint8_t** data) {
  static_assert(!std::is_signed_v<T>);
  const std::optional<const void*> no_limit = std::nullopt;
  T decoded = 0;
  DecodeLeb128Helper(data, no_limit, &decoded);
  return decoded;
}
74 
75 template <typename T = uint32_t>
DecodeUnsignedLeb128Checked(const uint8_t ** data,const void * end,T * out)76 static inline bool DecodeUnsignedLeb128Checked(const uint8_t** data, const void* end, T* out) {
77   static_assert(!std::is_signed_v<T>);
78   return DecodeLeb128Helper(data, end, out);
79 }
80 
DecodeUnsignedLeb128WithoutMovingCursor(const uint8_t * data)81 static inline uint32_t DecodeUnsignedLeb128WithoutMovingCursor(const uint8_t* data) {
82   return DecodeUnsignedLeb128(&data);
83 }
84 
85 // Reads an unsigned LEB128 + 1 value. updating the given pointer to point
86 // just past the end of the read value. This function tolerates
87 // non-zero high-order bits in the fifth encoded byte.
88 // It is possible for this function to return -1.
DecodeUnsignedLeb128P1(const uint8_t ** data)89 static inline int32_t DecodeUnsignedLeb128P1(const uint8_t** data) {
90   return DecodeUnsignedLeb128(data) - 1;
91 }
92 
// Decodes one signed LEB128 value of type `T` starting at `*data` and advances `*data`
// past the bytes that were read. No end-of-buffer check is performed.
template <typename T = int32_t>
static inline T DecodeSignedLeb128(const uint8_t** data) {
  static_assert(std::is_signed_v<T>);
  const std::optional<const void*> no_limit = std::nullopt;
  T decoded = 0;
  DecodeLeb128Helper(data, no_limit, &decoded);
  return decoded;
}
100 
// Bounds-checked variant of DecodeSignedLeb128(): decodes one signed LEB128 value from
// `*data` into `*out`, never reading a byte at or beyond `end`. Returns false if the input
// ends before the value is complete; otherwise advances `*data` and returns true.
template <typename T = int32_t>
static inline bool DecodeSignedLeb128Checked(const uint8_t** data, const void* end, T* out) {
  static_assert(std::is_signed_v<T>);
  const std::optional<const void*> limit(end);
  return DecodeLeb128Helper(data, limit, out);
}
106 
107 // Returns the number of bytes needed to encode the value in unsigned LEB128.
UnsignedLeb128Size(uint64_t data)108 static inline uint32_t UnsignedLeb128Size(uint64_t data) {
109   // bits_to_encode = (data != 0) ? 64 - CLZ(x) : 1  // 64 - CLZ(data | 1)
110   // bytes = ceil(bits_to_encode / 7.0);             // (6 + bits_to_encode) / 7
111   uint32_t x = 6 + 64 - CLZ(data | 1U);
112   // Division by 7 is done by (x * 37) >> 8 where 37 = ceil(256 / 7).
113   // This works for 0 <= x < 256 / (7 * 37 - 256), i.e. 0 <= x <= 85.
114   return (x * 37) >> 8;
115 }
116 
// Returns true if the byte at `ptr` has its continuation bit (0x80) clear, i.e. it is the
// last byte of a LEB128 encoding.
static inline bool IsLeb128Terminator(const uint8_t* ptr) {
  constexpr uint8_t kContinuationBit = 0x80;
  return (*ptr & kContinuationBit) == 0;
}
120 
121 // Returns the first byte of a Leb128 value assuming that:
122 // (1) `end_ptr` points to the first byte after the Leb128 value, and
123 // (2) there is another Leb128 value before this one.
124 template <typename T>
ReverseSearchUnsignedLeb128(T * end_ptr)125 static inline T* ReverseSearchUnsignedLeb128(T* end_ptr) {
126   static_assert(std::is_same_v<std::remove_const_t<T>, uint8_t>,
127                 "T must be a uint8_t");
128   T* ptr = end_ptr;
129 
130   // Move one byte back, check that this is the terminating byte.
131   ptr--;
132   DCHECK(IsLeb128Terminator(ptr));
133 
134   // Keep moving back while the previous byte is not a terminating byte.
135   // Fail after reading five bytes in case there isn't another Leb128 value
136   // before this one.
137   while (!IsLeb128Terminator(ptr - 1)) {
138     ptr--;
139     DCHECK_LE(static_cast<ptrdiff_t>(end_ptr - ptr), 5);
140   }
141 
142   return ptr;
143 }
144 
145 // Returns the number of bytes needed to encode the value in unsigned LEB128.
SignedLeb128Size(int64_t data)146 static inline uint32_t SignedLeb128Size(int64_t data) {
147   // Like UnsignedLeb128Size(), but we need one bit beyond the highest bit that differs from sign.
148   uint64_t bits_to_encode = static_cast<uint64_t>(data ^ (data >> 63));
149   uint32_t num_bits = 1 /* we need to encode the sign bit */ + 6 + 64 - CLZ(bits_to_encode | 1U);
150   // See UnsignedLeb128Size for explanation. This is basically num_bits / 7.
151   return (num_bits * 37) >> 8;
152 }
153 
// Writes the unsigned LEB128 encoding of `value` to the buffer at `dest` and returns a
// pointer to the byte following the last one written. The caller must provide enough
// space; no bounds check is performed.
static inline uint8_t* EncodeUnsignedLeb128(uint8_t* dest, uint64_t value) {
  // While more than 7 bits remain, emit the low 7 bits with the continuation bit set.
  for (; value > 0x7f; value >>= 7) {
    *dest++ = static_cast<uint8_t>(value & 0x7f) | 0x80;
  }
  // The remaining bits fit in a single byte without the continuation bit.
  *dest++ = static_cast<uint8_t>(value);
  return dest;
}
165 
// Appends the unsigned LEB128 encoding of `value` to `*dest`. `Vector` must be a container
// of uint8_t that provides push_back().
template <typename Vector>
static inline void EncodeUnsignedLeb128(Vector* dest, uint64_t value) {
  static_assert(std::is_same_v<typename Vector::value_type, uint8_t>, "Invalid value type");
  do {
    // Take the next 7 bits; flag the byte as a continuation if anything is left over.
    uint8_t chunk = value & 0x7f;
    value >>= 7;
    if (value != 0) {
      chunk |= 0x80;
    }
    dest->push_back(chunk);
  } while (value != 0);
}
178 
179 // Overwrite encoded Leb128 with a new value. The new value must be less than
180 // or equal to the old value to ensure that it fits the allocated space.
UpdateUnsignedLeb128(uint8_t * dest,uint32_t value)181 static inline void UpdateUnsignedLeb128(uint8_t* dest, uint32_t value) {
182   const uint8_t* old_end = dest;
183   uint32_t old_value = DecodeUnsignedLeb128(&old_end);
184   DCHECK_LE(UnsignedLeb128Size(value), UnsignedLeb128Size(old_value));
185   for (uint8_t* end = EncodeUnsignedLeb128(dest, value); end < old_end; end++) {
186     // Use longer encoding than necessary to fill the allocated space.
187     end[-1] |= 0x80;
188     end[0] = 0;
189   }
190 }
191 
// Writes the signed LEB128 encoding of `value` to the buffer at `dest` and returns a
// pointer to the byte following the last one written. The caller must provide enough
// space; no bounds check is performed.
static inline uint8_t* EncodeSignedLeb128(uint8_t* dest, int64_t value) {
  // XOR with the replicated sign bit clears all bits equal to the sign. Whatever remains
  // above the low 6 bits does not fit into the final byte (6 payload bits plus the sign
  // bit) and therefore requires additional continuation bytes.
  uint64_t pending = static_cast<uint64_t>(value ^ (value >> 63)) >> 6;
  for (; pending != 0u; pending >>= 7) {
    *dest++ = static_cast<uint8_t>(value & 0x7f) | 0x80;
    value >>= 7;
  }
  // Final byte: continuation bit clear, bit 6 carries the sign.
  *dest++ = static_cast<uint8_t>(value & 0x7f);
  return dest;
}
204 
// Appends the signed LEB128 encoding of `value` to `*dest`. `Vector` must be a container
// of uint8_t that provides push_back() (e.g. std::vector<uint8_t>).
template <typename Vector>
static inline void EncodeSignedLeb128(Vector* dest, int64_t value) {
  static_assert(std::is_same_v<typename Vector::value_type, uint8_t>, "Invalid value type");
  // XOR with the replicated sign bit clears all bits equal to the sign. Whatever remains
  // above the low 6 bits does not fit into the final byte (6 payload bits plus the sign
  // bit) and requires additional continuation bytes. This must be computed on the full
  // 64-bit width of `value`; a 32-bit computation silently truncates the encoding of
  // values outside the int32_t range.
  uint64_t extra_bits = static_cast<uint64_t>(value ^ (value >> 63)) >> 6;
  uint8_t out = value & 0x7f;
  while (extra_bits != 0u) {
    dest->push_back(out | 0x80);
    value >>= 7;
    out = value & 0x7f;
    extra_bits >>= 7;
  }
  // Final byte: continuation bit clear, bit 6 carries the sign.
  dest->push_back(out);
}
216 
217 // An encoder that pushes int32_t/uint32_t data onto the given std::vector.
218 template <typename Vector = std::vector<uint8_t>>
219 class Leb128Encoder {
220   static_assert(std::is_same_v<typename Vector::value_type, uint8_t>, "Invalid value type");
221 
222  public:
Leb128Encoder(Vector * data)223   explicit Leb128Encoder(Vector* data) : data_(data) {
224     DCHECK(data != nullptr);
225   }
226 
Reserve(uint32_t size)227   void Reserve(uint32_t size) {
228     data_->reserve(size);
229   }
230 
PushBackUnsigned(uint32_t value)231   void PushBackUnsigned(uint32_t value) {
232     EncodeUnsignedLeb128(data_, value);
233   }
234 
235   template<typename It>
InsertBackUnsigned(It cur,It end)236   void InsertBackUnsigned(It cur, It end) {
237     for (; cur != end; ++cur) {
238       PushBackUnsigned(*cur);
239     }
240   }
241 
PushBackSigned(int32_t value)242   void PushBackSigned(int32_t value) {
243     EncodeSignedLeb128(data_, value);
244   }
245 
246   template<typename It>
InsertBackSigned(It cur,It end)247   void InsertBackSigned(It cur, It end) {
248     for (; cur != end; ++cur) {
249       PushBackSigned(*cur);
250     }
251   }
252 
GetData()253   const Vector& GetData() const {
254     return *data_;
255   }
256 
257  protected:
258   Vector* const data_;
259 
260  private:
261   DISALLOW_COPY_AND_ASSIGN(Leb128Encoder);
262 };
263 
264 // An encoder with an API similar to vector<uint32_t> where the data is captured in ULEB128 format.
265 template <typename Vector = std::vector<uint8_t>>
266 class Leb128EncodingVector final : private Vector,
267                                    public Leb128Encoder<Vector> {
268   static_assert(std::is_same_v<typename Vector::value_type, uint8_t>, "Invalid value type");
269 
270  public:
Leb128EncodingVector()271   Leb128EncodingVector() : Leb128Encoder<Vector>(this) { }
272 
Leb128EncodingVector(const typename Vector::allocator_type & alloc)273   explicit Leb128EncodingVector(const typename Vector::allocator_type& alloc)
274     : Vector(alloc),
275       Leb128Encoder<Vector>(this) { }
276 
277  private:
278   DISALLOW_COPY_AND_ASSIGN(Leb128EncodingVector);
279 };
280 
281 }  // namespace art
282 
283 #endif  // ART_LIBARTBASE_BASE_LEB128_H_
284