1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "slicer/reader.h"
18 
19 #include "slicer/chronometer.h"
20 #include "slicer/dex_bytecode.h"
21 #include "slicer/dex_leb128.h"
22 
23 #include <assert.h>
24 #include <string.h>
25 #include <type_traits>
26 #include <cstdlib>
27 
28 namespace dex {
29 
Reader(const dex::u1 * image,size_t size)30 Reader::Reader(const dex::u1* image, size_t size) : image_(image), size_(size) {
31   // init the header reference
32   header_ = ptr<dex::Header>(0);
33   ValidateHeader();
34 
35   // start with an "empty" .dex IR
36   dex_ir_ = std::make_shared<ir::DexFile>();
37   dex_ir_->magic = slicer::MemView(header_, sizeof(dex::Header::magic));
38 }
39 
ClassDefs() const40 slicer::ArrayView<const dex::ClassDef> Reader::ClassDefs() const {
41   return section<dex::ClassDef>(header_->class_defs_off,
42                                 header_->class_defs_size);
43 }
44 
StringIds() const45 slicer::ArrayView<const dex::StringId> Reader::StringIds() const {
46   return section<dex::StringId>(header_->string_ids_off,
47                                 header_->string_ids_size);
48 }
49 
TypeIds() const50 slicer::ArrayView<const dex::TypeId> Reader::TypeIds() const {
51   return section<dex::TypeId>(header_->type_ids_off,
52                               header_->type_ids_size);
53 }
54 
FieldIds() const55 slicer::ArrayView<const dex::FieldId> Reader::FieldIds() const {
56   return section<dex::FieldId>(header_->field_ids_off,
57                                header_->field_ids_size);
58 }
59 
MethodIds() const60 slicer::ArrayView<const dex::MethodId> Reader::MethodIds() const {
61   return section<dex::MethodId>(header_->method_ids_off,
62                                 header_->method_ids_size);
63 }
64 
ProtoIds() const65 slicer::ArrayView<const dex::ProtoId> Reader::ProtoIds() const {
66   return section<dex::ProtoId>(header_->proto_ids_off,
67                                header_->proto_ids_size);
68 }
69 
MethodHandles() const70 slicer::ArrayView<const dex::MethodHandle> Reader::MethodHandles() const {
71   const dex::MapList* ml = DexMapList();
72   if(ml == nullptr){
73     slicer::ArrayView<const dex::MethodHandle> ret;
74     return ret;
75   }
76 
77   // Find MethodHandle entry
78   const dex::MapItem* mi = nullptr;
79   for(int i = 0; i < ml->size; i++){
80     if(ml->list[i].type == dex::kMethodHandleItem){
81       mi = &(ml->list[i]);
82       break;
83     }
84   }
85 
86   if(mi == nullptr){
87     slicer::ArrayView<const dex::MethodHandle> ret;
88     return ret;
89   }
90 
91   return section<dex::MethodHandle>(mi->offset, mi->size);
92 }
93 
DexMapList() const94 const dex::MapList* Reader::DexMapList() const {
95   return dataPtr<dex::MapList>(header_->map_off);
96 }
97 
GetStringMUTF8(dex::u4 index) const98 const char* Reader::GetStringMUTF8(dex::u4 index) const {
99   if (index == dex::kNoIndex) {
100     return "<no_string>";
101   }
102   const dex::u1* strData = GetStringData(index);
103   dex::ReadULeb128(&strData);
104   return reinterpret_cast<const char*>(strData);
105 }
106 
CreateFullIr()107 void Reader::CreateFullIr() {
108   size_t classCount = ClassDefs().size();
109   for (size_t i = 0; i < classCount; ++i) {
110     CreateClassIr(i);
111   }
112 }
113 
CreateClassIr(dex::u4 index)114 void Reader::CreateClassIr(dex::u4 index) {
115   auto ir_class = GetClass(index);
116   SLICER_CHECK_NE(ir_class, nullptr);
117 }
118 
119 // Returns the index of the class with the specified
120 // descriptor, or kNoIndex if not found
FindClassIndex(const char * class_descriptor) const121 dex::u4 Reader::FindClassIndex(const char* class_descriptor) const {
122   auto classes = ClassDefs();
123   auto types = TypeIds();
124   for (dex::u4 i = 0; i < classes.size(); ++i) {
125     auto typeId = types[classes[i].class_idx];
126     const char* descriptor = GetStringMUTF8(typeId.descriptor_idx);
127     if (strcmp(class_descriptor, descriptor) == 0) {
128       return i;
129     }
130   }
131   return dex::kNoIndex;
132 }
133 
134 // map a .dex index to corresponding .dex IR node
135 //
136 // NOTES:
137 //  1. the mapping between an index and the indexed
138 //     .dex IR nodes is 1:1
139 //  2. we do a single index lookup for both existing
140 //     nodes as well as new nodes
141 //  3. placeholder is an invalid, but non-null pointer value
142 //     used to check that the mapping lookup/update is atomic
143 //  4. there should be no recursion with the same index
144 //     (we use the placeholder value to guard against this too)
145 //
GetClass(dex::u4 index)146 ir::Class* Reader::GetClass(dex::u4 index) {
147   SLICER_CHECK_NE(index, dex::kNoIndex);
148   auto& p = dex_ir_->classes_map[index];
149   auto placeholder = reinterpret_cast<ir::Class*>(1);
150   if (p == nullptr) {
151     p = placeholder;
152     auto newClass = ParseClass(index);
153     SLICER_CHECK_EQ(p, placeholder);
154     p = newClass;
155     dex_ir_->classes_indexes.MarkUsedIndex(index);
156   }
157   SLICER_CHECK_NE(p, placeholder);
158   return p;
159 }
160 
161 // map a .dex index to corresponding .dex IR node
162 // (see the Reader::GetClass() comments)
GetType(dex::u4 index)163 ir::Type* Reader::GetType(dex::u4 index) {
164   SLICER_CHECK_NE(index, dex::kNoIndex);
165   auto& p = dex_ir_->types_map[index];
166   auto placeholder = reinterpret_cast<ir::Type*>(1);
167   if (p == nullptr) {
168     p = placeholder;
169     auto newType = ParseType(index);
170     SLICER_CHECK_EQ(p, placeholder);
171     p = newType;
172     dex_ir_->types_indexes.MarkUsedIndex(index);
173   }
174   SLICER_CHECK_NE(p, placeholder);
175   return p;
176 }
177 
178 // map a .dex index to corresponding .dex IR node
179 // (see the Reader::GetClass() comments)
GetFieldDecl(dex::u4 index)180 ir::FieldDecl* Reader::GetFieldDecl(dex::u4 index) {
181   SLICER_CHECK_NE(index, dex::kNoIndex);
182   auto& p = dex_ir_->fields_map[index];
183   auto placeholder = reinterpret_cast<ir::FieldDecl*>(1);
184   if (p == nullptr) {
185     p = placeholder;
186     auto newField = ParseFieldDecl(index);
187     SLICER_CHECK_EQ(p, placeholder);
188     p = newField;
189     dex_ir_->fields_indexes.MarkUsedIndex(index);
190   }
191   SLICER_CHECK_NE(p, placeholder);
192   return p;
193 }
194 
GetMethodHandle(dex::u4 index)195 ir::MethodHandle* Reader::GetMethodHandle(dex::u4 index){
196   SLICER_CHECK_NE(index, dex::kNoIndex);
197   auto& p = dex_ir_->method_handles_map[index];
198   auto placeholder = reinterpret_cast<ir::MethodHandle*>(1);
199   if(p == nullptr) {
200     p = placeholder;
201     auto newMethodHandle = ParseMethodHandle(index);
202     SLICER_CHECK_EQ(p, placeholder);
203     p = newMethodHandle;
204     dex_ir_->method_handle_indexes.MarkUsedIndex(index);
205   }
206 
207   SLICER_CHECK_NE(p, placeholder);
208   return p;
209 }
210 
211 // map a .dex index to corresponding .dex IR node
212 // (see the Reader::GetClass() comments)
GetMethodDecl(dex::u4 index)213 ir::MethodDecl* Reader::GetMethodDecl(dex::u4 index) {
214   SLICER_CHECK_NE(index, dex::kNoIndex);
215   auto& p = dex_ir_->methods_map[index];
216   auto placeholder = reinterpret_cast<ir::MethodDecl*>(1);
217   if (p == nullptr) {
218     p = placeholder;
219     auto newMethod = ParseMethodDecl(index);
220     SLICER_CHECK_EQ(p, placeholder);
221     p = newMethod;
222     dex_ir_->methods_indexes.MarkUsedIndex(index);
223   }
224   SLICER_CHECK_NE(p, placeholder);
225   return p;
226 }
227 
228 // map a .dex index to corresponding .dex IR node
229 // (see the Reader::GetClass() comments)
GetProto(dex::u4 index)230 ir::Proto* Reader::GetProto(dex::u4 index) {
231   SLICER_CHECK_NE(index, dex::kNoIndex);
232   auto& p = dex_ir_->protos_map[index];
233   auto placeholder = reinterpret_cast<ir::Proto*>(1);
234   if (p == nullptr) {
235     p = placeholder;
236     auto newProto = ParseProto(index);
237     SLICER_CHECK_EQ(p, placeholder);
238     p = newProto;
239     dex_ir_->protos_indexes.MarkUsedIndex(index);
240   }
241   SLICER_CHECK_NE(p, placeholder);
242   return p;
243 }
244 
245 // map a .dex index to corresponding .dex IR node
246 // (see the Reader::GetClass() comments)
GetString(dex::u4 index)247 ir::String* Reader::GetString(dex::u4 index) {
248   SLICER_CHECK_NE(index, dex::kNoIndex);
249   auto& p = dex_ir_->strings_map[index];
250   auto placeholder = reinterpret_cast<ir::String*>(1);
251   if (p == nullptr) {
252     p = placeholder;
253     auto newString = ParseString(index);
254     SLICER_CHECK_EQ(p, placeholder);
255     p = newString;
256     dex_ir_->strings_indexes.MarkUsedIndex(index);
257   }
258   SLICER_CHECK_NE(p, placeholder);
259   return p;
260 }
261 
ParseClass(dex::u4 index)262 ir::Class* Reader::ParseClass(dex::u4 index) {
263   auto& dex_class_def = ClassDefs()[index];
264   auto ir_class = dex_ir_->Alloc<ir::Class>();
265 
266   ir_class->type = GetType(dex_class_def.class_idx);
267   assert(ir_class->type->class_def == nullptr);
268   ir_class->type->class_def = ir_class;
269 
270   ir_class->access_flags = dex_class_def.access_flags;
271   ir_class->interfaces = ExtractTypeList(dex_class_def.interfaces_off);
272 
273   if (dex_class_def.superclass_idx != dex::kNoIndex) {
274     ir_class->super_class = GetType(dex_class_def.superclass_idx);
275   }
276 
277   if (dex_class_def.source_file_idx != dex::kNoIndex) {
278     ir_class->source_file = GetString(dex_class_def.source_file_idx);
279   }
280 
281   if (dex_class_def.class_data_off != 0) {
282     const dex::u1* class_data = dataPtr<dex::u1>(dex_class_def.class_data_off);
283 
284     dex::u4 static_fields_count = dex::ReadULeb128(&class_data);
285     dex::u4 instance_fields_count = dex::ReadULeb128(&class_data);
286     dex::u4 direct_methods_count = dex::ReadULeb128(&class_data);
287     dex::u4 virtual_methods_count = dex::ReadULeb128(&class_data);
288 
289     dex::u4 base_index = dex::kNoIndex;
290     for (dex::u4 i = 0; i < static_fields_count; ++i) {
291       auto field = ParseEncodedField(&class_data, &base_index);
292       ir_class->static_fields.push_back(field);
293     }
294 
295     base_index = dex::kNoIndex;
296     for (dex::u4 i = 0; i < instance_fields_count; ++i) {
297       auto field = ParseEncodedField(&class_data, &base_index);
298       ir_class->instance_fields.push_back(field);
299     }
300 
301     base_index = dex::kNoIndex;
302     for (dex::u4 i = 0; i < direct_methods_count; ++i) {
303       auto method = ParseEncodedMethod(&class_data, &base_index);
304       ir_class->direct_methods.push_back(method);
305     }
306 
307     base_index = dex::kNoIndex;
308     for (dex::u4 i = 0; i < virtual_methods_count; ++i) {
309       auto method = ParseEncodedMethod(&class_data, &base_index);
310       ir_class->virtual_methods.push_back(method);
311     }
312   }
313 
314   ir_class->static_init = ExtractEncodedArray(dex_class_def.static_values_off);
315   ir_class->annotations = ExtractAnnotations(dex_class_def.annotations_off);
316   ir_class->orig_index = index;
317 
318   return ir_class;
319 }
320 
ExtractAnnotations(dex::u4 offset)321 ir::AnnotationsDirectory* Reader::ExtractAnnotations(dex::u4 offset) {
322   if (offset == 0) {
323     return nullptr;
324   }
325 
326   SLICER_CHECK_EQ(offset % 4, 0);
327 
328   // first check if we already extracted the same "annotations_directory_item"
329   auto& ir_annotations = annotations_directories_[offset];
330   if (ir_annotations == nullptr) {
331     ir_annotations = dex_ir_->Alloc<ir::AnnotationsDirectory>();
332 
333     auto dex_annotations = dataPtr<dex::AnnotationsDirectoryItem>(offset);
334 
335     ir_annotations->class_annotation =
336         ExtractAnnotationSet(dex_annotations->class_annotations_off);
337 
338     const dex::u1* ptr = reinterpret_cast<const dex::u1*>(dex_annotations + 1);
339 
340     for (dex::u4 i = 0; i < dex_annotations->fields_size; ++i) {
341       ir_annotations->field_annotations.push_back(ParseFieldAnnotation(&ptr));
342     }
343 
344     for (dex::u4 i = 0; i < dex_annotations->methods_size; ++i) {
345       ir_annotations->method_annotations.push_back(ParseMethodAnnotation(&ptr));
346     }
347 
348     for (dex::u4 i = 0; i < dex_annotations->parameters_size; ++i) {
349       ir_annotations->param_annotations.push_back(ParseParamAnnotation(&ptr));
350     }
351   }
352   return ir_annotations;
353 }
354 
ExtractAnnotationItem(dex::u4 offset)355 ir::Annotation* Reader::ExtractAnnotationItem(dex::u4 offset) {
356   SLICER_CHECK_NE(offset, 0);
357 
358   // first check if we already extracted the same "annotation_item"
359   auto& ir_annotation = annotations_[offset];
360   if (ir_annotation == nullptr) {
361     auto dexAnnotationItem = dataPtr<dex::AnnotationItem>(offset);
362     const dex::u1* ptr = dexAnnotationItem->annotation;
363     ir_annotation = ParseAnnotation(&ptr);
364     ir_annotation->visibility = dexAnnotationItem->visibility;
365   }
366   return ir_annotation;
367 }
368 
ExtractAnnotationSet(dex::u4 offset)369 ir::AnnotationSet* Reader::ExtractAnnotationSet(dex::u4 offset) {
370   if (offset == 0) {
371     return nullptr;
372   }
373 
374   SLICER_CHECK_EQ(offset % 4, 0);
375 
376   // first check if we already extracted the same "annotation_set_item"
377   auto& ir_annotation_set = annotation_sets_[offset];
378   if (ir_annotation_set == nullptr) {
379     ir_annotation_set = dex_ir_->Alloc<ir::AnnotationSet>();
380 
381     auto dex_annotation_set = dataPtr<dex::AnnotationSetItem>(offset);
382     for (dex::u4 i = 0; i < dex_annotation_set->size; ++i) {
383       auto ir_annotation = ExtractAnnotationItem(dex_annotation_set->entries[i]);
384       assert(ir_annotation != nullptr);
385       ir_annotation_set->annotations.push_back(ir_annotation);
386     }
387   }
388   return ir_annotation_set;
389 }
390 
ExtractAnnotationSetRefList(dex::u4 offset)391 ir::AnnotationSetRefList* Reader::ExtractAnnotationSetRefList(dex::u4 offset) {
392   SLICER_CHECK_EQ(offset % 4, 0);
393 
394   auto dex_annotation_set_ref_list = dataPtr<dex::AnnotationSetRefList>(offset);
395   auto ir_annotation_set_ref_list = dex_ir_->Alloc<ir::AnnotationSetRefList>();
396 
397   for (dex::u4 i = 0; i < dex_annotation_set_ref_list->size; ++i) {
398     dex::u4 entry_offset = dex_annotation_set_ref_list->list[i].annotations_off;
399     if (entry_offset != 0) {
400       auto ir_annotation_set = ExtractAnnotationSet(entry_offset);
401       SLICER_CHECK_NE(ir_annotation_set, nullptr);
402       ir_annotation_set_ref_list->annotations.push_back(ir_annotation_set);
403     }
404   }
405 
406   return ir_annotation_set_ref_list;
407 }
408 
ParseFieldAnnotation(const dex::u1 ** pptr)409 ir::FieldAnnotation* Reader::ParseFieldAnnotation(const dex::u1** pptr) {
410   auto dex_field_annotation = reinterpret_cast<const dex::FieldAnnotationsItem*>(*pptr);
411   auto ir_field_annotation = dex_ir_->Alloc<ir::FieldAnnotation>();
412 
413   ir_field_annotation->field_decl = GetFieldDecl(dex_field_annotation->field_idx);
414 
415   ir_field_annotation->annotations =
416       ExtractAnnotationSet(dex_field_annotation->annotations_off);
417   SLICER_CHECK_NE(ir_field_annotation->annotations, nullptr);
418 
419   *pptr += sizeof(dex::FieldAnnotationsItem);
420   return ir_field_annotation;
421 }
422 
ParseMethodAnnotation(const dex::u1 ** pptr)423 ir::MethodAnnotation* Reader::ParseMethodAnnotation(const dex::u1** pptr) {
424   auto dex_method_annotation =
425       reinterpret_cast<const dex::MethodAnnotationsItem*>(*pptr);
426   auto ir_method_annotation = dex_ir_->Alloc<ir::MethodAnnotation>();
427 
428   ir_method_annotation->method_decl = GetMethodDecl(dex_method_annotation->method_idx);
429 
430   ir_method_annotation->annotations =
431       ExtractAnnotationSet(dex_method_annotation->annotations_off);
432   SLICER_CHECK_NE(ir_method_annotation->annotations, nullptr);
433 
434   *pptr += sizeof(dex::MethodAnnotationsItem);
435   return ir_method_annotation;
436 }
437 
ParseParamAnnotation(const dex::u1 ** pptr)438 ir::ParamAnnotation* Reader::ParseParamAnnotation(const dex::u1** pptr) {
439   auto dex_param_annotation =
440       reinterpret_cast<const dex::ParameterAnnotationsItem*>(*pptr);
441   auto ir_param_annotation = dex_ir_->Alloc<ir::ParamAnnotation>();
442 
443   ir_param_annotation->method_decl = GetMethodDecl(dex_param_annotation->method_idx);
444 
445   ir_param_annotation->annotations =
446       ExtractAnnotationSetRefList(dex_param_annotation->annotations_off);
447   SLICER_CHECK_NE(ir_param_annotation->annotations, nullptr);
448 
449   *pptr += sizeof(dex::ParameterAnnotationsItem);
450   return ir_param_annotation;
451 }
452 
ParseEncodedField(const dex::u1 ** pptr,dex::u4 * base_index)453 ir::EncodedField* Reader::ParseEncodedField(const dex::u1** pptr, dex::u4* base_index) {
454   auto ir_encoded_field = dex_ir_->Alloc<ir::EncodedField>();
455 
456   auto field_index = dex::ReadULeb128(pptr);
457   SLICER_CHECK_NE(field_index, dex::kNoIndex);
458   if (*base_index != dex::kNoIndex) {
459     SLICER_CHECK_NE(field_index, 0);
460     field_index += *base_index;
461   }
462   *base_index = field_index;
463 
464   ir_encoded_field->decl = GetFieldDecl(field_index);
465   ir_encoded_field->access_flags = dex::ReadULeb128(pptr);
466 
467   return ir_encoded_field;
468 }
469 
470 // Parse an encoded variable-length integer value
471 // (sign-extend signed types, zero-extend unsigned types)
472 template <class T>
ParseIntValue(const dex::u1 ** pptr,size_t size)473 static T ParseIntValue(const dex::u1** pptr, size_t size) {
474   static_assert(std::is_integral<T>::value, "must be an integral type");
475 
476   SLICER_CHECK_GT(size, 0);
477   SLICER_CHECK_LE(size, sizeof(T));
478 
479   T value = 0;
480   for (int i = 0; i < size; ++i) {
481     value |= T(*(*pptr)++) << (i * 8);
482   }
483 
484   // sign-extend?
485   if (std::is_signed<T>::value) {
486     size_t shift = (sizeof(T) - size) * 8;
487     value = T(value << shift) >> shift;
488   }
489 
490   return value;
491 }
492 
493 // Parse an encoded variable-length floating point value
494 // (zero-extend to the right)
495 template <class T>
ParseFloatValue(const dex::u1 ** pptr,size_t size)496 static T ParseFloatValue(const dex::u1** pptr, size_t size) {
497   SLICER_CHECK_GT(size, 0);
498   SLICER_CHECK_LE(size, sizeof(T));
499 
500   T value = 0;
501   int start_byte = sizeof(T) - size;
502   for (dex::u1* p = reinterpret_cast<dex::u1*>(&value) + start_byte; size > 0;
503        --size) {
504     *p++ = *(*pptr)++;
505   }
506   return value;
507 }
508 
ParseEncodedValue(const dex::u1 ** pptr)509 ir::EncodedValue* Reader::ParseEncodedValue(const dex::u1** pptr) {
510   auto ir_encoded_value = dex_ir_->Alloc<ir::EncodedValue>();
511 
512   SLICER_EXTRA(auto base_ptr = *pptr);
513 
514   dex::u1 header = *(*pptr)++;
515   dex::u1 type = header & dex::kEncodedValueTypeMask;
516   dex::u1 arg = header >> dex::kEncodedValueArgShift;
517 
518   ir_encoded_value->type = type;
519 
520   switch (type) {
521     case dex::kEncodedByte:
522       ir_encoded_value->u.byte_value = ParseIntValue<int8_t>(pptr, arg + 1);
523       break;
524 
525     case dex::kEncodedShort:
526       ir_encoded_value->u.short_value = ParseIntValue<int16_t>(pptr, arg + 1);
527       break;
528 
529     case dex::kEncodedChar:
530       ir_encoded_value->u.char_value = ParseIntValue<uint16_t>(pptr, arg + 1);
531       break;
532 
533     case dex::kEncodedInt:
534       ir_encoded_value->u.int_value = ParseIntValue<int32_t>(pptr, arg + 1);
535       break;
536 
537     case dex::kEncodedLong:
538       ir_encoded_value->u.long_value = ParseIntValue<int64_t>(pptr, arg + 1);
539       break;
540 
541     case dex::kEncodedFloat:
542       ir_encoded_value->u.float_value = ParseFloatValue<float>(pptr, arg + 1);
543       break;
544 
545     case dex::kEncodedDouble:
546       ir_encoded_value->u.double_value = ParseFloatValue<double>(pptr, arg + 1);
547       break;
548 
549     case dex::kEncodedString: {
550       dex::u4 index = ParseIntValue<dex::u4>(pptr, arg + 1);
551       ir_encoded_value->u.string_value = GetString(index);
552     } break;
553 
554     case dex::kEncodedType: {
555       dex::u4 index = ParseIntValue<dex::u4>(pptr, arg + 1);
556       ir_encoded_value->u.type_value = GetType(index);
557     } break;
558 
559     case dex::kEncodedField: {
560       dex::u4 index = ParseIntValue<dex::u4>(pptr, arg + 1);
561       ir_encoded_value->u.field_value = GetFieldDecl(index);
562     } break;
563 
564     case dex::kEncodedMethod: {
565       dex::u4 index = ParseIntValue<dex::u4>(pptr, arg + 1);
566       ir_encoded_value->u.method_value = GetMethodDecl(index);
567     } break;
568 
569     case dex::kEncodedEnum: {
570       dex::u4 index = ParseIntValue<dex::u4>(pptr, arg + 1);
571       ir_encoded_value->u.enum_value = GetFieldDecl(index);
572     } break;
573 
574     case dex::kEncodedArray:
575       SLICER_CHECK_EQ(arg, 0);
576       ir_encoded_value->u.array_value = ParseEncodedArray(pptr);
577       break;
578 
579     case dex::kEncodedAnnotation:
580       SLICER_CHECK_EQ(arg, 0);
581       ir_encoded_value->u.annotation_value = ParseAnnotation(pptr);
582       break;
583 
584     case dex::kEncodedNull:
585       SLICER_CHECK_EQ(arg, 0);
586       break;
587 
588     case dex::kEncodedBoolean:
589       SLICER_CHECK_LT(arg, 2);
590       ir_encoded_value->u.bool_value = (arg == 1);
591       break;
592 
593     default:
594       SLICER_CHECK(!"unexpected value type");
595   }
596 
597   SLICER_EXTRA(ir_encoded_value->original = slicer::MemView(base_ptr, *pptr - base_ptr));
598 
599   return ir_encoded_value;
600 }
601 
ParseAnnotation(const dex::u1 ** pptr)602 ir::Annotation* Reader::ParseAnnotation(const dex::u1** pptr) {
603   auto ir_annotation = dex_ir_->Alloc<ir::Annotation>();
604 
605   dex::u4 type_index = dex::ReadULeb128(pptr);
606   dex::u4 elements_count = dex::ReadULeb128(pptr);
607 
608   ir_annotation->type = GetType(type_index);
609   ir_annotation->visibility = dex::kVisibilityEncoded;
610 
611   for (dex::u4 i = 0; i < elements_count; ++i) {
612     auto ir_element = dex_ir_->Alloc<ir::AnnotationElement>();
613 
614     ir_element->name = GetString(dex::ReadULeb128(pptr));
615     ir_element->value = ParseEncodedValue(pptr);
616 
617     ir_annotation->elements.push_back(ir_element);
618   }
619 
620   return ir_annotation;
621 }
622 
ParseEncodedArray(const dex::u1 ** pptr)623 ir::EncodedArray* Reader::ParseEncodedArray(const dex::u1** pptr) {
624   auto ir_encoded_array = dex_ir_->Alloc<ir::EncodedArray>();
625 
626   dex::u4 count = dex::ReadULeb128(pptr);
627   for (dex::u4 i = 0; i < count; ++i) {
628     ir_encoded_array->values.push_back(ParseEncodedValue(pptr));
629   }
630 
631   return ir_encoded_array;
632 }
633 
ExtractEncodedArray(dex::u4 offset)634 ir::EncodedArray* Reader::ExtractEncodedArray(dex::u4 offset) {
635   if (offset == 0) {
636     return nullptr;
637   }
638 
639   // first check if we already extracted the same "annotation_item"
640   auto& ir_encoded_array = encoded_arrays_[offset];
641   if (ir_encoded_array == nullptr) {
642     auto ptr = dataPtr<dex::u1>(offset);
643     ir_encoded_array = ParseEncodedArray(&ptr);
644   }
645   return ir_encoded_array;
646 }
647 
ExtractDebugInfo(dex::u4 offset)648 ir::DebugInfo* Reader::ExtractDebugInfo(dex::u4 offset) {
649   if (offset == 0) {
650     return nullptr;
651   }
652 
653   auto ir_debug_info = dex_ir_->Alloc<ir::DebugInfo>();
654   const dex::u1* ptr = dataPtr<dex::u1>(offset);
655 
656   ir_debug_info->line_start = dex::ReadULeb128(&ptr);
657 
658   // TODO: implicit this param for non-static methods?
659   dex::u4 param_count = dex::ReadULeb128(&ptr);
660   for (dex::u4 i = 0; i < param_count; ++i) {
661     dex::u4 name_index = dex::ReadULeb128(&ptr) - 1;
662     auto ir_string =
663         (name_index == dex::kNoIndex) ? nullptr : GetString(name_index);
664     ir_debug_info->param_names.push_back(ir_string);
665   }
666 
667   // parse the debug info opcodes and note the
668   // references to strings and types (to make sure the IR
669   // is the full closure of all referenced items)
670   //
671   // TODO: design a generic debug info iterator?
672   //
673   auto base_ptr = ptr;
674   dex::u1 opcode = 0;
675   while ((opcode = *ptr++) != dex::DBG_END_SEQUENCE) {
676     switch (opcode) {
677       case dex::DBG_ADVANCE_PC:
678         // addr_diff
679         dex::ReadULeb128(&ptr);
680         break;
681 
682       case dex::DBG_ADVANCE_LINE:
683         // line_diff
684         dex::ReadSLeb128(&ptr);
685         break;
686 
687       case dex::DBG_START_LOCAL: {
688         // register_num
689         dex::ReadULeb128(&ptr);
690 
691         dex::u4 name_index = dex::ReadULeb128(&ptr) - 1;
692         if (name_index != dex::kNoIndex) {
693           GetString(name_index);
694         }
695 
696         dex::u4 type_index = dex::ReadULeb128(&ptr) - 1;
697         if (type_index != dex::kNoIndex) {
698           GetType(type_index);
699         }
700       } break;
701 
702       case dex::DBG_START_LOCAL_EXTENDED: {
703         // register_num
704         dex::ReadULeb128(&ptr);
705 
706         dex::u4 name_index = dex::ReadULeb128(&ptr) - 1;
707         if (name_index != dex::kNoIndex) {
708           GetString(name_index);
709         }
710 
711         dex::u4 type_index = dex::ReadULeb128(&ptr) - 1;
712         if (type_index != dex::kNoIndex) {
713           GetType(type_index);
714         }
715 
716         dex::u4 sig_index = dex::ReadULeb128(&ptr) - 1;
717         if (sig_index != dex::kNoIndex) {
718           GetString(sig_index);
719         }
720       } break;
721 
722       case dex::DBG_END_LOCAL:
723       case dex::DBG_RESTART_LOCAL:
724         // register_num
725         dex::ReadULeb128(&ptr);
726         break;
727 
728       case dex::DBG_SET_FILE: {
729         dex::u4 name_index = dex::ReadULeb128(&ptr) - 1;
730         if (name_index != dex::kNoIndex) {
731           GetString(name_index);
732         }
733       } break;
734     }
735   }
736 
737   ir_debug_info->data = slicer::MemView(base_ptr, ptr - base_ptr);
738 
739   return ir_debug_info;
740 }
741 
ExtractCode(dex::u4 offset)742 ir::Code* Reader::ExtractCode(dex::u4 offset) {
743   if (offset == 0) {
744     return nullptr;
745   }
746 
747   SLICER_CHECK_EQ(offset % 4, 0);
748 
749   auto dex_code = dataPtr<dex::Code>(offset);
750   auto ir_code = dex_ir_->Alloc<ir::Code>();
751 
752   ir_code->registers = dex_code->registers_size;
753   ir_code->ins_count = dex_code->ins_size;
754   ir_code->outs_count = dex_code->outs_size;
755 
756   // instructions array
757   ir_code->instructions =
758       slicer::ArrayView<const dex::u2>(dex_code->insns, dex_code->insns_size);
759 
760   // parse the instructions to discover references to other
761   // IR nodes (see debug info stream parsing too)
762   ParseInstructions(ir_code->instructions);
763 
764   // try blocks & handlers
765   //
766   // TODO: a generic try/catch blocks iterator?
767   //
768   if (dex_code->tries_size != 0) {
769     dex::u4 aligned_count = (dex_code->insns_size + 1) / 2 * 2;
770     auto tries =
771         reinterpret_cast<const dex::TryBlock*>(dex_code->insns + aligned_count);
772     auto handlers_list =
773         reinterpret_cast<const dex::u1*>(tries + dex_code->tries_size);
774 
775     ir_code->try_blocks =
776         slicer::ArrayView<const dex::TryBlock>(tries, dex_code->tries_size);
777 
778     // parse the handlers list (and discover embedded references)
779     auto ptr = handlers_list;
780 
781     dex::u4 handlers_count = dex::ReadULeb128(&ptr);
782     SLICER_WEAK_CHECK(handlers_count <= dex_code->tries_size);
783 
784     for (dex::u4 handler_index = 0; handler_index < handlers_count; ++handler_index) {
785       int catch_count = dex::ReadSLeb128(&ptr);
786 
787       for (int catch_index = 0; catch_index < std::abs(catch_count); ++catch_index) {
788         dex::u4 type_index = dex::ReadULeb128(&ptr);
789         GetType(type_index);
790 
791         // address
792         dex::ReadULeb128(&ptr);
793       }
794 
795       if (catch_count < 1) {
796         // catch_all_addr
797         dex::ReadULeb128(&ptr);
798       }
799     }
800 
801     ir_code->catch_handlers = slicer::MemView(handlers_list, ptr - handlers_list);
802   }
803 
804   ir_code->debug_info = ExtractDebugInfo(dex_code->debug_info_off);
805 
806   return ir_code;
807 }
808 
ParseEncodedMethod(const dex::u1 ** pptr,dex::u4 * base_index)809 ir::EncodedMethod* Reader::ParseEncodedMethod(const dex::u1** pptr, dex::u4* base_index) {
810   auto ir_encoded_method = dex_ir_->Alloc<ir::EncodedMethod>();
811 
812   auto method_index = dex::ReadULeb128(pptr);
813   SLICER_CHECK_NE(method_index, dex::kNoIndex);
814   if (*base_index != dex::kNoIndex) {
815     SLICER_CHECK_NE(method_index, 0);
816     method_index += *base_index;
817   }
818   *base_index = method_index;
819 
820   ir_encoded_method->decl = GetMethodDecl(method_index);
821   ir_encoded_method->access_flags = dex::ReadULeb128(pptr);
822 
823   dex::u4 code_offset = dex::ReadULeb128(pptr);
824   ir_encoded_method->code = ExtractCode(code_offset);
825 
826   // update the methods lookup table
827   dex_ir_->methods_lookup.Insert(ir_encoded_method);
828 
829   return ir_encoded_method;
830 }
831 
ParseType(dex::u4 index)832 ir::Type* Reader::ParseType(dex::u4 index) {
833   auto& dex_type = TypeIds()[index];
834   auto ir_type = dex_ir_->Alloc<ir::Type>();
835 
836   ir_type->descriptor = GetString(dex_type.descriptor_idx);
837   ir_type->orig_index = index;
838 
839   return ir_type;
840 }
841 
ParseFieldDecl(dex::u4 index)842 ir::FieldDecl* Reader::ParseFieldDecl(dex::u4 index) {
843   auto& dex_field = FieldIds()[index];
844   auto ir_field = dex_ir_->Alloc<ir::FieldDecl>();
845 
846   ir_field->name = GetString(dex_field.name_idx);
847   ir_field->type = GetType(dex_field.type_idx);
848   ir_field->parent = GetType(dex_field.class_idx);
849   ir_field->orig_index = index;
850 
851   return ir_field;
852 }
853 
ParseMethodHandle(dex::u4 index)854 ir::MethodHandle* Reader::ParseMethodHandle(dex::u4 index){
855   auto& dex_method_handle = MethodHandles()[index];
856   auto ir_method_handle = dex_ir_->Alloc<ir::MethodHandle>();
857 
858   ir_method_handle->method_handle_type = dex_method_handle.method_handle_type;
859 
860   if(ir_method_handle->IsField()){
861     ir_method_handle->field = GetFieldDecl(dex_method_handle.field_or_method_id);
862   }
863   else {
864     ir_method_handle->method = GetMethodDecl(dex_method_handle.field_or_method_id);
865   }
866 
867   return ir_method_handle;
868 }
869 
ParseMethodDecl(dex::u4 index)870 ir::MethodDecl* Reader::ParseMethodDecl(dex::u4 index) {
871   auto& dex_method = MethodIds()[index];
872   auto ir_method = dex_ir_->Alloc<ir::MethodDecl>();
873 
874   ir_method->name = GetString(dex_method.name_idx);
875   ir_method->prototype = GetProto(dex_method.proto_idx);
876   ir_method->parent = GetType(dex_method.class_idx);
877   ir_method->orig_index = index;
878 
879   return ir_method;
880 }
881 
ExtractTypeList(dex::u4 offset)882 ir::TypeList* Reader::ExtractTypeList(dex::u4 offset) {
883   if (offset == 0) {
884     return nullptr;
885   }
886 
887   // first check to see if we already extracted the same "type_list"
888   auto& ir_type_list = type_lists_[offset];
889   if (ir_type_list == nullptr) {
890     ir_type_list = dex_ir_->Alloc<ir::TypeList>();
891 
892     auto dex_type_list = dataPtr<dex::TypeList>(offset);
893     SLICER_WEAK_CHECK(dex_type_list->size > 0);
894 
895     for (dex::u4 i = 0; i < dex_type_list->size; ++i) {
896       ir_type_list->types.push_back(GetType(dex_type_list->list[i].type_idx));
897     }
898   }
899 
900   return ir_type_list;
901 }
902 
ParseProto(dex::u4 index)903 ir::Proto* Reader::ParseProto(dex::u4 index) {
904   auto& dex_proto = ProtoIds()[index];
905   auto ir_proto = dex_ir_->Alloc<ir::Proto>();
906 
907   ir_proto->shorty = GetString(dex_proto.shorty_idx);
908   ir_proto->return_type = GetType(dex_proto.return_type_idx);
909   ir_proto->param_types = ExtractTypeList(dex_proto.parameters_off);
910   ir_proto->orig_index = index;
911 
912   // update the prototypes lookup table
913   dex_ir_->prototypes_lookup.Insert(ir_proto);
914 
915   return ir_proto;
916 }
917 
ParseString(dex::u4 index)918 ir::String* Reader::ParseString(dex::u4 index) {
919   auto ir_string = dex_ir_->Alloc<ir::String>();
920 
921   auto data = GetStringData(index);
922   auto cstr = data;
923   dex::ReadULeb128(&cstr);
924   size_t size = (cstr - data) + ::strlen(reinterpret_cast<const char*>(cstr)) + 1;
925 
926   ir_string->data = slicer::MemView(data, size);
927   ir_string->orig_index = index;
928 
929   // update the strings lookup table
930   dex_ir_->strings_lookup.Insert(ir_string);
931 
932   return ir_string;
933 }
934 
ParseInstructions(slicer::ArrayView<const dex::u2> code)935 void Reader::ParseInstructions(slicer::ArrayView<const dex::u2> code) {
936   const dex::u2* ptr = code.begin();
937   while (ptr < code.end()) {
938     auto dex_instr = dex::DecodeInstruction(ptr);
939 
940     dex::u4 index = dex::kNoIndex;
941     dex::u4 index2 = dex::kNoIndex;
942     switch (dex::GetFormatFromOpcode(dex_instr.opcode)) {
943       case dex::k20bc:
944       case dex::k21c:
945       case dex::k31c:
946       case dex::k35c:
947       case dex::k3rc:
948         index = dex_instr.vB;
949         break;
950 
951       case dex::k45cc:
952       case dex::k4rcc:
953         index = dex_instr.vB;
954         index2 = dex_instr.arg[4];
955         break;
956 
957       case dex::k22c:
958         index = dex_instr.vC;
959         break;
960 
961       default:
962         break;
963     }
964 
965     switch (GetIndexTypeFromOpcode(dex_instr.opcode)) {
966       case dex::kIndexStringRef:
967         GetString(index);
968         break;
969 
970       case dex::kIndexTypeRef:
971         GetType(index);
972         break;
973 
974       case dex::kIndexFieldRef:
975         GetFieldDecl(index);
976         break;
977 
978       case dex::kIndexMethodRef:
979         GetMethodDecl(index);
980         break;
981 
982       case dex::kIndexMethodAndProtoRef:
983         GetMethodDecl(index);
984         GetProto(index2);
985         break;
986 
987       case dex::kIndexMethodHandleRef:
988         GetMethodHandle(index);
989         break;
990 
991       default:
992         break;
993     }
994 
995     auto isize = dex::GetWidthFromBytecode(ptr);
996     SLICER_CHECK_GT(isize, 0);
997     ptr += isize;
998   }
999   SLICER_CHECK_EQ(ptr, code.end());
1000 }
1001 
1002 // Basic .dex header structural checks
ValidateHeader()1003 void Reader::ValidateHeader() {
1004   SLICER_CHECK_GT(size_, dex::Header::kV40Size);
1005 
1006   // Known issue: For performance reasons the initial size_ passed to jvmti events might be an
1007   // estimate. b/72402467
1008   SLICER_CHECK_LE(header_->file_size, size_);
1009   // Check that we support this version of dex header
1010   SLICER_CHECK(
1011       header_->header_size == dex::Header::kV40Size ||
1012       header_->header_size == dex::Header::kV41Size);
1013   SLICER_CHECK_EQ(header_->endian_tag, dex::kEndianConstant);
1014   SLICER_CHECK_EQ(header_->data_size % 4, 0);
1015 
1016   // If the dex file is within container with other dex files,
1017   // adjust the base address to the start of the container.
1018   SLICER_CHECK_LE(header_->ContainerSize() - header_->ContainerOff(), size_);
1019   image_ -= header_->ContainerOff();
1020   size_ = header_->ContainerSize();
1021 
1022   // Known issue: The fields might be slighly corrupted b/65452964
1023   // SLICER_CHECK_LE(header_->data_off + header_->data_size, size_);
1024 
1025   SLICER_CHECK_EQ(header_->string_ids_off % 4, 0);
1026   SLICER_CHECK_LT(header_->type_ids_size, 65536);
1027   SLICER_CHECK_EQ(header_->type_ids_off % 4, 0);
1028   SLICER_CHECK_LT(header_->proto_ids_size, 65536);
1029   SLICER_CHECK_EQ(header_->proto_ids_off % 4, 0);
1030   SLICER_CHECK_EQ(header_->field_ids_off % 4, 0);
1031   SLICER_CHECK_EQ(header_->method_ids_off % 4, 0);
1032   SLICER_CHECK_EQ(header_->class_defs_off % 4, 0);
1033   SLICER_CHECK_GE(header_->map_off, header_->data_off);
1034   SLICER_CHECK_LT(header_->map_off, size_);
1035   SLICER_CHECK_EQ(header_->link_size, 0);
1036   SLICER_CHECK_EQ(header_->link_off, 0);
1037   SLICER_CHECK_EQ(header_->data_off % 4, 0);
1038   SLICER_CHECK_EQ(header_->map_off % 4, 0);
1039 
1040   // we seem to have .dex files with extra bytes at the end ...
1041   // Known issue: For performance reasons the initial size_ passed to jvmti events might be an
1042   // estimate. b/72402467
1043   SLICER_WEAK_CHECK(header_->data_off + header_->data_size <= size_);
1044 
1045   // but we should still have the whole data section
1046 
1047   // Known issue: The fields might be slightly corrupted b/65452964
1048   // Known issue: For performance reasons the initial size_ passed to jvmti events might be an
1049   // estimate. b/72402467
1050   // SLICER_CHECK_LE(header_->data_off + header_->data_size, size_);
1051 
1052   // validate the map
1053   // (map section size = sizeof(MapList::size) + sizeof(MapList::list[size])
1054   auto map_list = ptr<dex::MapList>(header_->map_off);
1055   SLICER_CHECK_GT(map_list->size, 0);
1056   auto map_section_size =
1057       sizeof(dex::u4) + sizeof(dex::MapItem) * map_list->size;
1058   SLICER_CHECK_LE(header_->map_off + map_section_size, size_);
1059 }
1060 
1061 }  // namespace dex
1062