1 /*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "slicer/reader.h"
18
19 #include "slicer/chronometer.h"
20 #include "slicer/dex_bytecode.h"
21 #include "slicer/dex_leb128.h"
22
23 #include <assert.h>
24 #include <string.h>
25 #include <type_traits>
26 #include <cstdlib>
27
28 namespace dex {
29
Reader(const dex::u1 * image,size_t size)30 Reader::Reader(const dex::u1* image, size_t size) : image_(image), size_(size) {
31 // init the header reference
32 header_ = ptr<dex::Header>(0);
33 ValidateHeader();
34
35 // start with an "empty" .dex IR
36 dex_ir_ = std::make_shared<ir::DexFile>();
37 dex_ir_->magic = slicer::MemView(header_, sizeof(dex::Header::magic));
38 }
39
ClassDefs() const40 slicer::ArrayView<const dex::ClassDef> Reader::ClassDefs() const {
41 return section<dex::ClassDef>(header_->class_defs_off,
42 header_->class_defs_size);
43 }
44
StringIds() const45 slicer::ArrayView<const dex::StringId> Reader::StringIds() const {
46 return section<dex::StringId>(header_->string_ids_off,
47 header_->string_ids_size);
48 }
49
TypeIds() const50 slicer::ArrayView<const dex::TypeId> Reader::TypeIds() const {
51 return section<dex::TypeId>(header_->type_ids_off,
52 header_->type_ids_size);
53 }
54
FieldIds() const55 slicer::ArrayView<const dex::FieldId> Reader::FieldIds() const {
56 return section<dex::FieldId>(header_->field_ids_off,
57 header_->field_ids_size);
58 }
59
MethodIds() const60 slicer::ArrayView<const dex::MethodId> Reader::MethodIds() const {
61 return section<dex::MethodId>(header_->method_ids_off,
62 header_->method_ids_size);
63 }
64
ProtoIds() const65 slicer::ArrayView<const dex::ProtoId> Reader::ProtoIds() const {
66 return section<dex::ProtoId>(header_->proto_ids_off,
67 header_->proto_ids_size);
68 }
69
MethodHandles() const70 slicer::ArrayView<const dex::MethodHandle> Reader::MethodHandles() const {
71 const dex::MapList* ml = DexMapList();
72 if(ml == nullptr){
73 slicer::ArrayView<const dex::MethodHandle> ret;
74 return ret;
75 }
76
77 // Find MethodHandle entry
78 const dex::MapItem* mi = nullptr;
79 for(int i = 0; i < ml->size; i++){
80 if(ml->list[i].type == dex::kMethodHandleItem){
81 mi = &(ml->list[i]);
82 break;
83 }
84 }
85
86 if(mi == nullptr){
87 slicer::ArrayView<const dex::MethodHandle> ret;
88 return ret;
89 }
90
91 return section<dex::MethodHandle>(mi->offset, mi->size);
92 }
93
DexMapList() const94 const dex::MapList* Reader::DexMapList() const {
95 return dataPtr<dex::MapList>(header_->map_off);
96 }
97
GetStringMUTF8(dex::u4 index) const98 const char* Reader::GetStringMUTF8(dex::u4 index) const {
99 if (index == dex::kNoIndex) {
100 return "<no_string>";
101 }
102 const dex::u1* strData = GetStringData(index);
103 dex::ReadULeb128(&strData);
104 return reinterpret_cast<const char*>(strData);
105 }
106
CreateFullIr()107 void Reader::CreateFullIr() {
108 size_t classCount = ClassDefs().size();
109 for (size_t i = 0; i < classCount; ++i) {
110 CreateClassIr(i);
111 }
112 }
113
CreateClassIr(dex::u4 index)114 void Reader::CreateClassIr(dex::u4 index) {
115 auto ir_class = GetClass(index);
116 SLICER_CHECK_NE(ir_class, nullptr);
117 }
118
119 // Returns the index of the class with the specified
120 // descriptor, or kNoIndex if not found
FindClassIndex(const char * class_descriptor) const121 dex::u4 Reader::FindClassIndex(const char* class_descriptor) const {
122 auto classes = ClassDefs();
123 auto types = TypeIds();
124 for (dex::u4 i = 0; i < classes.size(); ++i) {
125 auto typeId = types[classes[i].class_idx];
126 const char* descriptor = GetStringMUTF8(typeId.descriptor_idx);
127 if (strcmp(class_descriptor, descriptor) == 0) {
128 return i;
129 }
130 }
131 return dex::kNoIndex;
132 }
133
134 // map a .dex index to corresponding .dex IR node
135 //
136 // NOTES:
137 // 1. the mapping between an index and the indexed
138 // .dex IR nodes is 1:1
139 // 2. we do a single index lookup for both existing
140 // nodes as well as new nodes
141 // 3. placeholder is an invalid, but non-null pointer value
142 // used to check that the mapping lookup/update is atomic
143 // 4. there should be no recursion with the same index
144 // (we use the placeholder value to guard against this too)
145 //
GetClass(dex::u4 index)146 ir::Class* Reader::GetClass(dex::u4 index) {
147 SLICER_CHECK_NE(index, dex::kNoIndex);
148 auto& p = dex_ir_->classes_map[index];
149 auto placeholder = reinterpret_cast<ir::Class*>(1);
150 if (p == nullptr) {
151 p = placeholder;
152 auto newClass = ParseClass(index);
153 SLICER_CHECK_EQ(p, placeholder);
154 p = newClass;
155 dex_ir_->classes_indexes.MarkUsedIndex(index);
156 }
157 SLICER_CHECK_NE(p, placeholder);
158 return p;
159 }
160
161 // map a .dex index to corresponding .dex IR node
162 // (see the Reader::GetClass() comments)
GetType(dex::u4 index)163 ir::Type* Reader::GetType(dex::u4 index) {
164 SLICER_CHECK_NE(index, dex::kNoIndex);
165 auto& p = dex_ir_->types_map[index];
166 auto placeholder = reinterpret_cast<ir::Type*>(1);
167 if (p == nullptr) {
168 p = placeholder;
169 auto newType = ParseType(index);
170 SLICER_CHECK_EQ(p, placeholder);
171 p = newType;
172 dex_ir_->types_indexes.MarkUsedIndex(index);
173 }
174 SLICER_CHECK_NE(p, placeholder);
175 return p;
176 }
177
178 // map a .dex index to corresponding .dex IR node
179 // (see the Reader::GetClass() comments)
GetFieldDecl(dex::u4 index)180 ir::FieldDecl* Reader::GetFieldDecl(dex::u4 index) {
181 SLICER_CHECK_NE(index, dex::kNoIndex);
182 auto& p = dex_ir_->fields_map[index];
183 auto placeholder = reinterpret_cast<ir::FieldDecl*>(1);
184 if (p == nullptr) {
185 p = placeholder;
186 auto newField = ParseFieldDecl(index);
187 SLICER_CHECK_EQ(p, placeholder);
188 p = newField;
189 dex_ir_->fields_indexes.MarkUsedIndex(index);
190 }
191 SLICER_CHECK_NE(p, placeholder);
192 return p;
193 }
194
GetMethodHandle(dex::u4 index)195 ir::MethodHandle* Reader::GetMethodHandle(dex::u4 index){
196 SLICER_CHECK_NE(index, dex::kNoIndex);
197 auto& p = dex_ir_->method_handles_map[index];
198 auto placeholder = reinterpret_cast<ir::MethodHandle*>(1);
199 if(p == nullptr) {
200 p = placeholder;
201 auto newMethodHandle = ParseMethodHandle(index);
202 SLICER_CHECK_EQ(p, placeholder);
203 p = newMethodHandle;
204 dex_ir_->method_handle_indexes.MarkUsedIndex(index);
205 }
206
207 SLICER_CHECK_NE(p, placeholder);
208 return p;
209 }
210
211 // map a .dex index to corresponding .dex IR node
212 // (see the Reader::GetClass() comments)
GetMethodDecl(dex::u4 index)213 ir::MethodDecl* Reader::GetMethodDecl(dex::u4 index) {
214 SLICER_CHECK_NE(index, dex::kNoIndex);
215 auto& p = dex_ir_->methods_map[index];
216 auto placeholder = reinterpret_cast<ir::MethodDecl*>(1);
217 if (p == nullptr) {
218 p = placeholder;
219 auto newMethod = ParseMethodDecl(index);
220 SLICER_CHECK_EQ(p, placeholder);
221 p = newMethod;
222 dex_ir_->methods_indexes.MarkUsedIndex(index);
223 }
224 SLICER_CHECK_NE(p, placeholder);
225 return p;
226 }
227
228 // map a .dex index to corresponding .dex IR node
229 // (see the Reader::GetClass() comments)
GetProto(dex::u4 index)230 ir::Proto* Reader::GetProto(dex::u4 index) {
231 SLICER_CHECK_NE(index, dex::kNoIndex);
232 auto& p = dex_ir_->protos_map[index];
233 auto placeholder = reinterpret_cast<ir::Proto*>(1);
234 if (p == nullptr) {
235 p = placeholder;
236 auto newProto = ParseProto(index);
237 SLICER_CHECK_EQ(p, placeholder);
238 p = newProto;
239 dex_ir_->protos_indexes.MarkUsedIndex(index);
240 }
241 SLICER_CHECK_NE(p, placeholder);
242 return p;
243 }
244
245 // map a .dex index to corresponding .dex IR node
246 // (see the Reader::GetClass() comments)
GetString(dex::u4 index)247 ir::String* Reader::GetString(dex::u4 index) {
248 SLICER_CHECK_NE(index, dex::kNoIndex);
249 auto& p = dex_ir_->strings_map[index];
250 auto placeholder = reinterpret_cast<ir::String*>(1);
251 if (p == nullptr) {
252 p = placeholder;
253 auto newString = ParseString(index);
254 SLICER_CHECK_EQ(p, placeholder);
255 p = newString;
256 dex_ir_->strings_indexes.MarkUsedIndex(index);
257 }
258 SLICER_CHECK_NE(p, placeholder);
259 return p;
260 }
261
ParseClass(dex::u4 index)262 ir::Class* Reader::ParseClass(dex::u4 index) {
263 auto& dex_class_def = ClassDefs()[index];
264 auto ir_class = dex_ir_->Alloc<ir::Class>();
265
266 ir_class->type = GetType(dex_class_def.class_idx);
267 assert(ir_class->type->class_def == nullptr);
268 ir_class->type->class_def = ir_class;
269
270 ir_class->access_flags = dex_class_def.access_flags;
271 ir_class->interfaces = ExtractTypeList(dex_class_def.interfaces_off);
272
273 if (dex_class_def.superclass_idx != dex::kNoIndex) {
274 ir_class->super_class = GetType(dex_class_def.superclass_idx);
275 }
276
277 if (dex_class_def.source_file_idx != dex::kNoIndex) {
278 ir_class->source_file = GetString(dex_class_def.source_file_idx);
279 }
280
281 if (dex_class_def.class_data_off != 0) {
282 const dex::u1* class_data = dataPtr<dex::u1>(dex_class_def.class_data_off);
283
284 dex::u4 static_fields_count = dex::ReadULeb128(&class_data);
285 dex::u4 instance_fields_count = dex::ReadULeb128(&class_data);
286 dex::u4 direct_methods_count = dex::ReadULeb128(&class_data);
287 dex::u4 virtual_methods_count = dex::ReadULeb128(&class_data);
288
289 dex::u4 base_index = dex::kNoIndex;
290 for (dex::u4 i = 0; i < static_fields_count; ++i) {
291 auto field = ParseEncodedField(&class_data, &base_index);
292 ir_class->static_fields.push_back(field);
293 }
294
295 base_index = dex::kNoIndex;
296 for (dex::u4 i = 0; i < instance_fields_count; ++i) {
297 auto field = ParseEncodedField(&class_data, &base_index);
298 ir_class->instance_fields.push_back(field);
299 }
300
301 base_index = dex::kNoIndex;
302 for (dex::u4 i = 0; i < direct_methods_count; ++i) {
303 auto method = ParseEncodedMethod(&class_data, &base_index);
304 ir_class->direct_methods.push_back(method);
305 }
306
307 base_index = dex::kNoIndex;
308 for (dex::u4 i = 0; i < virtual_methods_count; ++i) {
309 auto method = ParseEncodedMethod(&class_data, &base_index);
310 ir_class->virtual_methods.push_back(method);
311 }
312 }
313
314 ir_class->static_init = ExtractEncodedArray(dex_class_def.static_values_off);
315 ir_class->annotations = ExtractAnnotations(dex_class_def.annotations_off);
316 ir_class->orig_index = index;
317
318 return ir_class;
319 }
320
ExtractAnnotations(dex::u4 offset)321 ir::AnnotationsDirectory* Reader::ExtractAnnotations(dex::u4 offset) {
322 if (offset == 0) {
323 return nullptr;
324 }
325
326 SLICER_CHECK_EQ(offset % 4, 0);
327
328 // first check if we already extracted the same "annotations_directory_item"
329 auto& ir_annotations = annotations_directories_[offset];
330 if (ir_annotations == nullptr) {
331 ir_annotations = dex_ir_->Alloc<ir::AnnotationsDirectory>();
332
333 auto dex_annotations = dataPtr<dex::AnnotationsDirectoryItem>(offset);
334
335 ir_annotations->class_annotation =
336 ExtractAnnotationSet(dex_annotations->class_annotations_off);
337
338 const dex::u1* ptr = reinterpret_cast<const dex::u1*>(dex_annotations + 1);
339
340 for (dex::u4 i = 0; i < dex_annotations->fields_size; ++i) {
341 ir_annotations->field_annotations.push_back(ParseFieldAnnotation(&ptr));
342 }
343
344 for (dex::u4 i = 0; i < dex_annotations->methods_size; ++i) {
345 ir_annotations->method_annotations.push_back(ParseMethodAnnotation(&ptr));
346 }
347
348 for (dex::u4 i = 0; i < dex_annotations->parameters_size; ++i) {
349 ir_annotations->param_annotations.push_back(ParseParamAnnotation(&ptr));
350 }
351 }
352 return ir_annotations;
353 }
354
ExtractAnnotationItem(dex::u4 offset)355 ir::Annotation* Reader::ExtractAnnotationItem(dex::u4 offset) {
356 SLICER_CHECK_NE(offset, 0);
357
358 // first check if we already extracted the same "annotation_item"
359 auto& ir_annotation = annotations_[offset];
360 if (ir_annotation == nullptr) {
361 auto dexAnnotationItem = dataPtr<dex::AnnotationItem>(offset);
362 const dex::u1* ptr = dexAnnotationItem->annotation;
363 ir_annotation = ParseAnnotation(&ptr);
364 ir_annotation->visibility = dexAnnotationItem->visibility;
365 }
366 return ir_annotation;
367 }
368
ExtractAnnotationSet(dex::u4 offset)369 ir::AnnotationSet* Reader::ExtractAnnotationSet(dex::u4 offset) {
370 if (offset == 0) {
371 return nullptr;
372 }
373
374 SLICER_CHECK_EQ(offset % 4, 0);
375
376 // first check if we already extracted the same "annotation_set_item"
377 auto& ir_annotation_set = annotation_sets_[offset];
378 if (ir_annotation_set == nullptr) {
379 ir_annotation_set = dex_ir_->Alloc<ir::AnnotationSet>();
380
381 auto dex_annotation_set = dataPtr<dex::AnnotationSetItem>(offset);
382 for (dex::u4 i = 0; i < dex_annotation_set->size; ++i) {
383 auto ir_annotation = ExtractAnnotationItem(dex_annotation_set->entries[i]);
384 assert(ir_annotation != nullptr);
385 ir_annotation_set->annotations.push_back(ir_annotation);
386 }
387 }
388 return ir_annotation_set;
389 }
390
ExtractAnnotationSetRefList(dex::u4 offset)391 ir::AnnotationSetRefList* Reader::ExtractAnnotationSetRefList(dex::u4 offset) {
392 SLICER_CHECK_EQ(offset % 4, 0);
393
394 auto dex_annotation_set_ref_list = dataPtr<dex::AnnotationSetRefList>(offset);
395 auto ir_annotation_set_ref_list = dex_ir_->Alloc<ir::AnnotationSetRefList>();
396
397 for (dex::u4 i = 0; i < dex_annotation_set_ref_list->size; ++i) {
398 dex::u4 entry_offset = dex_annotation_set_ref_list->list[i].annotations_off;
399 if (entry_offset != 0) {
400 auto ir_annotation_set = ExtractAnnotationSet(entry_offset);
401 SLICER_CHECK_NE(ir_annotation_set, nullptr);
402 ir_annotation_set_ref_list->annotations.push_back(ir_annotation_set);
403 }
404 }
405
406 return ir_annotation_set_ref_list;
407 }
408
ParseFieldAnnotation(const dex::u1 ** pptr)409 ir::FieldAnnotation* Reader::ParseFieldAnnotation(const dex::u1** pptr) {
410 auto dex_field_annotation = reinterpret_cast<const dex::FieldAnnotationsItem*>(*pptr);
411 auto ir_field_annotation = dex_ir_->Alloc<ir::FieldAnnotation>();
412
413 ir_field_annotation->field_decl = GetFieldDecl(dex_field_annotation->field_idx);
414
415 ir_field_annotation->annotations =
416 ExtractAnnotationSet(dex_field_annotation->annotations_off);
417 SLICER_CHECK_NE(ir_field_annotation->annotations, nullptr);
418
419 *pptr += sizeof(dex::FieldAnnotationsItem);
420 return ir_field_annotation;
421 }
422
ParseMethodAnnotation(const dex::u1 ** pptr)423 ir::MethodAnnotation* Reader::ParseMethodAnnotation(const dex::u1** pptr) {
424 auto dex_method_annotation =
425 reinterpret_cast<const dex::MethodAnnotationsItem*>(*pptr);
426 auto ir_method_annotation = dex_ir_->Alloc<ir::MethodAnnotation>();
427
428 ir_method_annotation->method_decl = GetMethodDecl(dex_method_annotation->method_idx);
429
430 ir_method_annotation->annotations =
431 ExtractAnnotationSet(dex_method_annotation->annotations_off);
432 SLICER_CHECK_NE(ir_method_annotation->annotations, nullptr);
433
434 *pptr += sizeof(dex::MethodAnnotationsItem);
435 return ir_method_annotation;
436 }
437
ParseParamAnnotation(const dex::u1 ** pptr)438 ir::ParamAnnotation* Reader::ParseParamAnnotation(const dex::u1** pptr) {
439 auto dex_param_annotation =
440 reinterpret_cast<const dex::ParameterAnnotationsItem*>(*pptr);
441 auto ir_param_annotation = dex_ir_->Alloc<ir::ParamAnnotation>();
442
443 ir_param_annotation->method_decl = GetMethodDecl(dex_param_annotation->method_idx);
444
445 ir_param_annotation->annotations =
446 ExtractAnnotationSetRefList(dex_param_annotation->annotations_off);
447 SLICER_CHECK_NE(ir_param_annotation->annotations, nullptr);
448
449 *pptr += sizeof(dex::ParameterAnnotationsItem);
450 return ir_param_annotation;
451 }
452
ParseEncodedField(const dex::u1 ** pptr,dex::u4 * base_index)453 ir::EncodedField* Reader::ParseEncodedField(const dex::u1** pptr, dex::u4* base_index) {
454 auto ir_encoded_field = dex_ir_->Alloc<ir::EncodedField>();
455
456 auto field_index = dex::ReadULeb128(pptr);
457 SLICER_CHECK_NE(field_index, dex::kNoIndex);
458 if (*base_index != dex::kNoIndex) {
459 SLICER_CHECK_NE(field_index, 0);
460 field_index += *base_index;
461 }
462 *base_index = field_index;
463
464 ir_encoded_field->decl = GetFieldDecl(field_index);
465 ir_encoded_field->access_flags = dex::ReadULeb128(pptr);
466
467 return ir_encoded_field;
468 }
469
470 // Parse an encoded variable-length integer value
471 // (sign-extend signed types, zero-extend unsigned types)
472 template <class T>
ParseIntValue(const dex::u1 ** pptr,size_t size)473 static T ParseIntValue(const dex::u1** pptr, size_t size) {
474 static_assert(std::is_integral<T>::value, "must be an integral type");
475
476 SLICER_CHECK_GT(size, 0);
477 SLICER_CHECK_LE(size, sizeof(T));
478
479 T value = 0;
480 for (int i = 0; i < size; ++i) {
481 value |= T(*(*pptr)++) << (i * 8);
482 }
483
484 // sign-extend?
485 if (std::is_signed<T>::value) {
486 size_t shift = (sizeof(T) - size) * 8;
487 value = T(value << shift) >> shift;
488 }
489
490 return value;
491 }
492
493 // Parse an encoded variable-length floating point value
494 // (zero-extend to the right)
495 template <class T>
ParseFloatValue(const dex::u1 ** pptr,size_t size)496 static T ParseFloatValue(const dex::u1** pptr, size_t size) {
497 SLICER_CHECK_GT(size, 0);
498 SLICER_CHECK_LE(size, sizeof(T));
499
500 T value = 0;
501 int start_byte = sizeof(T) - size;
502 for (dex::u1* p = reinterpret_cast<dex::u1*>(&value) + start_byte; size > 0;
503 --size) {
504 *p++ = *(*pptr)++;
505 }
506 return value;
507 }
508
ParseEncodedValue(const dex::u1 ** pptr)509 ir::EncodedValue* Reader::ParseEncodedValue(const dex::u1** pptr) {
510 auto ir_encoded_value = dex_ir_->Alloc<ir::EncodedValue>();
511
512 SLICER_EXTRA(auto base_ptr = *pptr);
513
514 dex::u1 header = *(*pptr)++;
515 dex::u1 type = header & dex::kEncodedValueTypeMask;
516 dex::u1 arg = header >> dex::kEncodedValueArgShift;
517
518 ir_encoded_value->type = type;
519
520 switch (type) {
521 case dex::kEncodedByte:
522 ir_encoded_value->u.byte_value = ParseIntValue<int8_t>(pptr, arg + 1);
523 break;
524
525 case dex::kEncodedShort:
526 ir_encoded_value->u.short_value = ParseIntValue<int16_t>(pptr, arg + 1);
527 break;
528
529 case dex::kEncodedChar:
530 ir_encoded_value->u.char_value = ParseIntValue<uint16_t>(pptr, arg + 1);
531 break;
532
533 case dex::kEncodedInt:
534 ir_encoded_value->u.int_value = ParseIntValue<int32_t>(pptr, arg + 1);
535 break;
536
537 case dex::kEncodedLong:
538 ir_encoded_value->u.long_value = ParseIntValue<int64_t>(pptr, arg + 1);
539 break;
540
541 case dex::kEncodedFloat:
542 ir_encoded_value->u.float_value = ParseFloatValue<float>(pptr, arg + 1);
543 break;
544
545 case dex::kEncodedDouble:
546 ir_encoded_value->u.double_value = ParseFloatValue<double>(pptr, arg + 1);
547 break;
548
549 case dex::kEncodedString: {
550 dex::u4 index = ParseIntValue<dex::u4>(pptr, arg + 1);
551 ir_encoded_value->u.string_value = GetString(index);
552 } break;
553
554 case dex::kEncodedType: {
555 dex::u4 index = ParseIntValue<dex::u4>(pptr, arg + 1);
556 ir_encoded_value->u.type_value = GetType(index);
557 } break;
558
559 case dex::kEncodedField: {
560 dex::u4 index = ParseIntValue<dex::u4>(pptr, arg + 1);
561 ir_encoded_value->u.field_value = GetFieldDecl(index);
562 } break;
563
564 case dex::kEncodedMethod: {
565 dex::u4 index = ParseIntValue<dex::u4>(pptr, arg + 1);
566 ir_encoded_value->u.method_value = GetMethodDecl(index);
567 } break;
568
569 case dex::kEncodedEnum: {
570 dex::u4 index = ParseIntValue<dex::u4>(pptr, arg + 1);
571 ir_encoded_value->u.enum_value = GetFieldDecl(index);
572 } break;
573
574 case dex::kEncodedArray:
575 SLICER_CHECK_EQ(arg, 0);
576 ir_encoded_value->u.array_value = ParseEncodedArray(pptr);
577 break;
578
579 case dex::kEncodedAnnotation:
580 SLICER_CHECK_EQ(arg, 0);
581 ir_encoded_value->u.annotation_value = ParseAnnotation(pptr);
582 break;
583
584 case dex::kEncodedNull:
585 SLICER_CHECK_EQ(arg, 0);
586 break;
587
588 case dex::kEncodedBoolean:
589 SLICER_CHECK_LT(arg, 2);
590 ir_encoded_value->u.bool_value = (arg == 1);
591 break;
592
593 default:
594 SLICER_CHECK(!"unexpected value type");
595 }
596
597 SLICER_EXTRA(ir_encoded_value->original = slicer::MemView(base_ptr, *pptr - base_ptr));
598
599 return ir_encoded_value;
600 }
601
ParseAnnotation(const dex::u1 ** pptr)602 ir::Annotation* Reader::ParseAnnotation(const dex::u1** pptr) {
603 auto ir_annotation = dex_ir_->Alloc<ir::Annotation>();
604
605 dex::u4 type_index = dex::ReadULeb128(pptr);
606 dex::u4 elements_count = dex::ReadULeb128(pptr);
607
608 ir_annotation->type = GetType(type_index);
609 ir_annotation->visibility = dex::kVisibilityEncoded;
610
611 for (dex::u4 i = 0; i < elements_count; ++i) {
612 auto ir_element = dex_ir_->Alloc<ir::AnnotationElement>();
613
614 ir_element->name = GetString(dex::ReadULeb128(pptr));
615 ir_element->value = ParseEncodedValue(pptr);
616
617 ir_annotation->elements.push_back(ir_element);
618 }
619
620 return ir_annotation;
621 }
622
ParseEncodedArray(const dex::u1 ** pptr)623 ir::EncodedArray* Reader::ParseEncodedArray(const dex::u1** pptr) {
624 auto ir_encoded_array = dex_ir_->Alloc<ir::EncodedArray>();
625
626 dex::u4 count = dex::ReadULeb128(pptr);
627 for (dex::u4 i = 0; i < count; ++i) {
628 ir_encoded_array->values.push_back(ParseEncodedValue(pptr));
629 }
630
631 return ir_encoded_array;
632 }
633
ExtractEncodedArray(dex::u4 offset)634 ir::EncodedArray* Reader::ExtractEncodedArray(dex::u4 offset) {
635 if (offset == 0) {
636 return nullptr;
637 }
638
639 // first check if we already extracted the same "annotation_item"
640 auto& ir_encoded_array = encoded_arrays_[offset];
641 if (ir_encoded_array == nullptr) {
642 auto ptr = dataPtr<dex::u1>(offset);
643 ir_encoded_array = ParseEncodedArray(&ptr);
644 }
645 return ir_encoded_array;
646 }
647
ExtractDebugInfo(dex::u4 offset)648 ir::DebugInfo* Reader::ExtractDebugInfo(dex::u4 offset) {
649 if (offset == 0) {
650 return nullptr;
651 }
652
653 auto ir_debug_info = dex_ir_->Alloc<ir::DebugInfo>();
654 const dex::u1* ptr = dataPtr<dex::u1>(offset);
655
656 ir_debug_info->line_start = dex::ReadULeb128(&ptr);
657
658 // TODO: implicit this param for non-static methods?
659 dex::u4 param_count = dex::ReadULeb128(&ptr);
660 for (dex::u4 i = 0; i < param_count; ++i) {
661 dex::u4 name_index = dex::ReadULeb128(&ptr) - 1;
662 auto ir_string =
663 (name_index == dex::kNoIndex) ? nullptr : GetString(name_index);
664 ir_debug_info->param_names.push_back(ir_string);
665 }
666
667 // parse the debug info opcodes and note the
668 // references to strings and types (to make sure the IR
669 // is the full closure of all referenced items)
670 //
671 // TODO: design a generic debug info iterator?
672 //
673 auto base_ptr = ptr;
674 dex::u1 opcode = 0;
675 while ((opcode = *ptr++) != dex::DBG_END_SEQUENCE) {
676 switch (opcode) {
677 case dex::DBG_ADVANCE_PC:
678 // addr_diff
679 dex::ReadULeb128(&ptr);
680 break;
681
682 case dex::DBG_ADVANCE_LINE:
683 // line_diff
684 dex::ReadSLeb128(&ptr);
685 break;
686
687 case dex::DBG_START_LOCAL: {
688 // register_num
689 dex::ReadULeb128(&ptr);
690
691 dex::u4 name_index = dex::ReadULeb128(&ptr) - 1;
692 if (name_index != dex::kNoIndex) {
693 GetString(name_index);
694 }
695
696 dex::u4 type_index = dex::ReadULeb128(&ptr) - 1;
697 if (type_index != dex::kNoIndex) {
698 GetType(type_index);
699 }
700 } break;
701
702 case dex::DBG_START_LOCAL_EXTENDED: {
703 // register_num
704 dex::ReadULeb128(&ptr);
705
706 dex::u4 name_index = dex::ReadULeb128(&ptr) - 1;
707 if (name_index != dex::kNoIndex) {
708 GetString(name_index);
709 }
710
711 dex::u4 type_index = dex::ReadULeb128(&ptr) - 1;
712 if (type_index != dex::kNoIndex) {
713 GetType(type_index);
714 }
715
716 dex::u4 sig_index = dex::ReadULeb128(&ptr) - 1;
717 if (sig_index != dex::kNoIndex) {
718 GetString(sig_index);
719 }
720 } break;
721
722 case dex::DBG_END_LOCAL:
723 case dex::DBG_RESTART_LOCAL:
724 // register_num
725 dex::ReadULeb128(&ptr);
726 break;
727
728 case dex::DBG_SET_FILE: {
729 dex::u4 name_index = dex::ReadULeb128(&ptr) - 1;
730 if (name_index != dex::kNoIndex) {
731 GetString(name_index);
732 }
733 } break;
734 }
735 }
736
737 ir_debug_info->data = slicer::MemView(base_ptr, ptr - base_ptr);
738
739 return ir_debug_info;
740 }
741
ExtractCode(dex::u4 offset)742 ir::Code* Reader::ExtractCode(dex::u4 offset) {
743 if (offset == 0) {
744 return nullptr;
745 }
746
747 SLICER_CHECK_EQ(offset % 4, 0);
748
749 auto dex_code = dataPtr<dex::Code>(offset);
750 auto ir_code = dex_ir_->Alloc<ir::Code>();
751
752 ir_code->registers = dex_code->registers_size;
753 ir_code->ins_count = dex_code->ins_size;
754 ir_code->outs_count = dex_code->outs_size;
755
756 // instructions array
757 ir_code->instructions =
758 slicer::ArrayView<const dex::u2>(dex_code->insns, dex_code->insns_size);
759
760 // parse the instructions to discover references to other
761 // IR nodes (see debug info stream parsing too)
762 ParseInstructions(ir_code->instructions);
763
764 // try blocks & handlers
765 //
766 // TODO: a generic try/catch blocks iterator?
767 //
768 if (dex_code->tries_size != 0) {
769 dex::u4 aligned_count = (dex_code->insns_size + 1) / 2 * 2;
770 auto tries =
771 reinterpret_cast<const dex::TryBlock*>(dex_code->insns + aligned_count);
772 auto handlers_list =
773 reinterpret_cast<const dex::u1*>(tries + dex_code->tries_size);
774
775 ir_code->try_blocks =
776 slicer::ArrayView<const dex::TryBlock>(tries, dex_code->tries_size);
777
778 // parse the handlers list (and discover embedded references)
779 auto ptr = handlers_list;
780
781 dex::u4 handlers_count = dex::ReadULeb128(&ptr);
782 SLICER_WEAK_CHECK(handlers_count <= dex_code->tries_size);
783
784 for (dex::u4 handler_index = 0; handler_index < handlers_count; ++handler_index) {
785 int catch_count = dex::ReadSLeb128(&ptr);
786
787 for (int catch_index = 0; catch_index < std::abs(catch_count); ++catch_index) {
788 dex::u4 type_index = dex::ReadULeb128(&ptr);
789 GetType(type_index);
790
791 // address
792 dex::ReadULeb128(&ptr);
793 }
794
795 if (catch_count < 1) {
796 // catch_all_addr
797 dex::ReadULeb128(&ptr);
798 }
799 }
800
801 ir_code->catch_handlers = slicer::MemView(handlers_list, ptr - handlers_list);
802 }
803
804 ir_code->debug_info = ExtractDebugInfo(dex_code->debug_info_off);
805
806 return ir_code;
807 }
808
ParseEncodedMethod(const dex::u1 ** pptr,dex::u4 * base_index)809 ir::EncodedMethod* Reader::ParseEncodedMethod(const dex::u1** pptr, dex::u4* base_index) {
810 auto ir_encoded_method = dex_ir_->Alloc<ir::EncodedMethod>();
811
812 auto method_index = dex::ReadULeb128(pptr);
813 SLICER_CHECK_NE(method_index, dex::kNoIndex);
814 if (*base_index != dex::kNoIndex) {
815 SLICER_CHECK_NE(method_index, 0);
816 method_index += *base_index;
817 }
818 *base_index = method_index;
819
820 ir_encoded_method->decl = GetMethodDecl(method_index);
821 ir_encoded_method->access_flags = dex::ReadULeb128(pptr);
822
823 dex::u4 code_offset = dex::ReadULeb128(pptr);
824 ir_encoded_method->code = ExtractCode(code_offset);
825
826 // update the methods lookup table
827 dex_ir_->methods_lookup.Insert(ir_encoded_method);
828
829 return ir_encoded_method;
830 }
831
ParseType(dex::u4 index)832 ir::Type* Reader::ParseType(dex::u4 index) {
833 auto& dex_type = TypeIds()[index];
834 auto ir_type = dex_ir_->Alloc<ir::Type>();
835
836 ir_type->descriptor = GetString(dex_type.descriptor_idx);
837 ir_type->orig_index = index;
838
839 return ir_type;
840 }
841
ParseFieldDecl(dex::u4 index)842 ir::FieldDecl* Reader::ParseFieldDecl(dex::u4 index) {
843 auto& dex_field = FieldIds()[index];
844 auto ir_field = dex_ir_->Alloc<ir::FieldDecl>();
845
846 ir_field->name = GetString(dex_field.name_idx);
847 ir_field->type = GetType(dex_field.type_idx);
848 ir_field->parent = GetType(dex_field.class_idx);
849 ir_field->orig_index = index;
850
851 return ir_field;
852 }
853
ParseMethodHandle(dex::u4 index)854 ir::MethodHandle* Reader::ParseMethodHandle(dex::u4 index){
855 auto& dex_method_handle = MethodHandles()[index];
856 auto ir_method_handle = dex_ir_->Alloc<ir::MethodHandle>();
857
858 ir_method_handle->method_handle_type = dex_method_handle.method_handle_type;
859
860 if(ir_method_handle->IsField()){
861 ir_method_handle->field = GetFieldDecl(dex_method_handle.field_or_method_id);
862 }
863 else {
864 ir_method_handle->method = GetMethodDecl(dex_method_handle.field_or_method_id);
865 }
866
867 return ir_method_handle;
868 }
869
ParseMethodDecl(dex::u4 index)870 ir::MethodDecl* Reader::ParseMethodDecl(dex::u4 index) {
871 auto& dex_method = MethodIds()[index];
872 auto ir_method = dex_ir_->Alloc<ir::MethodDecl>();
873
874 ir_method->name = GetString(dex_method.name_idx);
875 ir_method->prototype = GetProto(dex_method.proto_idx);
876 ir_method->parent = GetType(dex_method.class_idx);
877 ir_method->orig_index = index;
878
879 return ir_method;
880 }
881
ExtractTypeList(dex::u4 offset)882 ir::TypeList* Reader::ExtractTypeList(dex::u4 offset) {
883 if (offset == 0) {
884 return nullptr;
885 }
886
887 // first check to see if we already extracted the same "type_list"
888 auto& ir_type_list = type_lists_[offset];
889 if (ir_type_list == nullptr) {
890 ir_type_list = dex_ir_->Alloc<ir::TypeList>();
891
892 auto dex_type_list = dataPtr<dex::TypeList>(offset);
893 SLICER_WEAK_CHECK(dex_type_list->size > 0);
894
895 for (dex::u4 i = 0; i < dex_type_list->size; ++i) {
896 ir_type_list->types.push_back(GetType(dex_type_list->list[i].type_idx));
897 }
898 }
899
900 return ir_type_list;
901 }
902
ParseProto(dex::u4 index)903 ir::Proto* Reader::ParseProto(dex::u4 index) {
904 auto& dex_proto = ProtoIds()[index];
905 auto ir_proto = dex_ir_->Alloc<ir::Proto>();
906
907 ir_proto->shorty = GetString(dex_proto.shorty_idx);
908 ir_proto->return_type = GetType(dex_proto.return_type_idx);
909 ir_proto->param_types = ExtractTypeList(dex_proto.parameters_off);
910 ir_proto->orig_index = index;
911
912 // update the prototypes lookup table
913 dex_ir_->prototypes_lookup.Insert(ir_proto);
914
915 return ir_proto;
916 }
917
ParseString(dex::u4 index)918 ir::String* Reader::ParseString(dex::u4 index) {
919 auto ir_string = dex_ir_->Alloc<ir::String>();
920
921 auto data = GetStringData(index);
922 auto cstr = data;
923 dex::ReadULeb128(&cstr);
924 size_t size = (cstr - data) + ::strlen(reinterpret_cast<const char*>(cstr)) + 1;
925
926 ir_string->data = slicer::MemView(data, size);
927 ir_string->orig_index = index;
928
929 // update the strings lookup table
930 dex_ir_->strings_lookup.Insert(ir_string);
931
932 return ir_string;
933 }
934
ParseInstructions(slicer::ArrayView<const dex::u2> code)935 void Reader::ParseInstructions(slicer::ArrayView<const dex::u2> code) {
936 const dex::u2* ptr = code.begin();
937 while (ptr < code.end()) {
938 auto dex_instr = dex::DecodeInstruction(ptr);
939
940 dex::u4 index = dex::kNoIndex;
941 dex::u4 index2 = dex::kNoIndex;
942 switch (dex::GetFormatFromOpcode(dex_instr.opcode)) {
943 case dex::k20bc:
944 case dex::k21c:
945 case dex::k31c:
946 case dex::k35c:
947 case dex::k3rc:
948 index = dex_instr.vB;
949 break;
950
951 case dex::k45cc:
952 case dex::k4rcc:
953 index = dex_instr.vB;
954 index2 = dex_instr.arg[4];
955 break;
956
957 case dex::k22c:
958 index = dex_instr.vC;
959 break;
960
961 default:
962 break;
963 }
964
965 switch (GetIndexTypeFromOpcode(dex_instr.opcode)) {
966 case dex::kIndexStringRef:
967 GetString(index);
968 break;
969
970 case dex::kIndexTypeRef:
971 GetType(index);
972 break;
973
974 case dex::kIndexFieldRef:
975 GetFieldDecl(index);
976 break;
977
978 case dex::kIndexMethodRef:
979 GetMethodDecl(index);
980 break;
981
982 case dex::kIndexMethodAndProtoRef:
983 GetMethodDecl(index);
984 GetProto(index2);
985 break;
986
987 case dex::kIndexMethodHandleRef:
988 GetMethodHandle(index);
989 break;
990
991 default:
992 break;
993 }
994
995 auto isize = dex::GetWidthFromBytecode(ptr);
996 SLICER_CHECK_GT(isize, 0);
997 ptr += isize;
998 }
999 SLICER_CHECK_EQ(ptr, code.end());
1000 }
1001
1002 // Basic .dex header structural checks
ValidateHeader()1003 void Reader::ValidateHeader() {
1004 SLICER_CHECK_GT(size_, dex::Header::kV40Size);
1005
1006 // Known issue: For performance reasons the initial size_ passed to jvmti events might be an
1007 // estimate. b/72402467
1008 SLICER_CHECK_LE(header_->file_size, size_);
1009 // Check that we support this version of dex header
1010 SLICER_CHECK(
1011 header_->header_size == dex::Header::kV40Size ||
1012 header_->header_size == dex::Header::kV41Size);
1013 SLICER_CHECK_EQ(header_->endian_tag, dex::kEndianConstant);
1014 SLICER_CHECK_EQ(header_->data_size % 4, 0);
1015
1016 // If the dex file is within container with other dex files,
1017 // adjust the base address to the start of the container.
1018 SLICER_CHECK_LE(header_->ContainerSize() - header_->ContainerOff(), size_);
1019 image_ -= header_->ContainerOff();
1020 size_ = header_->ContainerSize();
1021
1022 // Known issue: The fields might be slighly corrupted b/65452964
1023 // SLICER_CHECK_LE(header_->data_off + header_->data_size, size_);
1024
1025 SLICER_CHECK_EQ(header_->string_ids_off % 4, 0);
1026 SLICER_CHECK_LT(header_->type_ids_size, 65536);
1027 SLICER_CHECK_EQ(header_->type_ids_off % 4, 0);
1028 SLICER_CHECK_LT(header_->proto_ids_size, 65536);
1029 SLICER_CHECK_EQ(header_->proto_ids_off % 4, 0);
1030 SLICER_CHECK_EQ(header_->field_ids_off % 4, 0);
1031 SLICER_CHECK_EQ(header_->method_ids_off % 4, 0);
1032 SLICER_CHECK_EQ(header_->class_defs_off % 4, 0);
1033 SLICER_CHECK_GE(header_->map_off, header_->data_off);
1034 SLICER_CHECK_LT(header_->map_off, size_);
1035 SLICER_CHECK_EQ(header_->link_size, 0);
1036 SLICER_CHECK_EQ(header_->link_off, 0);
1037 SLICER_CHECK_EQ(header_->data_off % 4, 0);
1038 SLICER_CHECK_EQ(header_->map_off % 4, 0);
1039
1040 // we seem to have .dex files with extra bytes at the end ...
1041 // Known issue: For performance reasons the initial size_ passed to jvmti events might be an
1042 // estimate. b/72402467
1043 SLICER_WEAK_CHECK(header_->data_off + header_->data_size <= size_);
1044
1045 // but we should still have the whole data section
1046
1047 // Known issue: The fields might be slightly corrupted b/65452964
1048 // Known issue: For performance reasons the initial size_ passed to jvmti events might be an
1049 // estimate. b/72402467
1050 // SLICER_CHECK_LE(header_->data_off + header_->data_size, size_);
1051
1052 // validate the map
1053 // (map section size = sizeof(MapList::size) + sizeof(MapList::list[size])
1054 auto map_list = ptr<dex::MapList>(header_->map_off);
1055 SLICER_CHECK_GT(map_list->size, 0);
1056 auto map_section_size =
1057 sizeof(dex::u4) + sizeof(dex::MapItem) * map_list->size;
1058 SLICER_CHECK_LE(header_->map_off + map_section_size, size_);
1059 }
1060
1061 } // namespace dex
1062