1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include "arrayview.h"
20 #include "buffer.h"
21 #include "common.h"
22 #include "dex_format.h"
23 #include "dex_leb128.h"
24 #include "hash_table.h"
25 #include "index_map.h"
26 #include "memview.h"
27 
28 #include <stdlib.h>
29 #include <map>
30 #include <memory>
31 #include <vector>
32 #include <string>
33 
34 // A simple, lightweight IR to abstract the key .dex structures
35 //
36 // 1. All the cross-IR references are modeled as plain pointers.
37 // 2. Newly allocated nodes are mem-zeroed first
38 //
39 // This IR can mirror any .dex file, although for JVMTI BCI
40 // it's expected to construct the IR for the single modified class only
41 // (and include only the nodes referenced from that class)
42 
// Boilerplate placed at the top of IR types in this header:
// a using-declaration for Node's constructors plus a friend declaration
// for DexFile. (Both macros are #undef'd at the end of the header.)
#define SLICER_IR_TYPE     \
  using Node::Node; \
  friend struct DexFile;

// Same boilerplate, but the using-declaration names IndexedNode's
// constructors (for IR types deriving from IndexedNode).
#define SLICER_IR_INDEXED_TYPE           \
  using IndexedNode::IndexedNode; \
  friend struct DexFile;
50 
51 namespace ir {
52 
// convenience notation: own<T> is shorthand for std::unique_ptr<T>
// (used by DexFile for the containers that own the IR nodes)
template <class T>
using own = std::unique_ptr<T>;
56 
// Forward declarations of every IR type, so the definitions below can
// refer to each other through plain pointers regardless of their order.
struct Node;
struct IndexedNode;
struct EncodedValue;
struct EncodedArray;
struct String;
struct Type;
struct TypeList;
struct Proto;
struct MethodHandle;
struct FieldDecl;
struct EncodedField;
struct DebugInfo;
struct Code;
struct MethodDecl;
struct EncodedMethod;
struct AnnotationElement;
struct Annotation;
struct AnnotationSet;
struct AnnotationSetRefList;
struct FieldAnnotation;
struct MethodAnnotation;
struct ParamAnnotation;
struct AnnotationsDirectory;
struct Class;
struct DexFile;
82 
83 // The base class for all the .dex IR types:
84 //   This is not a polymorphic interface, but
85 //   a way to constrain the allocation and ownership
86 //   of .dex IR nodes.
struct Node {
  // Class-specific allocation: every IR node is carved out of calloc()
  // storage so that a freshly allocated node starts out zero-filled
  // (the IR relies on this instead of explicit member initializers).
  //
  // These are throwing-form allocation functions, so they must never
  // return null; allocation failure is therefore fatal (see AllocZeroed).
  void* operator new(size_t size) {
    return AllocZeroed(size);
  }

  void* operator new[](size_t size) {
    return AllocZeroed(size);
  }

  // Matching deallocation: storage always comes from calloc(), so it is
  // released with free(). free(nullptr) is a no-op, as required for
  // `delete` on a null pointer.
  void operator delete(void* ptr) {
    ::free(ptr);
  }

  void operator delete[](void* ptr) {
    ::free(ptr);
  }

 public:
  // IR nodes are identity objects: no copying.
  Node(const Node&) = delete;
  Node& operator=(const Node&) = delete;

 protected:
  // Only derived IR types can be constructed or destroyed;
  // the destructor is intentionally non-virtual (not a polymorphic base).
  Node() = default;
  ~Node() = default;

 private:
  // Returns `size` bytes of zero-filled storage and never returns null.
  //
  // - A zero-byte request is rounded up to one byte, because calloc() is
  //   allowed to return null for a zero size while operator new is not.
  // - If the allocation fails the process is aborted: handing a null
  //   pointer back to a new-expression would make it construct the object
  //   at address zero (undefined behavior).
  static void* AllocZeroed(size_t size) {
    void* ptr = ::calloc(1, size != 0 ? size : 1);
    if (ptr == nullptr) {
      ::abort();
    }
    return ptr;
  }
};
112 
113 // a concession for the convenience of the .dex writer
114 //
115 // TODO: consider moving the indexing to the writer.
116 //
struct IndexedNode : public Node {
  // Common base for the IR types that carry a .dex index
  // (the types that use SLICER_IR_INDEXED_TYPE below).
  SLICER_IR_TYPE;

  // this is the index in the generated image
  // (not the original index)
  dex::u4 index;

  // original index
  // (from the source .dex image or allocated post reader)
  dex::u4 orig_index;
};
128 
// A single encoded value: a type tag plus a union holding the payload.
struct EncodedValue : public Node {
  SLICER_IR_TYPE;

  // Tag describing the value.
  // NOTE(review): presumably one of the encoded-value type codes from
  // dex_format.h, and presumably selects the active member of `u` - confirm.
  dex::u1 type;

  // Payload. Scalars are stored inline; references to other IR nodes are
  // stored as plain (non-owning) pointers.
  union {
    int8_t byte_value;
    int16_t short_value;
    uint16_t char_value;
    int32_t int_value;
    int64_t long_value;
    float float_value;
    double double_value;
    String* string_value;
    Type* type_value;
    FieldDecl* field_value;
    MethodDecl* method_value;
    FieldDecl* enum_value;
    EncodedArray* array_value;
    Annotation* annotation_value;
    bool bool_value;
  } u;

  // NOTE(review): SLICER_EXTRA is defined outside this header; presumably
  // it keeps or drops this member depending on the build - confirm.
  SLICER_EXTRA(slicer::MemView original);
};
153 
// An ordered sequence of encoded values.
struct EncodedArray : public Node {
  SLICER_IR_TYPE;

  // Elements, as non-owning pointers
  // (DexFile::encoded_values holds the owning pointers).
  std::vector<EncodedValue*> values;
};
159 
160 struct String : public IndexedNode {
161   SLICER_IR_INDEXED_TYPE;
162 
163   // opaque DEX "string_data_item"
164   slicer::MemView data;
165 
c_strString166   const char* c_str() const {
167     const dex::u1* strData = data.ptr<dex::u1>();
168     dex::ReadULeb128(&strData);
169     return reinterpret_cast<const char*>(strData);
170   }
171 };
172 
// A type reference, identified by its descriptor string.
struct Type : public IndexedNode {
  SLICER_IR_INDEXED_TYPE;

  // Coarse classification returned by GetCategory().
  enum class Category { Void, Scalar, WideScalar, Reference };

  // The type descriptor string (non-owning).
  String* descriptor;

  // NOTE(review): presumably the Class node defining this type when that
  // class is part of the IR, null otherwise - confirm against the reader.
  Class* class_def;

  // Both are defined out of line (not visible in this header).
  std::string Decl() const;
  Category GetCategory() const;
};
184 
// An ordered list of types (used by Proto::param_types and
// Class::interfaces in this header).
struct TypeList : public Node {
  SLICER_IR_TYPE;

  // Entries, as non-owning pointers (DexFile::types owns the Type nodes).
  std::vector<Type*> types;
};
190 
// A method prototype: shorty descriptor, return type and parameter types.
struct Proto : public IndexedNode {
  SLICER_IR_INDEXED_TYPE;

  String* shorty;
  Type* return_type;
  // NOTE(review): presumably null when the prototype has no parameters -
  // confirm against the reader/writer.
  TypeList* param_types;

  // Defined out of line. ProtosHasher::GetKey() uses the returned string
  // as the lookup key for a prototype.
  std::string Signature() const;
};
200 
// A method handle: a handle type plus the method or field it refers to.
struct MethodHandle : public IndexedNode {
  SLICER_IR_INDEXED_TYPE;

  // NOTE(review): presumably one of the method-handle type codes declared
  // in dex_format.h - confirm.
  dex::u2 method_handle_type;

  // NOTE(review): presumably only one of these is set, depending on
  // method_handle_type (see IsField()) - confirm against the reader.
  MethodDecl* method;
  FieldDecl* field;

  // Defined out of line (not visible in this header).
  bool IsField();
};
210 
// A field reference: name, field type and the type that declares it.
// All members are non-owning pointers to other IR nodes.
struct FieldDecl : public IndexedNode {
  SLICER_IR_INDEXED_TYPE;

  String* name;
  Type* type;
  // NOTE(review): presumably the declaring class' type - confirm.
  Type* parent;
};
218 
// A field as listed inside a Class (see Class::static_fields and
// Class::instance_fields): its declaration plus access flags.
struct EncodedField : public Node {
  SLICER_IR_TYPE;

  FieldDecl* decl;
  // NOTE(review): presumably the dex access-flags bit mask - confirm.
  dex::u4 access_flags;
};
225 
// Debug information attached to a Code node (see Code::debug_info).
struct DebugInfo : public Node {
  SLICER_IR_TYPE;

  // NOTE(review): presumably the initial line number of the debug state
  // machine - confirm against the dex debug_info_item layout.
  dex::u4 line_start;

  // Parameter names, as non-owning String pointers.
  std::vector<String*> param_names;

  // original debug info opcodes stream
  // (must be "relocated" when creating a new .dex image)
  slicer::MemView data;
};
236 
// The body of a method: register counts, the instruction stream,
// exception handling tables and optional debug info.
struct Code : public Node {
  SLICER_IR_TYPE;

  // NOTE(review): presumably mirror registers_size / ins_size / outs_size
  // of the dex code_item - confirm against dex_format.h.
  dex::u2 registers;
  dex::u2 ins_count;
  dex::u2 outs_count;

  // Views over the bytecode (16-bit code units) and the try blocks.
  slicer::ArrayView<const dex::u2> instructions;
  slicer::ArrayView<const dex::TryBlock> try_blocks;

  // Raw view over the catch handlers data.
  slicer::MemView catch_handlers;

  // Non-owning pointer to the associated debug info.
  // NOTE(review): presumably null when the method has none - confirm.
  DebugInfo* debug_info;
};
248 
// A method reference: name, prototype and the type that declares it.
// All members are non-owning pointers to other IR nodes.
struct MethodDecl : public IndexedNode {
  SLICER_IR_INDEXED_TYPE;

  String* name;
  Proto* prototype;
  // NOTE(review): presumably the declaring class' type - confirm.
  Type* parent;
};
256 
// A method as listed inside a Class (see Class::direct_methods and
// Class::virtual_methods): declaration, optional code and access flags.
struct EncodedMethod : public Node {
  SLICER_IR_TYPE;

  MethodDecl* decl;
  // NOTE(review): presumably null for methods without a body
  // (abstract/native) - confirm against the reader.
  Code* code;
  // NOTE(review): presumably the dex access-flags bit mask - confirm.
  dex::u4 access_flags;
};
264 
// One name/value pair of an Annotation (see Annotation::elements).
struct AnnotationElement : public Node {
  SLICER_IR_TYPE;

  // Both are non-owning pointers to other IR nodes.
  String* name;
  EncodedValue* value;
};
271 
// An annotation instance: its type, its elements and a visibility value.
struct Annotation : public Node {
  SLICER_IR_TYPE;

  Type* type;
  std::vector<AnnotationElement*> elements;
  // NOTE(review): presumably one of the dex annotation visibility
  // constants - confirm against dex_format.h.
  dex::u1 visibility;
};
279 
// A group of annotations.
struct AnnotationSet : public Node {
  SLICER_IR_TYPE;

  // Members, as non-owning pointers
  // (DexFile::annotations holds the owning pointers).
  std::vector<Annotation*> annotations;
};
285 
// A list of annotation sets (used by ParamAnnotation::annotations).
struct AnnotationSetRefList : public Node {
  SLICER_IR_TYPE;

  // NOTE(review): despite the name, each entry is an AnnotationSet;
  // presumably one entry per method parameter - confirm.
  std::vector<AnnotationSet*> annotations;
};
291 
// Associates a field declaration with its annotation set.
struct FieldAnnotation : public Node {
  SLICER_IR_TYPE;

  // Both are non-owning pointers to other IR nodes.
  FieldDecl* field_decl;
  AnnotationSet* annotations;
};
298 
// Associates a method declaration with its annotation set.
struct MethodAnnotation : public Node {
  SLICER_IR_TYPE;

  // Both are non-owning pointers to other IR nodes.
  MethodDecl* method_decl;
  AnnotationSet* annotations;
};
305 
// Associates a method declaration with a list of annotation sets.
struct ParamAnnotation : public Node {
  SLICER_IR_TYPE;

  // Both are non-owning pointers to other IR nodes.
  MethodDecl* method_decl;
  AnnotationSetRefList* annotations;
};
312 
// All the annotations attached to one class (see Class::annotations):
// the class-level set plus per-field, per-method and per-parameter entries.
struct AnnotationsDirectory : public Node {
  SLICER_IR_TYPE;

  // NOTE(review): presumably null when the class itself carries no
  // annotations - confirm against the reader.
  AnnotationSet* class_annotation;

  // Non-owning pointers; DexFile holds the owning containers.
  std::vector<FieldAnnotation*> field_annotations;
  std::vector<MethodAnnotation*> method_annotations;
  std::vector<ParamAnnotation*> param_annotations;
};
321 
// A class definition: header-like attributes followed by its members.
struct Class : public IndexedNode {
  SLICER_IR_INDEXED_TYPE;

  // Non-owning pointers to other IR nodes.
  // NOTE(review): the optional ones (super_class, interfaces, source_file,
  // annotations, static_init) are presumably null when absent - confirm.
  Type* type;
  dex::u4 access_flags;
  Type* super_class;
  TypeList* interfaces;
  String* source_file;
  AnnotationsDirectory* annotations;
  EncodedArray* static_init;

  // Class members, as non-owning pointers (DexFile::encoded_fields and
  // DexFile::encoded_methods hold the owning pointers).
  std::vector<EncodedField*> static_fields;
  std::vector<EncodedField*> instance_fields;
  std::vector<EncodedMethod*> direct_methods;
  std::vector<EncodedMethod*> virtual_methods;
};
338 
339 // ir::String hashing
340 struct StringsHasher {
GetKeyStringsHasher341   const char* GetKey(const String* string) const { return string->c_str(); }
342   uint32_t Hash(const char* string_key) const;
343   bool Compare(const char* string_key, const String* string) const;
344 };
345 
346 // ir::Proto hashing
347 struct ProtosHasher {
GetKeyProtosHasher348   std::string GetKey(const Proto* proto) const { return proto->Signature(); }
349   uint32_t Hash(const std::string& proto_key) const;
350   bool Compare(const std::string& proto_key, const Proto* proto) const;
351 };
352 
353 // ir::EncodedMethod hashing
// Lookup key for methods (used by MethodsHasher / MethodsLookup):
// class descriptor, method name and prototype. All three default to null
// and are plain, non-owning pointers.
struct MethodKey {
  String* class_descriptor = nullptr;
  String* method_name = nullptr;
  Proto* prototype = nullptr;
};
359 
// Hashing policy for ir::EncodedMethod nodes (plugged into MethodsLookup).
// Same GetKey/Hash/Compare shape as StringsHasher and ProtosHasher;
// all three members are defined out of line.
struct MethodsHasher {
  MethodKey GetKey(const EncodedMethod* method) const;
  uint32_t Hash(const MethodKey& method_key) const;
  bool Compare(const MethodKey& method_key, const EncodedMethod* method) const;
};
365 
// Hash table types used by DexFile for its lookup tables.
// NOTE(review): the slicer::HashTable arguments appear to be
// <key type, IR node type, hasher policy> - confirm in hash_table.h.
using StringsLookup = slicer::HashTable<const char*, String, StringsHasher>;
using PrototypesLookup = slicer::HashTable<const std::string&, Proto, ProtosHasher>;
using MethodsLookup = slicer::HashTable<const MethodKey&, EncodedMethod, MethodsHasher>;
369 
370 // The main container/root for a .dex IR
371 struct DexFile {
372   // indexed structures
373   std::vector<own<String>> strings;
374   std::vector<own<Type>> types;
375   std::vector<own<Proto>> protos;
376   std::vector<own<FieldDecl>> fields;
377   std::vector<own<MethodDecl>> methods;
378   std::vector<own<Class>> classes;
379   std::vector<own<MethodHandle>> method_handles;
380 
381   // data segment structures
382   std::vector<own<EncodedField>> encoded_fields;
383   std::vector<own<EncodedMethod>> encoded_methods;
384   std::vector<own<TypeList>> type_lists;
385   std::vector<own<Code>> code;
386   std::vector<own<DebugInfo>> debug_info;
387   std::vector<own<EncodedValue>> encoded_values;
388   std::vector<own<EncodedArray>> encoded_arrays;
389   std::vector<own<Annotation>> annotations;
390   std::vector<own<AnnotationElement>> annotation_elements;
391   std::vector<own<AnnotationSet>> annotation_sets;
392   std::vector<own<AnnotationSetRefList>> annotation_set_ref_lists;
393   std::vector<own<AnnotationsDirectory>> annotations_directories;
394   std::vector<own<FieldAnnotation>> field_annotations;
395   std::vector<own<MethodAnnotation>> method_annotations;
396   std::vector<own<ParamAnnotation>> param_annotations;
397 
398   // original index to IR node mappings
399   //
400   // CONSIDER: we only need to carry around
401   //   the relocation for the referenced items
402   //
403   std::map<dex::u4, Type*> types_map;
404   std::map<dex::u4, String*> strings_map;
405   std::map<dex::u4, Proto*> protos_map;
406   std::map<dex::u4, FieldDecl*> fields_map;
407   std::map<dex::u4, MethodDecl*> methods_map;
408   std::map<dex::u4, Class*> classes_map;
409   std::map<dex::u4, MethodHandle*> method_handles_map;
410 
411   // original .dex header "magic" signature
412   slicer::MemView magic;
413 
414   // keep track of the used index values
415   // (so we can easily allocate new ones)
416   IndexMap strings_indexes;
417   IndexMap types_indexes;
418   IndexMap protos_indexes;
419   IndexMap fields_indexes;
420   IndexMap methods_indexes;
421   IndexMap classes_indexes;
422   IndexMap method_handle_indexes;
423 
424   // lookup hash tables
425   StringsLookup strings_lookup;
426   MethodsLookup methods_lookup;
427   PrototypesLookup prototypes_lookup;
428 
429  public:
430   DexFile() = default;
431 
432   // No copy/move semantics
433   DexFile(const DexFile&) = delete;
434   DexFile& operator=(const DexFile&) = delete;
435 
436   template <class T>
AllocDexFile437   T* Alloc() {
438     T* p = new T();
439     Track(p);
440     return p;
441   }
442 
AttachBufferDexFile443   void AttachBuffer(slicer::Buffer&& buffer) {
444     buffers_.push_back(std::move(buffer));
445   }
446 
447   void Normalize();
448 
449  private:
450   void TopSortClassIndex(Class* irClass, dex::u4* nextIndex);
451   void SortClassIndexes();
452 
453   template <class T>
PushOwnDexFile454   void PushOwn(std::vector<own<T>>& v, T* p) {
455     v.push_back(own<T>(p));
456   }
457 
TrackDexFile458   void Track(String* p) { PushOwn(strings, p); }
TrackDexFile459   void Track(Type* p) { PushOwn(types, p); }
TrackDexFile460   void Track(Proto* p) { PushOwn(protos, p); }
TrackDexFile461   void Track(FieldDecl* p) { PushOwn(fields, p); }
TrackDexFile462   void Track(MethodDecl* p) { PushOwn(methods, p); }
TrackDexFile463   void Track(Class* p) { PushOwn(classes, p); }
TrackDexFile464   void Track(MethodHandle* p) { PushOwn(method_handles, p); }
465 
TrackDexFile466   void Track(EncodedField* p) { PushOwn(encoded_fields, p); }
TrackDexFile467   void Track(EncodedMethod* p) { PushOwn(encoded_methods, p); }
TrackDexFile468   void Track(TypeList* p) { PushOwn(type_lists, p); }
TrackDexFile469   void Track(Code* p) { PushOwn(code, p); }
TrackDexFile470   void Track(DebugInfo* p) { PushOwn(debug_info, p); }
TrackDexFile471   void Track(EncodedValue* p) { PushOwn(encoded_values, p); }
TrackDexFile472   void Track(EncodedArray* p) { PushOwn(encoded_arrays, p); }
TrackDexFile473   void Track(Annotation* p) { PushOwn(annotations, p); }
TrackDexFile474   void Track(AnnotationElement* p) { PushOwn(annotation_elements, p); }
TrackDexFile475   void Track(AnnotationSet* p) { PushOwn(annotation_sets, p); }
TrackDexFile476   void Track(AnnotationSetRefList* p) { PushOwn(annotation_set_ref_lists, p); }
TrackDexFile477   void Track(AnnotationsDirectory* p) { PushOwn(annotations_directories, p); }
TrackDexFile478   void Track(FieldAnnotation* p) { PushOwn(field_annotations, p); }
TrackDexFile479   void Track(MethodAnnotation* p) { PushOwn(method_annotations, p); }
TrackDexFile480   void Track(ParamAnnotation* p) { PushOwn(param_annotations, p); }
481 
482 private:
483   // additional memory buffers owned by this .dex IR
484   std::vector<slicer::Buffer> buffers_;
485 };
486 
487 }  // namespace ir
488 
// The helper macros are only meant for the definitions above;
// undefine them so they don't leak into files including this header.
#undef SLICER_IR_TYPE
#undef SLICER_IR_INDEXED_TYPE
491