1 /*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "slicer/dex_ir.h"
18
19 #include "slicer/chronometer.h"
20 #include "slicer/dex_utf8.h"
21 #include "slicer/dex_format.h"
22
23 #include <cstdint>
24 #include <algorithm>
25 #include <memory>
26 #include <sstream>
27 #include <vector>
28
29 namespace ir {
30
31 // DBJ2a string hash
HashString(const char * cstr)32 static uint32_t HashString(const char* cstr) {
33 uint32_t hash = 5381; // DBJ2 magic prime value
34 while (*cstr) {
35 hash = ((hash << 5) + hash) ^ *cstr++;
36 }
37 return hash;
38 }
39
Hash(const char * string_key) const40 uint32_t StringsHasher::Hash(const char* string_key) const {
41 return HashString(string_key);
42 }
43
Compare(const char * string_key,const String * string) const44 bool StringsHasher::Compare(const char* string_key, const String* string) const {
45 return dex::Utf8Cmp(string_key, string->c_str()) == 0;
46 }
47
Hash(const std::string & proto_key) const48 uint32_t ProtosHasher::Hash(const std::string& proto_key) const {
49 return HashString(proto_key.c_str());
50 }
51
Compare(const std::string & proto_key,const Proto * proto) const52 bool ProtosHasher::Compare(const std::string& proto_key, const Proto* proto) const {
53 return proto_key == proto->Signature();
54 }
55
GetKey(const EncodedMethod * method) const56 MethodKey MethodsHasher::GetKey(const EncodedMethod* method) const {
57 MethodKey method_key;
58 method_key.class_descriptor = method->decl->parent->descriptor;
59 method_key.method_name = method->decl->name;
60 method_key.prototype = method->decl->prototype;
61 return method_key;
62 }
63
Hash(const MethodKey & method_key) const64 uint32_t MethodsHasher::Hash(const MethodKey& method_key) const {
65 return static_cast<uint32_t>(std::hash<void*>{}(method_key.class_descriptor) ^
66 std::hash<void*>{}(method_key.method_name) ^
67 std::hash<void*>{}(method_key.prototype));
68 }
69
Compare(const MethodKey & method_key,const EncodedMethod * method) const70 bool MethodsHasher::Compare(const MethodKey& method_key, const EncodedMethod* method) const {
71 return method_key.class_descriptor == method->decl->parent->descriptor &&
72 method_key.method_name == method->decl->name &&
73 method_key.prototype == method->decl->prototype;
74 }
75
76 // Human-readable type declaration
Decl() const77 std::string Type::Decl() const {
78 return dex::DescriptorToDecl(descriptor->c_str());
79 }
80
GetCategory() const81 Type::Category Type::GetCategory() const {
82 switch (*descriptor->c_str()) {
83 case 'L':
84 case '[':
85 return Category::Reference;
86 case 'V':
87 return Category::Void;
88 case 'D':
89 case 'J':
90 return Category::WideScalar;
91 default:
92 return Category::Scalar;
93 }
94 }
95
96 // Create the corresponding JNI signature:
97 // https://docs.oracle.com/javase/8/docs/technotes/guides/jni/spec/types.html#type_signatures
Signature() const98 std::string Proto::Signature() const {
99 std::stringstream ss;
100 ss << "(";
101 if (param_types != nullptr) {
102 for (const auto& type : param_types->types) {
103 ss << type->descriptor->c_str();
104 }
105 }
106 ss << ")";
107 ss << return_type->descriptor->c_str();
108 return ss.str();
109 }
110
IsField()111 bool MethodHandle::IsField(){
112 return (
113 method_handle_type == dex::METHOD_HANDLE_TYPE_STATIC_PUT ||
114 method_handle_type == dex::METHOD_HANDLE_TYPE_STATIC_GET ||
115 method_handle_type == dex::METHOD_HANDLE_TYPE_INSTANCE_PUT ||
116 method_handle_type == dex::METHOD_HANDLE_TYPE_INSTANCE_GET
117 );
118 }
119
120 // Helper for IR normalization
121 // (it sorts items and update the numeric idexes to match)
122 template <class T, class C>
IndexItems(std::vector<T> & items,C comp)123 static void IndexItems(std::vector<T>& items, C comp) {
124 std::sort(items.begin(), items.end(), comp);
125 for (size_t i = 0; i < items.size(); ++i) {
126 items[i]->index = i;
127 }
128 }
129
130 // Helper for IR normalization (DFS for topological sort)
131 //
132 // NOTE: this recursive version is clean and simple and we know
133 // that the max depth is bounded (exactly 1 for JVMTI and a small
134 // max for general case - the largest .dex file in AOSP has 5000 classes
135 // total)
136 //
TopSortClassIndex(Class * irClass,dex::u4 * nextIndex)137 void DexFile::TopSortClassIndex(Class* irClass, dex::u4* nextIndex) {
138 if (irClass->index == dex::u4(-1)) {
139 if (irClass->super_class && irClass->super_class->class_def) {
140 TopSortClassIndex(irClass->super_class->class_def, nextIndex);
141 }
142
143 if (irClass->interfaces) {
144 for (Type* interfaceType : irClass->interfaces->types) {
145 if (interfaceType->class_def) {
146 TopSortClassIndex(interfaceType->class_def, nextIndex);
147 }
148 }
149 }
150
151 SLICER_CHECK_LT(*nextIndex, classes.size());
152 irClass->index = (*nextIndex)++;
153 }
154 }
155
156 // Helper for IR normalization
157 // (topological sort the classes)
SortClassIndexes()158 void DexFile::SortClassIndexes() {
159 for (auto& irClass : classes) {
160 irClass->index = dex::u4(-1);
161 }
162
163 dex::u4 nextIndex = 0;
164 for (auto& irClass : classes) {
165 TopSortClassIndex(irClass.get(), &nextIndex);
166 }
167 }
168
169 // Helper for NormalizeClass()
SortEncodedFields(std::vector<EncodedField * > * fields)170 static void SortEncodedFields(std::vector<EncodedField*>* fields) {
171 std::sort(fields->begin(), fields->end(),
172 [](const EncodedField* a, const EncodedField* b) {
173 SLICER_CHECK(a->decl->index != b->decl->index || a == b);
174 return a->decl->index < b->decl->index;
175 });
176 }
177
178 // Helper for NormalizeClass()
SortEncodedMethods(std::vector<EncodedMethod * > * methods)179 static void SortEncodedMethods(std::vector<EncodedMethod*>* methods) {
180 std::sort(methods->begin(), methods->end(),
181 [](const EncodedMethod* a, const EncodedMethod* b) {
182 SLICER_CHECK(a->decl->index != b->decl->index || a == b);
183 return a->decl->index < b->decl->index;
184 });
185 }
186
187 // Helper for IR normalization
188 // (sort the field & method arrays)
NormalizeClass(Class * irClass)189 static void NormalizeClass(Class* irClass) {
190 SortEncodedFields(&irClass->static_fields);
191 SortEncodedFields(&irClass->instance_fields);
192 SortEncodedMethods(&irClass->direct_methods);
193 SortEncodedMethods(&irClass->virtual_methods);
194 }
195
196 // Prepare the IR for generating a .dex image
197 // (the .dex format requires a specific sort order for some of the arrays, etc...)
198 //
199 // TODO: not a great solution - move this logic to the writer!
200 //
201 // TODO: the comparison predicate can be better expressed by using std::tie()
202 // Ex. FieldDecl has a method comp() returning tie(parent->index, name->index, type->index)
203 //
Normalize()204 void DexFile::Normalize() {
205 // sort build the .dex indexes
206 IndexItems(strings, [](const own<String>& a, const own<String>& b) {
207 // this list must be sorted by std::string contents, using UTF-16 code point values
208 // (not in a locale-sensitive manner)
209 return dex::Utf8Cmp(a->c_str(), b->c_str()) < 0;
210 });
211
212 IndexItems(types, [](const own<Type>& a, const own<Type>& b) {
213 // this list must be sorted by string_id index
214 return a->descriptor->index < b->descriptor->index;
215 });
216
217 IndexItems(protos, [](const own<Proto>& a, const own<Proto>& b) {
218 // this list must be sorted in return-type (by type_id index) major order,
219 // and then by argument list (lexicographic ordering, individual arguments
220 // ordered by type_id index)
221 if (a->return_type->index != b->return_type->index) {
222 return a->return_type->index < b->return_type->index;
223 } else {
224 std::vector<Type*> empty;
225 const auto& aParamTypes = a->param_types ? a->param_types->types : empty;
226 const auto& bParamTypes = b->param_types ? b->param_types->types : empty;
227 return std::lexicographical_compare(
228 aParamTypes.begin(), aParamTypes.end(), bParamTypes.begin(),
229 bParamTypes.end(),
230 [](const Type* t1, const Type* t2) { return t1->index < t2->index; });
231 }
232 });
233
234 IndexItems(fields, [](const own<FieldDecl>& a, const own<FieldDecl>& b) {
235 // this list must be sorted, where the defining type (by type_id index) is
236 // the major order, field name (by string_id index) is the intermediate
237 // order, and type (by type_id index) is the minor order
238 return (a->parent->index != b->parent->index)
239 ? a->parent->index < b->parent->index
240 : (a->name->index != b->name->index)
241 ? a->name->index < b->name->index
242 : a->type->index < b->type->index;
243 });
244
245 IndexItems(methods, [](const own<MethodDecl>& a, const own<MethodDecl>& b) {
246 // this list must be sorted, where the defining type (by type_id index) is
247 // the major order, method name (by string_id index) is the intermediate
248 // order, and method prototype (by proto_id index) is the minor order
249 return (a->parent->index != b->parent->index)
250 ? a->parent->index < b->parent->index
251 : (a->name->index != b->name->index)
252 ? a->name->index < b->name->index
253 : a->prototype->index < b->prototype->index;
254 });
255
256 // reverse topological sort
257 //
258 // the classes must be ordered such that a given class's superclass and
259 // implemented interfaces appear in the list earlier than the referring
260 // class
261 //
262 // CONSIDER: for the BCI-only scenario we can avoid this
263 //
264 SortClassIndexes();
265
266 IndexItems(classes, [&](const own<Class>& a, const own<Class>& b) {
267 SLICER_CHECK_LT(a->index, classes.size());
268 SLICER_CHECK_LT(b->index, classes.size());
269 SLICER_CHECK(a->index != b->index || a == b);
270 return a->index < b->index;
271 });
272
273 // normalize class data
274 for (const auto& irClass : classes) {
275 NormalizeClass(irClass.get());
276 }
277
278 // normalize annotations
279 for (const auto& irAnnotation : annotations) {
280 // elements must be sorted in increasing order by string_id index
281 auto& elements = irAnnotation->elements;
282 std::sort(elements.begin(), elements.end(),
283 [](const AnnotationElement* a, const AnnotationElement* b) {
284 return a->name->index < b->name->index;
285 });
286 }
287
288 // normalize "annotation_set_item"
289 for (const auto& irAnnotationSet : annotation_sets) {
290 // The elements must be sorted in increasing order, by type_idx
291 auto& annotations = irAnnotationSet->annotations;
292 std::sort(annotations.begin(), annotations.end(),
293 [](const Annotation* a, const Annotation* b) {
294 return a->type->index < b->type->index;
295 });
296 }
297
298 // normalize "annotations_directory_item"
299 for (const auto& irAnnotationDirectory : annotations_directories) {
300 // field_annotations: The elements of the list must be
301 // sorted in increasing order, by field_idx
302 auto& field_annotations = irAnnotationDirectory->field_annotations;
303 std::sort(field_annotations.begin(), field_annotations.end(),
304 [](const FieldAnnotation* a, const FieldAnnotation* b) {
305 return a->field_decl->index < b->field_decl->index;
306 });
307
308 // method_annotations: The elements of the list must be
309 // sorted in increasing order, by method_idx
310 auto& method_annotations = irAnnotationDirectory->method_annotations;
311 std::sort(method_annotations.begin(), method_annotations.end(),
312 [](const MethodAnnotation* a, const MethodAnnotation* b) {
313 return a->method_decl->index < b->method_decl->index;
314 });
315
316 // parameter_annotations: The elements of the list must be
317 // sorted in increasing order, by method_idx
318 auto& param_annotations = irAnnotationDirectory->param_annotations;
319 std::sort(param_annotations.begin(), param_annotations.end(),
320 [](const ParamAnnotation* a, const ParamAnnotation* b) {
321 return a->method_decl->index < b->method_decl->index;
322 });
323 }
324 }
325
326 } // namespace ir
327
328