1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "slicer/dex_bytecode.h"
18 
19 #include "slicer/common.h"
20 
21 #include <array>
22 #include <iomanip>
23 #include <sstream>
24 
25 namespace dex {
26 
OpcodeFromBytecode(u2 bytecode)27 Opcode OpcodeFromBytecode(u2 bytecode) {
28   Opcode opcode = Opcode(bytecode & 0xff);
29   return opcode;
30 }
31 
32 // Table that maps each opcode to the index type implied by that opcode
33 static constexpr std::array<InstructionDescriptor, kNumPackedOpcodes>
34     gInstructionDescriptors = {{
35 #define INSTRUCTION_DESCR(o, c, p, format, index, flags, e, vflags) \
36   {                                                                 \
37       vflags,                                                       \
38       format,                                                       \
39       index,                                                        \
40       flags,                                                        \
41   },
42 #include "export/slicer/dex_instruction_list.h"
43         DEX_INSTRUCTION_LIST(INSTRUCTION_DESCR)
44 #undef DEX_INSTRUCTION_LIST
45 #undef INSTRUCTION_DESCR
46     }};
47 
GetIndexTypeFromOpcode(Opcode opcode)48 InstructionIndexType GetIndexTypeFromOpcode(Opcode opcode) {
49   return gInstructionDescriptors[opcode].index_type;
50 }
51 
GetFormatFromOpcode(Opcode opcode)52 InstructionFormat GetFormatFromOpcode(Opcode opcode) {
53   return gInstructionDescriptors[opcode].format;
54 }
55 
GetFlagsFromOpcode(Opcode opcode)56 OpcodeFlags GetFlagsFromOpcode(Opcode opcode) {
57   return gInstructionDescriptors[opcode].flags;
58 }
59 
GetVerifyFlagsFromOpcode(Opcode opcode)60 VerifyFlags GetVerifyFlagsFromOpcode(Opcode opcode) {
61   return gInstructionDescriptors[opcode].verify_flags;
62 }
63 
GetWidthFromFormat(InstructionFormat format)64 size_t GetWidthFromFormat(InstructionFormat format) {
65   switch (format) {
66     case k10x:
67     case k12x:
68     case k11n:
69     case k11x:
70     case k10t:
71       return 1;
72     case k20t:
73     case k20bc:
74     case k21c:
75     case k22x:
76     case k21s:
77     case k21t:
78     case k21h:
79     case k23x:
80     case k22b:
81     case k22s:
82     case k22t:
83     case k22c:
84     case k22cs:
85       return 2;
86     case k30t:
87     case k31t:
88     case k31c:
89     case k32x:
90     case k31i:
91     case k35c:
92     case k35ms:
93     case k35mi:
94     case k3rc:
95     case k3rms:
96     case k3rmi:
97       return 3;
98     case k45cc:
99     case k4rcc:
100       return 4;
101     case k51l:
102       return 5;
103   }
104 }
105 
GetWidthFromBytecode(const u2 * bytecode)106 size_t GetWidthFromBytecode(const u2* bytecode) {
107   size_t width = 0;
108   if (*bytecode == kPackedSwitchSignature) {
109     width = 4 + bytecode[1] * 2;
110   } else if (*bytecode == kSparseSwitchSignature) {
111     width = 2 + bytecode[1] * 4;
112   } else if (*bytecode == kArrayDataSignature) {
113     u2 elemWidth = bytecode[1];
114     u4 len = bytecode[2] | (((u4)bytecode[3]) << 16);
115     // The plus 1 is to round up for odd size and width.
116     width = 4 + (elemWidth * len + 1) / 2;
117   } else {
118     width = GetWidthFromFormat(
119         GetFormatFromOpcode(OpcodeFromBytecode(bytecode[0])));
120   }
121   return width;
122 }
123 
124 // Dalvik opcode names.
125 static constexpr std::array<const char*, kNumPackedOpcodes> gOpcodeNames = {
126 #define INSTRUCTION_NAME(o, c, pname, f, i, a, e, v) pname,
127 #include "export/slicer/dex_instruction_list.h"
128     DEX_INSTRUCTION_LIST(INSTRUCTION_NAME)
129 #undef DEX_INSTRUCTION_LIST
130 #undef INSTRUCTION_NAME
131 };
132 
GetOpcodeName(Opcode opcode)133 const char* GetOpcodeName(Opcode opcode) { return gOpcodeNames[opcode]; }
134 
135 // Helpers for DecodeInstruction()
InstA(u2 inst)136 static u4 InstA(u2 inst) { return (inst >> 8) & 0x0f; }
InstB(u2 inst)137 static u4 InstB(u2 inst) { return inst >> 12; }
InstAA(u2 inst)138 static u4 InstAA(u2 inst) { return inst >> 8; }
139 
140 // Helper for DecodeInstruction()
FetchU4(const u2 * ptr)141 static u4 FetchU4(const u2* ptr) { return ptr[0] | (u4(ptr[1]) << 16); }
142 
143 // Helper for DecodeInstruction()
FetchU8(const u2 * ptr)144 static u8 FetchU8(const u2* ptr) {
145   return FetchU4(ptr) | (u8(FetchU4(ptr + 2)) << 32);
146 }
147 
148 // Decode a Dalvik bytecode and extract the individual fields
DecodeInstruction(const u2 * bytecode)149 Instruction DecodeInstruction(const u2* bytecode) {
150   u2 inst = bytecode[0];
151   Opcode opcode = OpcodeFromBytecode(inst);
152   InstructionFormat format = GetFormatFromOpcode(opcode);
153 
154   Instruction dec = {};
155   dec.opcode = opcode;
156 
157   switch (format) {
158     case k10x:  // op
159       return dec;
160     case k12x:  // op vA, vB
161       dec.vA = InstA(inst);
162       dec.vB = InstB(inst);
163       return dec;
164     case k11n:  // op vA, #+B
165       dec.vA = InstA(inst);
166       dec.vB = s4(InstB(inst) << 28) >> 28;  // sign extend 4-bit value
167       return dec;
168     case k11x:  // op vAA
169       dec.vA = InstAA(inst);
170       return dec;
171     case k10t:                    // op +AA
172       dec.vA = s1(InstAA(inst));  // sign-extend 8-bit value
173       return dec;
174     case k20t:                   // op +AAAA
175       dec.vA = s2(bytecode[1]);  // sign-extend 16-bit value
176       return dec;
177     case k20bc:  // [opt] op AA, thing@BBBB
178     case k21c:   // op vAA, thing@BBBB
179     case k22x:   // op vAA, vBBBB
180       dec.vA = InstAA(inst);
181       dec.vB = bytecode[1];
182       return dec;
183     case k21s:  // op vAA, #+BBBB
184     case k21t:  // op vAA, +BBBB
185       dec.vA = InstAA(inst);
186       dec.vB = s2(bytecode[1]);  // sign-extend 16-bit value
187       return dec;
188     case k21h:  // op vAA, #+BBBB0000[00000000]
189       dec.vA = InstAA(inst);
190       // The value should be treated as right-zero-extended, but we don't
191       // actually do that here. Among other things, we don't know if it's
192       // the top bits of a 32- or 64-bit value.
193       dec.vB = bytecode[1];
194       return dec;
195     case k23x:  // op vAA, vBB, vCC
196       dec.vA = InstAA(inst);
197       dec.vB = bytecode[1] & 0xff;
198       dec.vC = bytecode[1] >> 8;
199       return dec;
200     case k22b:  // op vAA, vBB, #+CC
201       dec.vA = InstAA(inst);
202       dec.vB = bytecode[1] & 0xff;
203       dec.vC = s1(bytecode[1] >> 8);  // sign-extend 8-bit value
204       return dec;
205     case k22s:  // op vA, vB, #+CCCC
206     case k22t:  // op vA, vB, +CCCC
207       dec.vA = InstA(inst);
208       dec.vB = InstB(inst);
209       dec.vC = s2(bytecode[1]);  // sign-extend 16-bit value
210       return dec;
211     case k22c:   // op vA, vB, thing@CCCC
212     case k22cs:  // [opt] op vA, vB, field offset CCCC
213       dec.vA = InstA(inst);
214       dec.vB = InstB(inst);
215       dec.vC = bytecode[1];
216       return dec;
217     case k30t:  // op +AAAAAAAA
218       dec.vA = FetchU4(bytecode + 1);
219       return dec;
220     case k31t:  // op vAA, +BBBBBBBB
221     case k31c:  // op vAA, string@BBBBBBBB
222       dec.vA = InstAA(inst);
223       dec.vB = FetchU4(bytecode + 1);
224       return dec;
225     case k32x:  // op vAAAA, vBBBB
226       dec.vA = bytecode[1];
227       dec.vB = bytecode[2];
228       return dec;
229     case k31i:  // op vAA, #+BBBBBBBB
230       dec.vA = InstAA(inst);
231       dec.vB = FetchU4(bytecode + 1);
232       return dec;
233     case k35c:               // op {vC, vD, vE, vF, vG}, thing@BBBB
234     case k35ms:              // [opt] invoke-virtual+super
235     case k35mi: {            // [opt] inline invoke
236       dec.vA = InstB(inst);  // This is labeled A in the spec.
237       dec.vB = bytecode[1];
238 
239       u2 regList = bytecode[2];
240 
241       // Copy the argument registers into the arg[] array, and
242       // also copy the first argument (if any) into vC. (The
243       // Instruction structure doesn't have separate
244       // fields for {vD, vE, vF, vG}, so there's no need to make
245       // copies of those.) Note that cases 5..2 fall through.
246       switch (dec.vA) {
247         case 5:
248           // A fifth arg is verboten for inline invokes
249           SLICER_CHECK_NE(format, k35mi);
250 
251           // Per note at the top of this format decoder, the
252           // fifth argument comes from the A field in the
253           // instruction, but it's labeled G in the spec.
254           dec.arg[4] = InstA(inst);
255           FALLTHROUGH_INTENDED;
256         case 4:
257           dec.arg[3] = (regList >> 12) & 0x0f;
258           FALLTHROUGH_INTENDED;
259         case 3:
260           dec.arg[2] = (regList >> 8) & 0x0f;
261           FALLTHROUGH_INTENDED;
262         case 2:
263           dec.arg[1] = (regList >> 4) & 0x0f;
264           FALLTHROUGH_INTENDED;
265         case 1:
266           dec.vC = dec.arg[0] = regList & 0x0f;
267           FALLTHROUGH_INTENDED;
268         case 0:
269           // Valid, but no need to do anything
270           return dec;
271       }
272     }
273       SLICER_CHECK(!"Invalid arg count in 35c/35ms/35mi");
274     case k3rc:   // op {vCCCC .. v(CCCC+AA-1)}, meth@BBBB
275     case k3rms:  // [opt] invoke-virtual+super/range
276     case k3rmi:  // [opt] execute-inline/range
277       dec.vA = InstAA(inst);
278       dec.vB = bytecode[1];
279       dec.vC = bytecode[2];
280       return dec;
281     case k45cc: {
282       // AG op BBBB FEDC HHHH
283       dec.vA = InstB(inst);  // This is labelled A in the spec.
284       dec.vB = bytecode[1];  // vB meth@BBBB
285 
286       u2 regList = bytecode[2];
287       dec.vC = regList & 0xf;
288       dec.arg[0] = (regList >> 4) & 0xf;  // vD
289       dec.arg[1] = (regList >> 8) & 0xf;  // vE
290       dec.arg[2] = (regList >> 12);       // vF
291       dec.arg[3] = InstA(inst);           // vG
292       dec.arg[4] = bytecode[3];           // vH proto@HHHH
293     }
294       return dec;
295     case k4rcc:
296       // AA op BBBB CCCC HHHH
297       dec.vA = InstAA(inst);
298       dec.vB = bytecode[1];
299       dec.vC = bytecode[2];
300       dec.arg[4] = bytecode[3];  // vH proto@HHHH
301       return dec;
302     case k51l:  // op vAA, #+BBBBBBBBBBBBBBBB
303       dec.vA = InstAA(inst);
304       dec.vB_wide = FetchU8(bytecode + 1);
305       return dec;
306   }
307 
308   std::stringstream ss;
309   ss << "Can't decode unexpected format " << format << " for " << opcode;
310   SLICER_FATAL(ss.str());
311 }
312 
// Formats |value| as lowercase hexadecimal with a "0x" prefix, zero-padded to
// at least two digits (wider values are printed in full).
static inline std::string HexByte(int value) {
  std::ostringstream out;
  out.fill('0');
  out << "0x";
  out << std::hex << std::setw(2) << value;
  return out.str();
}
318 
operator <<(std::ostream & os,Opcode opcode)319 std::ostream& operator<<(std::ostream& os, Opcode opcode) {
320   return os << "[" << HexByte(opcode) << "] " << gOpcodeNames[opcode];
321 }
322 
operator <<(std::ostream & os,InstructionFormat format)323 std::ostream& operator<<(std::ostream& os, InstructionFormat format) {
324   switch (format) {
325   #define EMIT_INSTRUCTION_FORMAT_NAME(name) \
326     case InstructionFormat::k##name: return os << #name;
327   #include "export/slicer/dex_instruction_list.h"
328   DEX_INSTRUCTION_FORMAT_LIST(EMIT_INSTRUCTION_FORMAT_NAME)
329   #undef EMIT_INSTRUCTION_FORMAT_NAME
330   #undef DEX_INSTRUCTION_FORMAT_LIST
331   #undef DEX_INSTRUCTION_LIST
332   }
333   return os << "[" << HexByte(format) << "] " << "Unknown";
334 }
335 
336 }  // namespace dex
337