1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include "dex_format.h"
20 
21 #include <iosfwd>
22 #include <stddef.h>
23 
24 // .dex bytecode definitions and helpers:
25 // https://source.android.com/devices/tech/dalvik/dalvik-bytecode.html
26 
27 namespace dex {
28 
// Number of Dalvik opcodes (256 == 0x100).
constexpr size_t kNumPackedOpcodes{256};
31 
32 // Switch table and array data signatures are a code unit consisting
33 // of "NOP" (0x00) in the low-order byte and a non-zero identifying
34 // code in the high-order byte. (A true NOP is 0x0000.)
35 constexpr u2 kPackedSwitchSignature = 0x0100;
36 constexpr u2 kSparseSwitchSignature = 0x0200;
37 constexpr u2 kArrayDataSignature = 0x0300;
38 
// Provides the DEX_INSTRUCTION_LIST and DEX_INSTRUCTION_FORMAT_LIST macros
40 #include "dex_instruction_list.h"
41 
// Enumeration of all Dalvik opcodes.
//
// The enumerators are generated from DEX_INSTRUCTION_LIST: INSTRUCTION_ENUM
// turns an (opcode, cname, ...) entry into `OP_<cname> = <opcode>` and
// discards any remaining entry fields.
enum Opcode : u1 {
// Temporary helper macro, only defined while the list is expanded.
#define INSTRUCTION_ENUM(opcode, cname, ...) OP_##cname = (opcode),
  DEX_INSTRUCTION_LIST(INSTRUCTION_ENUM)
#undef INSTRUCTION_ENUM
};
48 
// Instruction formats associated with Dalvik opcodes.
//
// The enumerators are generated from DEX_INSTRUCTION_FORMAT_LIST:
// INSTRUCTION_FORMAT_ENUM turns each format name into `k<name>`. No explicit
// values are assigned, so enumerators are numbered from 0 in list order.
enum InstructionFormat : u1 {
// Temporary helper macro, only defined while the list is expanded.
#define INSTRUCTION_FORMAT_ENUM(name) k##name,
// NOTE(review): this header is already included just before the Opcode enum;
// this second include looks redundant -- confirm before removing.
#include "dex_instruction_list.h"
  DEX_INSTRUCTION_FORMAT_LIST(INSTRUCTION_FORMAT_ENUM)
#undef INSTRUCTION_FORMAT_ENUM
};
56 
// The list macros were only needed to generate the enums above; undefine
// them so they are no longer defined past this point in the header.
#undef DEX_INSTRUCTION_FORMAT_LIST
#undef DEX_INSTRUCTION_LIST
59 
60 using OpcodeFlags = u1;
61 enum : OpcodeFlags {
62   kBranch = 0x01,         // conditional or unconditional branch
63   kContinue = 0x02,       // flow can continue to next statement
64   kSwitch = 0x04,         // switch statement
65   kThrow = 0x08,          // could cause an exception to be thrown
66   kReturn = 0x10,         // returns, no additional statements
67   kInvoke = 0x20,         // a flavor of invoke
68   kUnconditional = 0x40,  // unconditional branch
69   kExperimental = 0x80,   // is an experimental opcode
70 };
71 
72 using VerifyFlags = u4;
73 enum : VerifyFlags {
74   kVerifyNothing = 0x0000000,
75   kVerifyRegA = 0x0000001,
76   kVerifyRegAWide = 0x0000002,
77   kVerifyRegB = 0x0000004,
78   kVerifyRegBField = 0x0000008,
79   kVerifyRegBMethod = 0x0000010,
80   kVerifyRegBNewInstance = 0x0000020,
81   kVerifyRegBString = 0x0000040,
82   kVerifyRegBType = 0x0000080,
83   kVerifyRegBWide = 0x0000100,
84   kVerifyRegC = 0x0000200,
85   kVerifyRegCField = 0x0000400,
86   kVerifyRegCNewArray = 0x0000800,
87   kVerifyRegCType = 0x0001000,
88   kVerifyRegCWide = 0x0002000,
89   kVerifyArrayData = 0x0004000,
90   kVerifyBranchTarget = 0x0008000,
91   kVerifySwitchTargets = 0x0010000,
92   kVerifyVarArg = 0x0020000,
93   kVerifyVarArgNonZero = 0x0040000,
94   kVerifyVarArgRange = 0x0080000,
95   kVerifyVarArgRangeNonZero = 0x0100000,
96   kVerifyRuntimeOnly = 0x0200000,
97   kVerifyError = 0x0400000,
98   kVerifyRegHPrototype = 0x0800000,
99   kVerifyRegBCallSite = 0x1000000,
100   kVerifyRegBMethodHandle = 0x2000000,
101   kVerifyRegBPrototype = 0x4000000,
102 };
103 
104 // Types of indexed reference that are associated with opcodes whose
105 // formats include such an indexed reference (e.g., 21c and 35c).
106 enum InstructionIndexType : u1 {
107   kIndexUnknown = 0,
108   kIndexNone,               // has no index
109   kIndexVaries,             // "It depends." Used for throw-verification-error
110   kIndexTypeRef,            // type reference index
111   kIndexStringRef,          // string reference index
112   kIndexMethodRef,          // method reference index
113   kIndexFieldRef,           // field reference index
114   kIndexInlineMethod,       // inline method index (for inline linked methods)
115   kIndexVtableOffset,       // vtable offset (for static linked methods)
116   kIndexFieldOffset,        // field offset (for static linked fields)
117   kIndexMethodAndProtoRef,  // method index and proto index
118   kIndexCallSiteRef,        // call site index
119   kIndexMethodHandleRef,    // constant method handle reference index
120   kIndexProtoRef,           // constant prototype reference index
121 };
122 
// Holds the contents of a decoded instruction (the result type of
// DecodeInstruction()).
//
// NOTE(review): which fields are meaningful presumably depends on the
// instruction format of `opcode` -- confirm against the decoder.
struct Instruction {
  u4 vA;          // the A field of the instruction
  u4 vB;          // the B field of the instruction
  u8 vB_wide;     // 64bit version of the B field (for k51l)
  u4 vC;          // the C field of the instruction
  u4 arg[5];      // vC/D/E/F/G in invoke or filled-new-array
  Opcode opcode;  // instruction opcode
};
132 
// "packed-switch-payload" format
//
// Field meanings below follow the Dalvik bytecode reference; `targets` is a
// flexible array member, so sizeof() covers only the fixed-size header.
struct PackedSwitchPayload {
  u2 ident;      // identifying code unit (presumably kPackedSwitchSignature)
  u2 size;       // number of entries in the table, per the Dalvik spec
  s4 first_key;  // first switch case value, per the Dalvik spec
  s4 targets[];  // branch targets, one per entry, per the Dalvik spec
};
140 
// "sparse-switch-payload" format
//
// Field meanings below follow the Dalvik bytecode reference; `data` is a
// flexible array member, so sizeof() covers only the fixed-size header.
struct SparseSwitchPayload {
  u2 ident;   // identifying code unit (presumably kSparseSwitchSignature)
  u2 size;    // number of entries in the table, per the Dalvik spec
  s4 data[];  // per the Dalvik spec: `size` keys followed by `size` targets
};
147 
// "fill-array-data-payload" format
//
// Field meanings below follow the Dalvik bytecode reference; `data` is a
// flexible array member, so sizeof() covers only the fixed-size header.
struct ArrayData {
  u2 ident;          // identifying code unit (presumably kArrayDataSignature)
  u2 element_width;  // number of bytes in each element, per the Dalvik spec
  u4 size;           // number of elements in the table, per the Dalvik spec
  u1 data[];         // raw element bytes
};
155 
// Collect the enums in a struct for better locality.
//
// NOTE(review): presumably one descriptor exists per opcode and backs the
// Get*FromOpcode() lookups declared in this header -- confirm in the .cc file.
struct InstructionDescriptor {
  u4 verify_flags;  // Set of VerifyFlags bits (kVerify* constants).
  InstructionFormat format;
  InstructionIndexType index_type;
  u1 flags;  // Set of OpcodeFlags bits (kBranch, kContinue, ...).
};
163 
// Extracts the opcode from a Dalvik code unit (bytecode).
// NOTE(review): presumably the opcode is the low-order byte of the code
// unit -- confirm against the definition.
Opcode OpcodeFromBytecode(u2 bytecode);

// Returns the name of an opcode.
// NOTE(review): ownership/lifetime of the returned string is not visible
// here; presumably a static string that must not be freed -- confirm.
const char* GetOpcodeName(Opcode opcode);

// Returns the index type (an InstructionIndexType value) associated with the
// specified opcode.
InstructionIndexType GetIndexTypeFromOpcode(Opcode opcode);

// Returns the format (an InstructionFormat value) associated with the
// specified opcode.
InstructionFormat GetFormatFromOpcode(Opcode opcode);

// Returns the flags for the specified opcode, as a set of OpcodeFlags bits
// (kBranch, kContinue, ...).
OpcodeFlags GetFlagsFromOpcode(Opcode opcode);

// Returns the verify flags for the specified opcode, as a set of VerifyFlags
// bits (kVerify* constants).
VerifyFlags GetVerifyFlagsFromOpcode(Opcode opcode);
181 
// Returns the instruction width for the specified opcode format.
// NOTE(review): the unit is presumably 16-bit code units -- confirm.
size_t GetWidthFromFormat(InstructionFormat format);

// Return the width of the specified instruction, or 0 if not defined.  Also
// works for special OP_NOP entries, including switch statement data tables
// and array data.
// `bytecode` points at the first code unit of the instruction or payload.
size_t GetWidthFromBytecode(const u2* bytecode);

// Decodes the .dex instruction starting at `bytecode` and returns its
// contents as an Instruction.
Instruction DecodeInstruction(const u2* bytecode);
192 
// Writes a hex formatted opcode to an output stream and returns the stream.
std::ostream& operator<<(std::ostream& os, Opcode opcode);

// Writes the name of an instruction format to an output stream and returns
// the stream.
std::ostream& operator<<(std::ostream& os, InstructionFormat format);
198 
199 }  // namespace dex
200