#!/usr/bin/python
#
# Copyright (C) 2018 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Parse assembler definition file.

A definition JSON file for this script has the following form:
{
  "arch": "XXX",
  "insns": [
    {
      "name": "ShlbRegReg",
      "args": [
        {"class": "GeneralReg8", "usage": "use_def"},
        {"class": "RCX", "usage": "use"},
        {"class": "FLAGS", "usage": "def"}
      ],
      "asm": "ShlbByCl",
      "mnemo": "SHLB"
    },
    ...
  ]
}

'arch' is primarily used for the C++ namespace in the LIR generator. This
script does not interpret it; load_asm_defs() returns it unchanged.

'insns' is an array of objects, each describing a single instruction variant.
Each instruction is an object with the following fields:
  'name' - unique instruction name, used in the LIR generator. A typical name
           is InsnOp1Op2, where 'Insn' is the instruction name and 'Op1',
           'Op2' are operand types, such as Imm, Reg, Mem(Op), Base, Disp.
  'args' - ordered list of instruction arguments. Each argument specifies its
           'class' (any GP register, fixed GP register, any XMM register,
           immediate, memory operand, flags register) and its 'usage': how it
           is treated by the instruction (used, defined, both used and
           defined).
  'asm' - which mnemonic of the internal assembler is used.
  'opcodes' - optional flag for autogeneration: if opcode bytes are specified
              then the implementation is generated automatically.
  'reg_to_rm' - optional flag to make the RM field in the ModRegRM byte the
                destination (most instructions with two registers use reg as
                the destination).
  'mnemo' - how the instruction shall be named in LIR dumps (ignored here).

Instead of 'name', 'asm' and 'mnemo' an instruction may provide one of:
  'stems' - list of assembler mnemonics. The instruction is expanded into one
            instruction per stem, with 'asm' set to the stem, 'mnemo' set to
            the upper-cased stem and 'name' derived from the stem and the
            argument classes.
  'encodings' - object mapping a stem to an object of extra fields (which must
                include at least 'opcodes'). Expanded like 'stems', with the
                extra fields merged into each resulting instruction.

An argument 'class' may list alternatives separated by '/' (for example
"GeneralReg32/Mem32"). Such an instruction is expanded into one instruction
per alternative. At most one argument of an instruction may use this form.

A memory operand for assembler instructions can be described either as the
opaque Operand class, which provides the full power of x86 addressing modes,
or in the explicit BaseDisp format, which translates to the reg+disp form.

For some instructions (such as pop, push, jmp reg) the exact register width is
not specified and the 'GeneralReg' class is used, as the same encoding is used
for 32 and 64 bit operands, depending on the current CPU mode.

This script produces an inline file for the internal assembler's header. For
the example above it would yield the single line

  void ShlbByCl(Register);

Fixed arguments (such as 'RCX') and flags ('FLAGS') are ignored when generating
the assembler's header, while for the others an argument is emitted whose type
depends on the argument's class.
"""

import copy
import json


def is_imm(arg_type):
  """Return True if arg_type is one of the immediate operand classes."""
  return arg_type in ('Imm2', 'Imm8', 'Imm16', 'Imm32', 'Imm64')


def is_disp(arg_type):
  """Return True if arg_type is the 'Disp' class."""
  return arg_type == 'Disp'


def is_mem_op(arg_type):
  """Return True if arg_type is one of the memory operand classes."""
  return arg_type in ('Mem8', 'Mem16', 'Mem32', 'Mem64', 'Mem128',
                      'MemX87', 'MemX8716', 'MemX8732', 'MemX8764', 'MemX8780',
                      'VecMem32', 'VecMem64', 'VecMem128')


def is_cond(arg_type):
  """Return True if arg_type is the 'Cond' class."""
  return arg_type == 'Cond'


def is_label(arg_type):
  """Return True if arg_type is the 'Label' class."""
  return arg_type == 'Label'


def is_x87reg(arg_type):
  """Return True if arg_type is the 'RegX87' class."""
  return arg_type == 'RegX87'


def is_greg(arg_type):
  """Return True if arg_type is one of the 'GeneralReg*' classes."""
  return arg_type in ('GeneralReg',
                      'GeneralReg8', 'GeneralReg16',
                      'GeneralReg32', 'GeneralReg64')


def is_xreg(arg_type):
  """Return True if arg_type is one of the XmmReg/VecReg*/FpReg* classes."""
  return arg_type in ('XmmReg',
                      'VecReg64', 'VecReg128',
                      'FpReg32', 'FpReg64')


# Operands of this type are NOT passed to assembler
def is_implicit_reg(arg_type):
  """Return True if arg_type is a fixed (implicit) register or FLAGS."""
  return arg_type in ('RAX', 'EAX', 'AX', 'AL',
                      'RCX', 'ECX', 'CL', 'ST', 'ST1',
                      'RDX', 'EDX', 'DX', 'CC',
                      'RBX', 'EBX', 'BX', 'SW',
                      'RDI', 'RSI', 'RSP', 'FLAGS')


def exactly_one_of(iterable):
  """Return True if exactly one element of iterable is truthy."""
  return sum(1 for elem in iterable if elem) == 1


def get_mem_macro_name(insn, addr_mode=None):
  """Build the macro name of an instruction from its 'asm' and 'args'.

  The name starts with the instruction's 'asm' value (with a trailing 'ByCl'
  removed) and gets one suffix per argument, in order:
    'Reg'  - x87, general purpose and implicit registers;
    'XReg' - XmmReg/VecReg*/FpReg* registers;
    'Imm'  - immediates;
    'Mem' + addr_mode - memory operands when addr_mode is given;
    'Op'   - memory operands when addr_mode is None.
  FLAGS, Cond and Label arguments contribute nothing.

  Args:
    insn: instruction dict with 'asm' and 'args' fields.
    addr_mode: optional addressing mode name appended after 'Mem' for memory
        operands.

  Returns:
    The macro name as a string.

  Raises:
    Exception: if an argument has a class not handled above.
  """
  macro_name = insn.get('asm')
  if macro_name.endswith('ByCl'):
    macro_name = macro_name[:-4]
  for arg in insn['args']:
    clazz = arg['class']
    # Don't reflect FLAGS or Conditions or Labels in the name - we don't ever
    # have two different instructions where these cause the difference.
    # Note: FLAGS is also listed in is_implicit_reg(), so this check must come
    # before the register branch.
    if clazz == 'FLAGS' or is_cond(clazz) or is_label(clazz):
      pass
    elif is_x87reg(clazz) or is_greg(clazz) or is_implicit_reg(clazz):
      macro_name += 'Reg'
    elif is_xreg(clazz):
      macro_name += 'XReg'
    elif is_imm(clazz):
      macro_name += 'Imm'
    elif is_mem_op(clazz):
      if addr_mode is not None:
        macro_name += 'Mem' + addr_mode
      else:
        macro_name += 'Op'
    else:
      raise Exception('arg type %s is not supported' % clazz)
  return macro_name


def _expand_name(insn, stem, encoding=None):
  """Return a copy of insn specialized for the given stem.

  Args:
    insn: instruction dict; it is not modified.
    stem: assembler mnemonic; becomes 'asm', and upper-cased becomes 'mnemo'.
    encoding: optional dict of extra fields merged into the result.

  Returns:
    A new instruction dict with 'asm', 'name' and 'mnemo' filled in.
  """
  # Make deep copy of the instruction to make sure consumers could treat them
  # as independent entities and add/remove marks freely.
  #
  # JSON never has "merged" objects thus having them in result violates
  # expectations.
  expanded_insn = copy.deepcopy(insn)
  expanded_insn['asm'] = stem
  expanded_insn['name'] = get_mem_macro_name(expanded_insn)
  expanded_insn['mnemo'] = stem.upper()
  if encoding is not None:
    expanded_insn.update(encoding)
  return expanded_insn


def _expand_insn_by_encodings(insns):
  """Expand instructions that use 'encodings' or 'stems' into plain ones.

  Instructions with a non-empty 'encodings' produce one instruction per
  (stem, encoding) pair; instructions with a non-empty 'stems' produce one per
  stem; all other instructions must already carry 'name', 'asm' and 'mnemo'
  and are passed through as-is.

  Args:
    insns: list of instruction dicts.

  Returns:
    A new list of instruction dicts.

  Raises:
    AssertionError: if an instruction mixes the mutually exclusive fields, if
        an encoding lacks 'opcodes', or if a plain instruction lacks one of
        'name', 'asm', 'mnemo'.
  """
  expanded_insns = []
  for insn in insns:
    if insn.get('encodings'):
      assert all((f not in insn) for f in ['stems', 'name', 'asm', 'mnemo'])
      # If we have encoding then we must have at least opcodes
      assert all('opcodes' in encoding
                 for encoding in insn['encodings'].values())
      expanded_insns.extend([_expand_name(insn, stem, encoding)
                             for stem, encoding in insn['encodings'].items()])
    elif insn.get('stems'):
      # The field is called 'encodings'; checking any other spelling would
      # make this exclusivity check vacuous.
      assert all((f not in insn)
                 for f in ['encodings', 'name', 'asm', 'mnemo'])
      expanded_insns.extend([_expand_name(insn, stem)
                             for stem in insn['stems']])
    else:
      assert all((f in insn) for f in ['name', 'asm', 'mnemo'])
      expanded_insns.append(insn)
  return expanded_insns


def _expand_insns_by_operands(insns):
  """Expand instructions whose argument class lists '/'-separated variants.

  An instruction with an argument class such as 'A/B' is replaced by one deep
  copy per alternative, in the listed order. Instructions without such an
  argument are passed through as-is. Input instructions are not modified.

  Args:
    insns: list of instruction dicts, each with an 'args' list.

  Returns:
    A new list of instruction dicts.

  Raises:
    AssertionError: if more than one argument of an instruction uses '/'.
  """
  expanded_insns = []
  for insn in insns:
    split_indexes = [index for index, arg in enumerate(insn['args'])
                     if '/' in arg['class']]
    if not split_indexes:
      expanded_insns.append(insn)
      continue
    # Only a single argument per instruction may list alternatives.
    assert len(split_indexes) == 1
    split_index = split_indexes[0]
    for subclass in insn['args'][split_index]['class'].split('/'):
      # Patch the copy rather than the source instruction so that the caller's
      # data keeps its original 'A/B' class.
      expanded_insn = copy.deepcopy(insn)
      expanded_insn['args'][split_index]['class'] = subclass
      expanded_insns.append(expanded_insn)
  return expanded_insns


def load_asm_defs(asm_def):
  """Load an assembler definition file and expand its instructions.

  Args:
    asm_def: path to the definition JSON file.

  Returns:
    A tuple (arch, insns): the value of the 'arch' field (None if absent) and
    the list of fully expanded instruction dicts sorted by their 'asm' field.
  """
  with open(asm_def) as asm:
    obj = json.load(asm)
  insns = obj.get('insns')
  insns = _expand_insns_by_operands(insns)
  insns = _expand_insn_by_encodings(insns)
  # sorted() is stable, so variants sharing an 'asm' keep expansion order.
  insns = sorted(insns, key=lambda i: i.get('asm'))
  return obj.get('arch'), insns