#!/usr/bin/python
#
# Copyright (C) 2023 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Generate LIR files out of the definition file.

* Operand usage

Register allocator needs operand usage to learn which operands can share the
same register.

To understand register sharing options, register allocator assumes insn works
in these steps:
- read input operands
- do the job
- write output operands

So, input-output operands should have dedicated registers, while input-only
operands can share registers with output-only operands.

There might be an exception when output-only operand is written before all
input-only operands are read, so its register can't be shared. Such operands
are usually referred to as output-only-early-clobber operands.

For register sharing, output-only-early-clobber operand is the same as
input-output operand, but it is unnatural to describe output-only as
input-output, so we use a special keyword for it.

Finally, keywords are:
use - input-only
def - output-only
def_early_clobber - output-only-early-clobber
use_def - input-output

* Scratch operands

Scratch operands are actually output operands - indeed, their original value
is not used and they get some new value after the insn is done. However, they
are usually written before all input operands are read, so it makes sense to
describe scratch operands as output-only-early-clobber.
"""

import asm_defs
import json
import sys


# Addressing modes accepted for instructions that carry a memory operand.
_ADDR_MODES = ('Absolute', 'BaseDisp', 'IndexDisp', 'BaseIndexDisp')


def _is_reg(arg_type):
    """Tell whether arg_type is a general, extended or implicit register class."""
    return (asm_defs.is_greg(arg_type)
            or asm_defs.is_xreg(arg_type)
            or asm_defs.is_implicit_reg(arg_type))


class Operand(object):
    """Plain attribute bag describing one operand of a generated insn class.

    The factory functions below fill in: type, name, reg_operand_info,
    initializer and asm_arg.
    """


def _get_reg_operand_info(usage, kind):
    """Return the C++ initializer for the register class/usage pair.

    usage is one of the keywords from the module docstring; any other value
    trips an assertion.
    """
    machine_reg_kinds = {
        'use': 'kUse',
        'def': 'kDef',
        'use_def': 'kUseDef',
        'def_early_clobber': 'kDefEarlyClobber',
    }
    machine_reg_kind = machine_reg_kinds.get(usage)
    if machine_reg_kind is not None:
        return '{ &k%s, MachineRegKind::%s }' % (kind, machine_reg_kind)
    assert False, 'unknown operand usage %s' % (usage)


def _make_reg_operand(r, usage, kind):
    """Describe register operand number r of the given class and usage."""
    operand = Operand()
    operand.type = 'MachineReg'
    operand.name = 'r%d' % (r)
    operand.reg_operand_info = _get_reg_operand_info(usage, kind)
    operand.initializer = 'SetRegAt(%d, r%d)' % (r, r)
    # Explicit registers are handed to the assembler through a typed getter.
    explicit_getters = ((asm_defs.is_greg, 'GetGReg'), (asm_defs.is_xreg, 'GetXReg'))
    for is_kind, getter in explicit_getters:
        if is_kind(kind):
            operand.asm_arg = '%s(RegAt(%d))' % (getter, r)
            return operand
    # Implicit registers produce no assembler argument.
    if asm_defs.is_implicit_reg(kind):
        operand.asm_arg = None
        return operand
    assert False, 'unknown register kind %s' % (kind)
    return operand


def _make_plain_operand(type_name, name, initializer, asm_arg):
    """Describe an operand that is not held in a machine register."""
    operand = Operand()
    operand.type = type_name
    operand.name = name
    operand.reg_operand_info = None
    operand.initializer = initializer
    operand.asm_arg = asm_arg
    return operand


def _make_imm_operand(bits):
    """Describe an immediate operand that is `bits` wide."""
    imm_type = 'int%s_t' % (bits)
    return _make_plain_operand(
        imm_type, 'imm', 'set_imm(imm)', 'static_cast<%s>(imm())' % (imm_type))


def _make_scale_operand():
    """Describe the scale component of an indexed memory operand."""
    return _make_plain_operand(
        'MachineMemOperandScale', 'scale', 'set_scale(scale)', 'ToScaleFactor(scale())')


def _make_disp_operand():
    """Describe the displacement component of a memory operand."""
    return _make_plain_operand('uint32_t', 'disp', 'set_disp(disp)', 'disp()')


def _make_cond_operand():
    """Describe a condition-code operand."""
    return _make_plain_operand('Assembler::Condition', 'cond', 'set_cond(cond)', 'cond()')


def _make_label_operand():
    """Describe a label operand, which is stored in the immediate field."""
    # We never have both immediate and Label in same insn.
    return _make_plain_operand(
        'Label*',
        'label',
        'set_imm(reinterpret_cast<uintptr_t>(label))',
        '*reinterpret_cast<Label*>(imm())')


def _check_insn_defs(insn, skip_unsupported=False):
    """Validate the argument list of one instruction definition.

    Asserts that the insn has at most one immediate-or-label, one memory
    operand and one displacement, and that every argument class is known.

    Returns True when the definition is usable. When skip_unsupported is set,
    a memory operand with an unsupported addressing mode makes the function
    return False instead of asserting.
    """
    has_imm_or_label = False
    has_memop = False
    has_disp = False
    for arg in insn.get('args'):
        arg_class = arg.get('class')
        if _is_reg(arg_class):
            continue
        if asm_defs.is_imm(arg_class):
            # We share field for immediate and label in 'insn'.
            assert not has_imm_or_label
            has_imm_or_label = True
            continue
        if asm_defs.is_mem_op(arg_class):
            # No insn can have more than one memop.
            assert not has_memop
            addr_mode = insn.get('addr_mode')
            supported = addr_mode in _ADDR_MODES
            if skip_unsupported and not supported:
                return False
            assert supported, 'unknown addressing mode %s' % (addr_mode)
            has_memop = True
            continue
        if asm_defs.is_disp(arg_class):
            assert not has_disp
            has_disp = True
            continue
        if asm_defs.is_cond(arg_class):
            continue
        if asm_defs.is_label(arg_class):
            assert not has_imm_or_label
            has_imm_or_label = True
            continue
        assert False, 'unknown operand class %s' % (arg_class)
    return True


def _make_mem_operands(addr_mode, first_reg):
    """Expand a memory operand into its components.

    Returns (operands, reg_count), where reg_count is the number of register
    operands allocated starting at index first_reg.
    """
    operands = []
    reg_count = 0
    if addr_mode in ('BaseDisp', 'BaseIndexDisp'):
        operands.append(_make_reg_operand(first_reg + reg_count, 'use', 'GeneralReg32'))
        reg_count += 1
    if addr_mode in ('IndexDisp', 'BaseIndexDisp'):
        operands.append(_make_reg_operand(first_reg + reg_count, 'use', 'GeneralReg32'))
        reg_count += 1
        operands.append(_make_scale_operand())
    # Every addressing mode ends with a displacement.
    operands.append(_make_disp_operand())
    return operands, reg_count


def _get_insn_operands(insn):
    """Build the operand descriptions of an instruction.

    For each operand, define:
    - type
    - name
    - reg_operand_info
    - initializer
    - asm_arg

    Returns (operands, side_effects), where side_effects tells whether the
    insn must be treated as having effects beyond its register operands.
    """
    operands = []
    next_reg = 0
    # Int3, Lfence, Mfence, Sfence, and UD2 have side effects not related to arguments.
    has_side_effects = insn['name'] in ('Int3', 'Lfence', 'Mfence', 'Sfence', 'UD2')
    for arg in insn.get('args'):
        arg_class = arg.get('class')
        if _is_reg(arg_class):
            operands.append(_make_reg_operand(next_reg, arg.get('usage'), arg_class))
            next_reg += 1
        elif asm_defs.is_imm(arg_class):
            # We share field for immediate and label in 'insn'.
            # The width is whatever follows the three-character class prefix.
            operands.append(_make_imm_operand(arg_class[3:]))
        elif asm_defs.is_mem_op(arg_class):
            # If operand is memory and it's not "use" then we have side_effects
            if arg['usage'] != 'use':
                has_side_effects = True
            # No insn can have more than one memop.
            addr_mode = insn.get('addr_mode')
            assert addr_mode in _ADDR_MODES, 'unknown addressing mode %s' % (addr_mode)
            mem_operands, mem_reg_count = _make_mem_operands(addr_mode, next_reg)
            operands.extend(mem_operands)
            next_reg += mem_reg_count
        elif asm_defs.is_disp(arg_class):
            operands.append(_make_disp_operand())
        elif asm_defs.is_cond(arg_class):
            operands.append(_make_cond_operand())
        elif asm_defs.is_label(arg_class):
            operands.append(_make_label_operand())
        else:
            assert False, 'unknown operand class %s' % (arg_class)
    return operands, has_side_effects


def _get_insn_debug_operands(insn):
    """Return the C++ expressions that print each operand of the insn."""
    reg_helper = 'GetRegOperandDebugString'
    mem_reg_counts = {'BaseDisp': 1, 'IndexDisp': 1, 'BaseIndexDisp': 2}
    helpers = []
    reg_index = 0
    for arg in insn.get('args'):
        arg_class = arg.get('class')
        if _is_reg(arg_class):
            if asm_defs.is_greg(arg_class) or asm_defs.is_xreg(arg_class):
                helpers.append('%s(this, %d)' % (reg_helper, reg_index))
            elif asm_defs.is_implicit_reg(arg_class):
                helpers.append('GetImplicitRegOperandDebugString(this, %d)' % (reg_index))
            else:
                assert False, 'unknown register kind %s' % (arg_class)
            reg_index += 1
        elif asm_defs.is_imm(arg_class):
            # We share field for immediate and label in 'insn'.
            helpers.append('GetImmOperandDebugString(this)')
        elif asm_defs.is_mem_op(arg_class):
            # No insn can have more than one memop.
            addr_mode = insn.get('addr_mode')
            if addr_mode == 'Absolute':
                helpers.append('GetAbsoluteMemOperandDebugString(this)')
            elif addr_mode in ('BaseDisp', 'IndexDisp', 'BaseIndexDisp'):
                helpers.append('Get%sMemOperandDebugString(this, %d)' % (addr_mode, reg_index))
                reg_index += mem_reg_counts[addr_mode]
            else:
                assert False, 'unknown addr_mode %s' % (addr_mode)
        elif asm_defs.is_disp(arg_class):
            # Hack: replace previous reg helper with mem helper.
            assert helpers
            assert helpers[-1].startswith(reg_helper)
            helpers[-1] = 'GetBaseDispMemOperandDebugString' + helpers[-1][len(reg_helper):]
        elif asm_defs.is_cond(arg_class):
            helpers.append('GetCondOperandDebugString(this)')
        elif asm_defs.is_label(arg_class):
            helpers.append('GetLabelOperandDebugString(this)')
        else:
            assert False, 'unknown operand class %s' % (arg_class)
    return helpers


# NOTE(review): whitespace runs inside the emitted C++ string literals (this
# one included) could not be confirmed from the reviewed copy - verify widths.
INDENT = ' '


def _gen_insn_ctor(f, insn):
    """Write the kInfo definition and the constructor of the insn class to f."""
    class_name = insn.get('name')
    operands, _ = _get_insn_operands(insn)
    param_list = ', '.join('%s %s' % (op.type, op.name) for op in operands)
    body = '\n'.join('%s%s;' % (INDENT, op.initializer) for op in operands)
    lines = [
        'constexpr MachineInsnInfo %s::kInfo;' % (class_name),
        '%s::%s(%s) : MachineInsnForArch(&kInfo) {' % (class_name, class_name, param_list),
        body,
        '}',
    ]
    print('\n'.join(lines), file=f)


# TODO(b/232598137): Maybe we should just implement generic printing in C++
# instead of generating it for every instruction.
def _gen_insn_debug(f, insn):
    """Write the GetDebugString() definition of the insn class to f.

    An insn without operands returns just its mnemonic; otherwise the
    generated C++ joins the operand strings with ", " and appends the recovery
    pc when it is set.
    """
    name = insn.get('name')
    mnemo = insn.get('mnemo')
    print('std::string %s::GetDebugString() const {' % (name), file=f)
    operands = _get_insn_debug_operands(insn)
    if not operands:
        print(' return "%s";' % (mnemo), file=f)
    else:
        print(' std::string s("%s ");' % (mnemo), file=f)
        print(' s += %s;' % (operands[0]), file=f)
        for op in operands[1:]:
            print(' s += ", ";', file=f)
            print(' s += %s;' % (op), file=f)
        # We don't print recovery_bb() since it can be found by edges outgoing from basic block.
        print(' if (recovery_pc()) {', file=f)
        print(' s += StringPrintf(" <0x%" PRIxPTR ">", recovery_pc());', file=f)
        print(' }', file=f)
        print(' return s;', file=f)
    print('}', file=f)


def _gen_insn_emit(f, insn):
    """Write the Emit() definition of the insn class to f.

    Operands whose asm_arg is empty (implicit registers) are not passed to the
    assembler call.
    """
    name = insn.get('name')
    asm = insn.get('asm')
    operands, _ = _get_insn_operands(insn)
    asm_args = [op.asm_arg for op in operands if op.asm_arg]
    print('void %s::Emit(CodeEmitter* as) const {' % (name), file=f)
    print('%sas->%s(%s);' % (INDENT, asm, ', '.join(asm_args)), file=f)
    print('}', file=f)


def _gen_insn_class(f, insn):
    """Write the C++ class declaration of the insn to f."""
    # NOTE(review): leading whitespace inside these literals is reproduced as
    # seen in the reviewed copy - confirm the intended C++ indentation.
    name = insn.get('name')
    operands, side_effects = _get_insn_operands(insn)
    # Only register operands contribute an entry to MachineInsnInfo.
    regs = [op.reg_operand_info for op in operands if op.reg_operand_info]
    kind = 'kMachineInsnSideEffects' if side_effects else 'kMachineInsnDefault'
    params = ['%s %s' % (op.type, op.name) for op in operands]
    print('class %s : public MachineInsnForArch {' % (name), file=f)
    print(' public:', file=f)
    print(' explicit %s(%s);' % (name, ', '.join(params)), file=f)
    print(' static constexpr MachineInsnInfo kInfo =', file=f)
    print(' MachineInsnInfo({kMachineOp%s,' % (name), file=f)
    print(' %d,' % (len(regs)), file=f)
    print(' {%s},' % (', '.join(regs)), file=f)
    print(' %s});' % (kind), file=f)
    print(' static constexpr int NumRegOperands() { return kInfo.num_reg_operands; }', file=f)
    print(' static constexpr const MachineRegKind& RegKindAt(int i) { return kInfo.reg_kinds[i]; }', file=f)
    print(' std::string GetDebugString() const override;', file=f)
    print(' void Emit(CodeEmitter* as) const override;', file=f)
    print('};', file=f)


def gen_code_2_cc(out, arch, insns):
    """Write the constructor definitions of all insns to the file `out`.

    arch is accepted for signature parity with the other generators and is
    not used.
    """
    with open(out, 'w') as f:
        for insn in insns:
            _gen_insn_ctor(f, insn)


def gen_code_debug_cc(out, arch, insns):
    """Write the GetDebugString() definitions of all insns to the file `out`."""
    with open(out, 'w') as f:
        print("""\
// This file automatically generated by gen_lir.py
// DO NOT EDIT!

#include "berberis/base/stringprintf.h"
#include "berberis/backend/%s/code_debug.h"

namespace berberis {

namespace %s {
""" % (arch, arch), file=f)
        for insn in insns:
            _gen_insn_debug(f, insn)
        print("""\

} // namespace %s

} // namespace berberis""" % (arch), file=f)


def gen_code_emit_cc(out, arch, insns):
    """Write the Emit() definitions of all insns to the file `out`."""
    with open(out, 'w') as f:
        print("""\
// This file automatically generated by gen_lir.py
// DO NOT EDIT!

#include "berberis/backend/code_emitter.h"
#include "berberis/backend/%s/code_emit.h"

namespace berberis {

namespace %s {
""" % (arch, arch), file=f)
        for insn in insns:
            _gen_insn_emit(f, insn)
        print("""\

} // namespace %s

} // namespace berberis""" % (arch), file=f)


def gen_machine_info_h(out, arch, insns):
    """Write one `using` line per insn to the file `out` (arch is unused)."""
    with open(out, 'w') as f:
        for insn in insns:
            name = insn.get('name')
            print('using %s = %s;' % (name, name), file=f)


def gen_machine_opcode_h(out, arch, insns):
    """Write one `kMachineOp<Name>,` enumerator per insn to `out` (arch is unused)."""
    with open(out, 'w') as f:
        for insn in insns:
            name = insn.get('name')
            print('kMachineOp%s,' % (name), file=f)


# Addressing modes a memory insn is expanded into. The order of the addressing
# modes here is important. It must match what MemInsns expects.
_MEM_GROUP_ADDR_MODES = ('Absolute', 'BaseDisp', 'IndexDisp', 'BaseIndexDisp')


def _gen_mem_insn_groups(f, insns):
    """Write a `using <group> = MemInsns<...>;` line for every memory insn group.

    Raises KeyError if a group lacks an insn for one of the addressing modes.
    """
    # Build a dictionary to map a memory insn group name to another dictionary,
    # which in turn maps an addressing mode to an individual memory insn.
    groups = {}
    for insn in insns:
        group_name = insn.get('mem_group_name')
        if group_name:
            groups.setdefault(group_name, {})[insn.get('addr_mode')] = insn.get('name')

    for group_name in sorted(groups):
        by_addr_mode = groups[group_name]
        mem_insns = [by_addr_mode[addr_mode] for addr_mode in _MEM_GROUP_ADDR_MODES]
        print('using %s = MemInsns<%s>;' % (group_name, ', '.join(mem_insns)), file=f)


def gen_machine_ir_h(out, arch, insns):
    """Write all insn class declarations, then the memory insn groups, to `out`."""
    with open(out, 'w') as f:
        for insn in insns:
            _gen_insn_class(f, insn)
        print('', file=f)
        _gen_mem_insn_groups(f, insns)


def _contains_mem(insn):
    """Tell whether any argument of the insn is a memory operand."""
    return any(asm_defs.is_mem_op(arg['class']) for arg in insn.get('args'))


def _create_mem_insn(insn, addr_mode):
    """Return a shallow copy of insn specialized for one addressing mode.

    The copy is renamed to the memory macro name (also used as its 'asm') and
    tagged with the name of the group shared by all of its addressing modes.
    """
    new_insn = insn.copy()
    macro_name = asm_defs.get_mem_macro_name(insn, addr_mode)
    new_insn['name'] = macro_name
    new_insn['addr_mode'] = addr_mode
    new_insn['asm'] = macro_name
    new_insn['mem_group_name'] = asm_defs.get_mem_macro_name(insn, '') + 'Insns'
    return new_insn


def _expand_mem_insns(insns):
    """Return insns with per-addressing-mode variants added before each memory insn.

    The original insn is kept in the result, after its variants.
    """
    result = []
    for insn in insns:
        if _contains_mem(insn):
            result.extend(_create_mem_insn(insn, addr_mode)
                          for addr_mode in _MEM_GROUP_ADDR_MODES)
        result.append(insn)
    return result


def _load_lir_def(allowlist_looked, allowlist_found, asm_def):
    """Load one definition file and keep only the allowlisted instructions.

    An insn is matched against allowlist_looked by its memory group name when
    it has one, otherwise by its own name. Every matched name is added to
    allowlist_found (mutated in place).

    Returns (arch, insns) with the non-allowlisted insns removed.
    """
    arch, insns = asm_defs.load_asm_defs(asm_def)
    insns = _expand_mem_insns(insns)
    # Mark all instructions to remove and remember instructions we kept
    for insn in insns:
        insn_name = insn.get('mem_group_name', insn['name'])
        if insn_name in allowlist_looked:
            allowlist_found.add(insn_name)
        else:
            insn['skip_lir'] = 1
    # Filter out disabled instructions. This also drops insns whose definition
    # already carried a truthy 'skip_lir'.
    insns = [insn for insn in insns if not insn.get('skip_lir')]
    return arch, insns


def _allowlist_instructions(allowlist_files, machine_ir_intrinsic_binding_files):
    """Collect the names of the instructions that must be generated.

    Allowlist files are JSON objects with an 'insns' list of names. Binding
    files are JSON arrays of objects; the 'insn' of every entry whose 'usage'
    is not 'interpret-only' is collected.

    An empty binding file, or one holding only license strings, contributes
    nothing.
    """
    allowlisted_names = set()
    for allowlist_file in allowlist_files:
        with open(allowlist_file) as allowlist_json:
            allowlisted_names.update(json.load(allowlist_json)['insns'])
    for binding_file in machine_ir_intrinsic_binding_files:
        with open(binding_file) as binding_json:
            json_array = json.load(binding_json)
        # insn of type str is actually part of the file license. Only the
        # leading run of strings is skipped; bound the scan so a file with
        # nothing but license lines does not index past the end.
        first_insn = 0
        while first_insn < len(json_array) and isinstance(json_array[first_insn], str):
            first_insn += 1
        for insn in json_array[first_insn:]:
            if insn.get('usage', '') != 'interpret-only':
                allowlisted_names.add(insn['insn'])
    return allowlisted_names


def load_all_lir_defs(allowlist_files, machine_ir_intrinsic_binding_files, lir_defs):
    """Load every LIR definition file and return (arch, insns).

    Definition files that report no arch are treated as macro instruction
    files: their insns are appended last, and the ones that fail the
    definition check are dropped instead of asserting.

    Asserts that all arch-specific files agree on the arch and that every
    allowlisted instruction was found in some definition file.
    """
    allowlist_looked = _allowlist_instructions(
        allowlist_files, machine_ir_intrinsic_binding_files)
    allowlist_found = set()
    arch = None
    insns = []
    macro_insns = []
    for lir_def in lir_defs:
        def_arch, def_insns = _load_lir_def(allowlist_looked, allowlist_found, lir_def)
        if arch and not arch.startswith('common_'):
            assert def_arch is None or arch == def_arch
        else:
            # NOTE(review): a macro file (def_arch None) following a 'common_'
            # file resets arch to None here - confirm this is intended.
            arch = def_arch
        if def_arch is None:
            macro_insns.extend(def_insns)
        else:
            insns.extend(def_insns)
    for insn in insns:
        _check_insn_defs(insn)
    # Some macroinstructions can only be used in Lite translator for now. Ignore them here.
    insns.extend(insn for insn in macro_insns if _check_insn_defs(insn, True))
    # allowlist_found only ever receives names taken from allowlist_looked, so
    # the sets are equal exactly when nothing is missing.
    missing = allowlist_looked - allowlist_found
    assert not missing, (
        'allowlisted instructions not found in any LIR definition: %s'
        % ', '.join(sorted(str(name) for name in missing)))
    return arch, insns