1#!/usr/bin/python
2#
3# Copyright (C) 2023 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17"""Generate LIR files out of the definition file.
18
19* Operand usage
20
21Register allocator needs operand usage to learn which operands can share the
22same register.
23
24To understand register sharing options, register allocator assumes insn works
25in these steps:
26- read input operands
27- do the job
28- write output operands
29
30So, input-output operands should have dedicated registers, while input-only
31operands can share registers with output-only operands.
32
There might be an exception when an output-only operand is written before all
input-only operands are read, so its register can't be shared. Such operands
are usually referred to as output-only-early-clobber operands.
36
37For register sharing, output-only-early-clobber operand is the same as
38input-output operand, but it is unnatural to describe output-only as
39input-output, so we use a special keyword for it.
40
41Finally, keywords are:
42use - input-only
43def - output-only
44def_early_clobber - output-only-early-clobber
45use_def - input-output
46
47* Scratch operands
48
49Scratch operands are actually output operands - indeed, their original value
50is not used and they get some new value after the insn is done. However, they
51are usually written before all input operands are read, so it makes sense to
52describe scratch operands as output-only-early-clobber.
53"""
54
55import asm_defs
56import json
57import sys
58
59
def _is_reg(arg_type):
  """Tell whether arg_type is one of the register operand classes.

  An operand class counts as a register when asm_defs recognizes it as a
  "greg", an "xreg" or an implicit register.
  """
  for is_explicit_reg in (asm_defs.is_greg, asm_defs.is_xreg):
    verdict = is_explicit_reg(arg_type)
    if verdict:
      return verdict
  return asm_defs.is_implicit_reg(arg_type)
64
65
class Operand:
  """Plain attribute bag describing one operand of a generated instruction.

  Instances start out empty; the _make_*_operand helpers of this file assign
  type, name, reg_operand_info, initializer and asm_arg on them.
  """
68
69
70def _get_reg_operand_info(usage, kind):
71  if usage == 'use':
72    return '{ &k%s, MachineRegKind::kUse }' % (kind)
73  if usage == 'def':
74    return '{ &k%s, MachineRegKind::kDef }' % (kind)
75  if usage == 'use_def':
76    return '{ &k%s, MachineRegKind::kUseDef }' % (kind)
77  if usage == 'def_early_clobber':
78    return '{ &k%s, MachineRegKind::kDefEarlyClobber }' % (kind)
79  assert False, 'unknown operand usage %s' % (usage)
80
81
def _make_reg_operand(r, usage, kind):
  """Describe the register operand kept in register slot r of an instruction.

  Args:
    r: index of the register slot inside the instruction.
    usage: operand usage keyword, forwarded to _get_reg_operand_info.
    kind: register class of the operand.

  Returns:
    An Operand of type 'MachineReg'. Its asm_arg is None for implicit
    registers, which are therefore not passed to the assembler call.

  Raises:
    AssertionError: usage or kind is not recognized.
  """
  operand = Operand()
  operand.type = 'MachineReg'
  operand.name = 'r%d' % (r)
  operand.reg_operand_info = _get_reg_operand_info(usage, kind)
  operand.initializer = 'SetRegAt(%d, r%d)' % (r, r)
  # Explicit registers are read back from their slot with a typed getter.
  asm_arg_formats = (
      (asm_defs.is_greg, 'GetGReg(RegAt(%d))'),
      (asm_defs.is_xreg, 'GetXReg(RegAt(%d))'),
  )
  for matches, asm_arg_format in asm_arg_formats:
    if matches(kind):
      operand.asm_arg = asm_arg_format % (r)
      break
  else:
    if asm_defs.is_implicit_reg(kind):
      operand.asm_arg = None
    else:
      assert False, 'unknown register kind %s' % (kind)
  return operand
97
98
def _make_imm_operand(bits):
  """Describe an immediate operand that is `bits` wide.

  The operand is typed int<bits>_t, stored with set_imm() and cast back to
  that type when handed to the assembler.
  """
  c_type = 'int%s_t' % (bits)
  operand = Operand()
  operand.type = c_type
  operand.name = 'imm'
  operand.reg_operand_info = None
  operand.initializer = 'set_imm(imm)'
  operand.asm_arg = 'static_cast<%s>(imm())' % (c_type)
  return operand
107
108
def _make_scale_operand():
  """Describe the scale part of an indexed memory operand.

  The scale is stored with set_scale() and converted with ToScaleFactor()
  when handed to the assembler.
  """
  scale_operand = Operand()
  scale_operand.type = 'MachineMemOperandScale'
  scale_operand.name = 'scale'
  scale_operand.reg_operand_info = None
  scale_operand.initializer = 'set_scale(scale)'
  scale_operand.asm_arg = 'ToScaleFactor(scale())'
  return scale_operand
117
118
def _make_disp_operand():
  """Describe a displacement operand.

  The displacement is a uint32_t stored with set_disp() and passed to the
  assembler as disp().
  """
  disp_operand = Operand()
  disp_operand.type = 'uint32_t'
  disp_operand.name = 'disp'
  disp_operand.reg_operand_info = None
  disp_operand.initializer = 'set_disp(disp)'
  disp_operand.asm_arg = 'disp()'
  return disp_operand
127
128
def _make_cond_operand():
  """Describe a condition operand.

  The condition is an Assembler::Condition stored with set_cond() and passed
  to the assembler as cond().
  """
  cond_operand = Operand()
  cond_operand.type = 'Assembler::Condition'
  cond_operand.name = 'cond'
  cond_operand.reg_operand_info = None
  cond_operand.initializer = 'set_cond(cond)'
  cond_operand.asm_arg = 'cond()'
  return cond_operand
137
138
def _make_label_operand():
  """Describe a label operand.

  The Label pointer is kept in the immediate field of the instruction: it is
  stored through set_imm() and recovered from imm() for the assembler.
  """
  label_operand = Operand()
  label_operand.type = 'Label*'
  label_operand.name = 'label'
  label_operand.reg_operand_info = None
  # Reusing the immediate field is safe: we never have both immediate and
  # Label in same insn.
  label_operand.initializer = 'set_imm(reinterpret_cast<uintptr_t>(label))'
  label_operand.asm_arg = '*reinterpret_cast<Label*>(imm())'
  return label_operand
148
149
def _check_insn_defs(insn, skip_unsupported=False):
  """Validate the operand classes of one instruction definition.

  Immediates and labels share a single field of the generated instruction, so
  at most one of them may appear. At most one memory operand and at most one
  displacement operand are allowed as well.

  Args:
    insn: instruction definition; its 'args' entries are inspected by their
      'class', and 'addr_mode' is consulted when a memory operand is present.
    skip_unsupported: when true, a memory operand with an addressing mode that
      is not supported makes the function return False instead of asserting.

  Returns:
    True if the definition passed all checks, False if it was rejected because
    of skip_unsupported.

  Raises:
    AssertionError: the definition violates one of the checks.
  """
  supported_addr_modes = ('Absolute', 'BaseDisp', 'IndexDisp', 'BaseIndexDisp')
  imm_field_taken = False
  has_memop = False
  has_disp = False
  for arg in insn.get('args'):
    kind = arg.get('class')
    if _is_reg(kind):
      continue
    if asm_defs.is_imm(kind):
      # We share field for immediate and label in 'insn'.
      assert not imm_field_taken
      imm_field_taken = True
      continue
    if asm_defs.is_mem_op(kind):
      # No insn can have more than one memop.
      assert not has_memop
      addr_mode = insn.get('addr_mode')
      is_supported = addr_mode in supported_addr_modes
      if skip_unsupported and not is_supported:
        return False
      assert is_supported, 'unknown addressing mode %s' % (addr_mode)
      has_memop = True
      continue
    if asm_defs.is_disp(kind):
      assert not has_disp
      has_disp = True
      continue
    if asm_defs.is_cond(kind):
      continue
    if asm_defs.is_label(kind):
      # A label occupies the immediate field.
      assert not imm_field_taken
      imm_field_taken = True
      continue
    assert False, 'unknown operand class %s' % (kind)
  return True
183
184
def _get_insn_operands(insn):
  """Describe the constructor operands of an instruction.

  Every returned Operand carries:
  - type: C++ type of the constructor parameter
  - name: name of the constructor parameter
  - reg_operand_info: register info initializer, None for non-registers
  - initializer: statement storing the parameter in the instruction
  - asm_arg: expression passed to the assembler, None if nothing is passed

  A memory operand expands, depending on insn's 'addr_mode', into a base
  register, an index register followed by a scale, and a displacement.

  Returns:
    A tuple (operands, side_effects). side_effects is true for the insns
    named below and for any insn whose memory operand usage is not 'use'.
  """
  operands = []
  next_reg = 0
  # Int3, Lfence, Mfence, Sfence, and UD2 have side effects not related to arguments.
  has_side_effects = insn['name'] in ('Int3', 'Lfence', 'Mfence', 'Sfence', 'UD2')
  for arg in insn.get('args'):
    kind = arg.get('class')
    if _is_reg(kind):
      operands.append(_make_reg_operand(next_reg, arg.get('usage'), kind))
      next_reg += 1
    elif asm_defs.is_imm(kind):
      # We share field for immediate and label in 'insn'. The class name
      # without its first three characters gives the bit width.
      operands.append(_make_imm_operand(kind[3:]))
    elif asm_defs.is_mem_op(kind):
      # If operand is memory and it's not "use" then we have side_effects
      if arg['usage'] != 'use':
        has_side_effects = True
      # No insn can have more than one memop.
      addr_mode = insn.get('addr_mode')
      assert addr_mode in ('Absolute', 'BaseDisp', 'IndexDisp', 'BaseIndexDisp'), \
        'unknown addressing mode %s' % (addr_mode)
      mem_operands = []
      if addr_mode in ('BaseDisp', 'BaseIndexDisp'):
        # Base register.
        mem_operands.append(_make_reg_operand(next_reg, 'use', 'GeneralReg32'))
        next_reg += 1
      if addr_mode in ('IndexDisp', 'BaseIndexDisp'):
        # Index register and its scale.
        mem_operands.append(_make_reg_operand(next_reg, 'use', 'GeneralReg32'))
        next_reg += 1
        mem_operands.append(_make_scale_operand())
      mem_operands.append(_make_disp_operand())
      operands.extend(mem_operands)
    elif asm_defs.is_disp(kind):
      operands.append(_make_disp_operand())
    elif asm_defs.is_cond(kind):
      operands.append(_make_cond_operand())
    elif asm_defs.is_label(kind):
      operands.append(_make_label_operand())
    else:
      assert False, 'unknown operand class %s' % (kind)
  return operands, has_side_effects
233
234
def _get_insn_debug_operands(insn):
  """List the C++ expressions that print the operands of an instruction.

  Returns:
    One expression string per printed operand, in operand order. Register
    based helpers receive the index of the first register slot they describe.

  Raises:
    AssertionError: an operand class, register kind or addressing mode is not
      recognized, or a displacement does not directly follow a register.
  """
  reg_helper = 'GetRegOperandDebugString'
  regs_in_mem_operand = {'BaseDisp': 1, 'IndexDisp': 1, 'BaseIndexDisp': 2}
  exprs = []
  reg_index = 0
  for arg in insn.get('args'):
    kind = arg.get('class')
    if _is_reg(kind):
      if asm_defs.is_greg(kind) or asm_defs.is_xreg(kind):
        exprs.append('GetRegOperandDebugString(this, %d)' % (reg_index))
      elif asm_defs.is_implicit_reg(kind):
        exprs.append('GetImplicitRegOperandDebugString(this, %d)' % (reg_index))
      else:
        assert False, 'unknown register kind %s' % (kind)
      reg_index += 1
    elif asm_defs.is_imm(kind):
      # We share field for immediate and label in 'insn'.
      exprs.append('GetImmOperandDebugString(this)')
    elif asm_defs.is_mem_op(kind):
      # No insn can have more than one memop.
      addr_mode = insn.get('addr_mode')
      if addr_mode == 'Absolute':
        exprs.append('GetAbsoluteMemOperandDebugString(this)')
      elif addr_mode in ('BaseDisp', 'IndexDisp', 'BaseIndexDisp'):
        exprs.append('Get%sMemOperandDebugString(this, %d)' % (addr_mode, reg_index))
        # Skip the register slots consumed by the memory operand.
        reg_index += regs_in_mem_operand[addr_mode]
      else:
        assert False, 'unknown addr_mode %s' % (addr_mode)
    elif asm_defs.is_disp(kind):
      # Hack: replace previous reg helper with mem helper, keeping the
      # argument list of the original call.
      assert exprs
      assert exprs[-1].startswith('GetRegOperandDebugString')
      call_args = exprs[-1][len(reg_helper):]
      exprs[-1] = 'GetBaseDispMemOperandDebugString' + call_args
    elif asm_defs.is_cond(kind):
      exprs.append('GetCondOperandDebugString(this)')
    elif asm_defs.is_label(kind):
      exprs.append('GetLabelOperandDebugString(this)')
    else:
      assert False, 'unknown operand class %s' % (kind)
  return exprs
273
274
# Indentation unit for statements inside generated C++ function bodies.
INDENT = '  '
276
277
def _gen_insn_ctor(f, insn):
  """Write the kInfo definition and the constructor of one instruction.

  Args:
    f: text file to write the C++ code to.
    insn: instruction definition.
  """
  insn_name = insn.get('name')
  operands, _ = _get_insn_operands(insn)
  param_list = ', '.join('%s %s' % (op.type, op.name) for op in operands)
  ctor_body = '\n'.join('%s%s;' % (INDENT, op.initializer) for op in operands)
  ctor_lines = (
      'constexpr MachineInsnInfo %s::kInfo;' % (insn_name),
      '%s::%s(%s) : MachineInsnForArch(&kInfo) {' % (insn_name, insn_name, param_list),
      ctor_body,
      '}',
  )
  for line in ctor_lines:
    print(line, file=f)
287
288
289# TODO(b/232598137): Maybe we should just implement generic printing in C++
290# instead of generating it for every instruction.
def _gen_insn_debug(f, insn):
  """Write the C++ definition of GetDebugString() for one instruction.

  The generated method returns the mnemonic followed by the comma separated
  operands. When the insn has operands, a non-zero recovery_pc() is appended
  as well.

  Args:
    f: text file to write the C++ code to.
    insn: instruction definition.
  """
  insn_name = insn.get('name')
  mnemonic = insn.get('mnemo')
  print('std::string %s::GetDebugString() const {' % (insn_name), file=f)
  operand_exprs = _get_insn_debug_operands(insn)
  if operand_exprs:
    body = [
        '  std::string s("%s ");' % (mnemonic),
        '  s += %s;' % (operand_exprs[0]),
    ]
    for operand_expr in operand_exprs[1:]:
      body.append('  s += ", ";')
      body.append('  s += %s;' % (operand_expr))
    # We don't print recovery_bb() since it can be found by edges outgoing from basic block.
    body.extend((
        '  if (recovery_pc()) {',
        '    s += StringPrintf(" <0x%" PRIxPTR ">", recovery_pc());',
        '  }',
        '  return s;',
    ))
  else:
    body = ['  return "%s";' % (mnemonic)]
  body.append('}')
  for line in body:
    print(line, file=f)
310
311
def _gen_insn_emit(f, insn):
  """Write the C++ definition of Emit() for one instruction.

  The generated method calls the assembler method named by insn's 'asm'
  entry, passing the asm_arg of every operand that has one.

  Args:
    f: text file to write the C++ code to.
    insn: instruction definition.
  """
  insn_name = insn.get('name')
  asm_method = insn.get('asm')
  operands, _ = _get_insn_operands(insn)
  # Operands without asm_arg (e.g. implicit registers) are not passed on.
  call_args = [op.asm_arg for op in operands if op.asm_arg]
  emit_lines = (
      'void %s::Emit(CodeEmitter* as) const {' % (insn_name),
      '%sas->%s(%s);' % (INDENT, asm_method, ', '.join(call_args)),
      '}',
  )
  for line in emit_lines:
    print(line, file=f)
320
321
def _gen_insn_class(f, insn):
  """Write the C++ class declaration of one instruction.

  The class derives from MachineInsnForArch and holds a constexpr kInfo with
  the opcode, the register operand infos and the insn kind, which is
  kMachineInsnSideEffects for insns with side effects and kMachineInsnDefault
  otherwise.

  Args:
    f: text file to write the C++ code to.
    insn: instruction definition.
  """
  insn_name = insn.get('name')
  operands, side_effects = _get_insn_operands(insn)
  reg_infos = [op.reg_operand_info for op in operands if op.reg_operand_info]
  insn_kind = 'kMachineInsnSideEffects' if side_effects else 'kMachineInsnDefault'
  ctor_params = ', '.join('%s %s' % (op.type, op.name) for op in operands)
  class_lines = (
      'class %s : public MachineInsnForArch {' % (insn_name),
      ' public:',
      '  explicit %s(%s);' % (insn_name, ctor_params),
      '  static constexpr MachineInsnInfo kInfo =',
      '      MachineInsnInfo({kMachineOp%s,' % (insn_name),
      '                       %d,' % (len(reg_infos)),
      '                       {%s},' % (', '.join(reg_infos)),
      '                       %s});' % (insn_kind),
      '  static constexpr int NumRegOperands() { return kInfo.num_reg_operands; }',
      '  static constexpr const MachineRegKind& RegKindAt(int i) { return kInfo.reg_kinds[i]; }',
      '  std::string GetDebugString() const override;',
      '  void Emit(CodeEmitter* as) const override;',
      '};',
  )
  for line in class_lines:
    print(line, file=f)
344
345
def gen_code_2_cc(out, arch, insns):
  """Write the constructor definitions of all insns to the file `out`.

  Args:
    out: path of the file to (over)write.
    arch: not used by this generator.
    insns: instruction definitions to generate code for.
  """
  with open(out, 'w') as out_file:
    for insn_def in insns:
      _gen_insn_ctor(out_file, insn_def)
350
351
def gen_code_debug_cc(out, arch, insns):
  """Write the GetDebugString() definitions of all insns to the file `out`.

  Args:
    out: path of the file to (over)write.
    arch: architecture name; used in the include path and as the name of the
      namespace nested in berberis.
    insns: instruction definitions to generate code for.
  """
  prologue = """\
// This file automatically generated by gen_lir.py
// DO NOT EDIT!

#include "berberis/base/stringprintf.h"
#include "berberis/backend/%s/code_debug.h"

namespace berberis {

namespace %s {
"""
  epilogue = """\

}  // namespace %s

}  // namespace berberis"""
  with open(out, 'w') as out_file:
    print(prologue % (arch, arch), file=out_file)
    for insn_def in insns:
      _gen_insn_debug(out_file, insn_def)
    print(epilogue % (arch), file=out_file)
372
373
def gen_code_emit_cc(out, arch, insns):
  """Write the Emit() definitions of all insns to the file `out`.

  Args:
    out: path of the file to (over)write.
    arch: architecture name; used in the include path and as the name of the
      namespace nested in berberis.
    insns: instruction definitions to generate code for.
  """
  prologue = """\
// This file automatically generated by gen_lir.py
// DO NOT EDIT!

#include "berberis/backend/code_emitter.h"
#include "berberis/backend/%s/code_emit.h"

namespace berberis {

namespace %s {
"""
  epilogue = """\

}  // namespace %s

}  // namespace berberis"""
  with open(out, 'w') as out_file:
    print(prologue % (arch, arch), file=out_file)
    for insn_def in insns:
      _gen_insn_emit(out_file, insn_def)
    print(epilogue % (arch), file=out_file)
394
395
def gen_machine_info_h(out, arch, insns):
  """Write one C++ using-declaration per insn to the file `out`.

  Args:
    out: path of the file to (over)write.
    arch: not used by this generator.
    insns: instruction definitions; only their 'name' is used.
  """
  with open(out, 'w') as out_file:
    for insn_name in (insn_def.get('name') for insn_def in insns):
      print('using %s = %s;' % (insn_name, insn_name), file=out_file)
401
402
def gen_machine_opcode_h(out, arch, insns):
  """Write one kMachineOp<name> enumerator line per insn to the file `out`.

  Args:
    out: path of the file to (over)write.
    arch: not used by this generator.
    insns: instruction definitions; only their 'name' is used.
  """
  with open(out, 'w') as out_file:
    for insn_name in (insn_def.get('name') for insn_def in insns):
      print('kMachineOp%s,' % (insn_name), file=out_file)
408
409
410def _gen_mem_insn_groups(f, insns):
411  # Build a dictionary to map a memory insn group name to another dictionary,
412  # which in turn maps an addressing mode to an individual memory insn.
413  groups = {}
414  for i in insns:
415    group_name = i.get('mem_group_name')
416    if group_name:
417      groups.setdefault(group_name, {})[i.get('addr_mode')] = i.get('name')
418
419  for group_name in sorted(groups):
420    # The order of the addressing modes here is important.  It must
421    # match what MemInsns expects.
422    mem_insns = [groups[group_name][addr_mode]
423                 for addr_mode in ('Absolute', 'BaseDisp', 'IndexDisp', 'BaseIndexDisp')]
424    print('using %s = MemInsns<%s>;' % (group_name, ', '.join(mem_insns)), file=f)
425
426
def gen_machine_ir_h(out, arch, insns):
  """Write the insn class declarations and memory insn groups to `out`.

  The class declarations come first, followed by an empty line and the
  MemInsns aliases.

  Args:
    out: path of the file to (over)write.
    arch: not used by this generator.
    insns: instruction definitions to generate code for.
  """
  with open(out, 'w') as out_file:
    for insn_def in insns:
      _gen_insn_class(out_file, insn_def)
    print('', file=out_file)
    _gen_mem_insn_groups(out_file, insns)
433
434
def _contains_mem(insn):
  """Tell whether any argument of insn is a memory operand."""
  for arg in insn.get('args'):
    if asm_defs.is_mem_op(arg['class']):
      return True
  return False
437
438
def _create_mem_insn(insn, addr_mode):
  """Derive the variant of a memory insn for one addressing mode.

  Args:
    insn: instruction definition with a memory operand; it is not modified.
    addr_mode: addressing mode of the variant.

  Returns:
    A shallow copy of insn whose 'name' and 'asm' are the macro name that
    asm_defs.get_mem_macro_name gives for addr_mode, with 'addr_mode' set and
    'mem_group_name' naming the group shared by all variants of insn.
  """
  variant = insn.copy()
  macro_name = asm_defs.get_mem_macro_name(insn, addr_mode)
  variant.update({
      'name': macro_name,
      'addr_mode': addr_mode,
      'asm': macro_name,
      # The group name is built from the macro name for an empty addressing mode.
      'mem_group_name': asm_defs.get_mem_macro_name(insn, '') + 'Insns',
  })
  return variant
447
448
def _expand_mem_insns(insns):
  """Add the per-addressing-mode variants of every memory insn.

  Returns:
    A new list in which each insn with a memory operand is preceded by its
    Absolute, BaseDisp, IndexDisp and BaseIndexDisp variants. The original
    insn itself is always kept.
  """
  addr_modes = ('Absolute', 'BaseDisp', 'IndexDisp', 'BaseIndexDisp')
  expanded = []
  for insn_def in insns:
    if _contains_mem(insn_def):
      for addr_mode in addr_modes:
        expanded.append(_create_mem_insn(insn_def, addr_mode))
    expanded.append(insn_def)
  return expanded
457
458
def _load_lir_def(allowlist_looked, allowlist_found, asm_def):
  """Load one definition file and keep only the allowlisted insns.

  Args:
    allowlist_looked: names of the insns to keep.
    allowlist_found: set that receives every allowlisted name encountered in
      this file; it is updated in place.
    asm_def: definition file, passed to asm_defs.load_asm_defs.

  Returns:
    A tuple (arch, insns) with the arch reported by asm_defs.load_asm_defs and
    the insns that are neither outside the allowlist nor marked 'skip_lir'.
  """
  arch, loaded_insns = asm_defs.load_asm_defs(asm_def)
  kept_insns = []
  for insn_def in _expand_mem_insns(loaded_insns):
    # Memory insn variants are looked up by the name of their group.
    lookup_name = insn_def.get('mem_group_name', insn_def['name'])
    if lookup_name in allowlist_looked:
      allowlist_found.add(lookup_name)
    else:
      # Mark the instruction as disabled.
      insn_def['skip_lir'] = 1
    # Filter out disabled instructions.
    if not insn_def.get('skip_lir'):
      kept_insns.append(insn_def)
  return arch, kept_insns
472
473
474def _allowlist_instructions(allowlist_files, machine_ir_intrinsic_binding_files):
475  allowlisted_names = set()
476  for allowlist_file in allowlist_files:
477    with open(allowlist_file) as allowlist_json:
478      for insn_name in json.load(allowlist_json)['insns']:
479        allowlisted_names.add(insn_name)
480  for machine_ir_intrinsic_binding_file in machine_ir_intrinsic_binding_files:
481    with open(machine_ir_intrinsic_binding_file) as machine_ir_intrinsic_binding_json:
482      json_array = json.load(machine_ir_intrinsic_binding_json)
483        # insn of type str is actually part of the file license.
484      while isinstance(json_array[0], str):
485        json_array.pop(0)
486      for insn in json_array:
487        if insn.get('usage', '') != 'interpret-only':
488          allowlisted_names.add(insn['insn'])
489  return allowlisted_names
490
491
def load_all_lir_defs(allowlist_files, machine_ir_intrinsic_binding_files, lir_defs):
  """Load, filter and validate the insn definitions of all lir_defs.

  Args:
    allowlist_files: JSON allowlist files, see _allowlist_instructions.
    machine_ir_intrinsic_binding_files: JSON intrinsic binding files, see
      _allowlist_instructions.
    lir_defs: definition files to load. Insns from files that report no arch
      are treated as macro instructions.

  Returns:
    A tuple (arch, insns). insns holds the allowlisted insns of the files that
    report an arch, followed by the allowlisted macro insns which use
    supported addressing modes only.

  Raises:
    AssertionError: definition files disagree on the arch, an insn definition
      is invalid, or an allowlisted insn is not defined in any file.
  """
  allowlist_looked = _allowlist_instructions(
      allowlist_files, machine_ir_intrinsic_binding_files)
  allowlist_found = set()
  arch = None
  insns = []
  macro_insns = []
  for lir_def in lir_defs:
    def_arch, def_insns = _load_lir_def(allowlist_looked, allowlist_found, lir_def)
    if arch and not arch.startswith('common_'):
      assert def_arch is None or arch == def_arch, \
        'architecture mismatch in %s: expected %s, got %s' % (lir_def, arch, def_arch)
    else:
      # No arch seen yet, or only a 'common_' one: take over this file's arch.
      arch = def_arch
    if def_arch is None:
      macro_insns.extend(def_insns)
    else:
      insns.extend(def_insns)
  for insn in insns:
    _check_insn_defs(insn)
  # Some macroinstructions can only be used in Lite translator for now. Ignore them here.
  insns.extend(insn for insn in macro_insns if _check_insn_defs(insn, True))
  # allowlist_found only ever receives names taken from allowlist_looked, so
  # the difference below lists exactly the names that caused the mismatch.
  assert allowlist_looked == allowlist_found, \
    'allowlisted instructions not found in any definition file: %s' % (
        sorted(allowlist_looked - allowlist_found, key=str),)
  return arch, insns
515