1#!/usr/bin/python
2#
3# Copyright (C) 2018 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17
18"""Parse assembler definition file.
19
The definition JSON file for this script has the following form:
21{
22  "arch": "XXX",
23  "insns": [
24    {
25      "name": "ShlbRegReg",
26      "args": [
27         {"class": "GeneralReg8", "usage": "use_def"},
28         {"class": "RCX", "usage": "use"},
29         {"class": "FLAGS", "usage": "def"}
30       ],
31       "asm": "ShlbByCl",
32       "mnemo": "SHLB"
33    },
34    ...
  ]
}
36'arch' is primarily used for C++ namespace in LIR generator, and is ignored by
37this script.
38
'insns' is an array of objects, each describing a single instruction variant.
Each instruction is an object with the following fields:
41  'name' - instruction unique name, used in LIR generator, typical name is
42           InsnOp1Op2, where 'Insn' is instruction name, 'Op1', 'Op2' -
43           operand types, such as Imm, Reg, Mem(Op), Base, Disp.
  'args' - ordered list of instruction arguments; each argument is described
           by its 'class' (any GP register, fixed GP register,
           any XMM register, immediate, memory operand, flags register)
           and by its 'usage', i.e. how it is treated by the instruction
           (used, defined, both used and defined)
49  'asm' - which internal assembler's mnemonic is used
50  'opcodes' - optional flag for autogeneration: if opcode bytes are specified
51              then implementation would be automatically generated
52  'reg_to_rm' - optional flag to make RM field in ModRegRM byte destination
53                (most instructions with two registers use reg as destination)
54  'mnemo' - how instruction shall be named in LIR dumps (ignored here)
55
56Memory operand for assembler instructions can be described as either opaque
57Operand class, which provides full power of x86 addressing modes, or as
58explicit BaseDisp format, which translates to reg+disp form.
59
60For some instructions (such as pop, push, jmp reg) exact register width is not
61specified, and 'GeneralReg' class is used, as same encoding is used for 32 and
6264 bit operands, depending on current CPU mode.
63
64This script produces inline file for internal assembler's header, such as for
65above example it would yield single line
66
67  void ShlbByCl(Register);
68
Fixed arguments (such as 'RCX') and flags ('FLAGS') are ignored when generating
the assembler's header, while for every other argument an argument is emitted
whose type depends on the argument's class.
72"""
73
74import copy
75import json
76
77
def is_imm(arg_type):
  """Return True if arg_type is one of the immediate operand classes."""
  immediate_classes = ('Imm2', 'Imm8', 'Imm16', 'Imm32', 'Imm64')
  return arg_type in immediate_classes
80
81
def is_disp(arg_type):
  """Return True if arg_type is the 'Disp' operand class."""
  disp_class = 'Disp'
  return arg_type == disp_class
84
85
def is_mem_op(arg_type):
  """Return True if arg_type is one of the memory operand classes."""
  memory_classes = (
      'Mem8', 'Mem16', 'Mem32', 'Mem64', 'Mem128',
      'MemX87', 'MemX8716', 'MemX8732', 'MemX8764', 'MemX8780',
      'VecMem32', 'VecMem64', 'VecMem128',
  )
  return arg_type in memory_classes
90
91
def is_cond(arg_type):
  """Return True if arg_type is the 'Cond' operand class."""
  cond_class = 'Cond'
  return arg_type == cond_class
94
95
def is_label(arg_type):
  """Return True if arg_type is the 'Label' operand class."""
  label_class = 'Label'
  return arg_type == label_class
98
99
def is_x87reg(arg_type):
  """Return True if arg_type is the 'RegX87' operand class."""
  x87_class = 'RegX87'
  return arg_type == x87_class
102
103
def is_greg(arg_type):
  """Return True if arg_type is one of the 'GeneralReg*' operand classes."""
  general_reg_classes = (
      'GeneralReg',
      'GeneralReg8', 'GeneralReg16',
      'GeneralReg32', 'GeneralReg64',
  )
  return arg_type in general_reg_classes
108
109
def is_xreg(arg_type):
  """Return True if arg_type is one of the XmmReg/VecReg*/FpReg* classes."""
  xmm_like_classes = (
      'XmmReg',
      'VecReg64', 'VecReg128',
      'FpReg32', 'FpReg64',
  )
  return arg_type in xmm_like_classes
114
115
116# Operands of this type are NOT passed to assembler
def is_implicit_reg(arg_type):
  """Return True if arg_type is a fixed (implicit) operand class.

  Operands of these classes are not passed to the assembler.
  """
  implicit_classes = (
      'RAX', 'EAX', 'AX', 'AL',
      'RCX', 'ECX', 'CL', 'ST', 'ST1',
      'RDX', 'EDX', 'DX', 'CC',
      'RBX', 'EBX', 'BX', 'SW',
      'RDI', 'RSI', 'RSP', 'FLAGS',
  )
  return arg_type in implicit_classes
123
124
def exactly_one_of(iterable):
  """Return True if exactly one element of iterable is truthy.

  The iterable is always consumed to the end.
  """
  truthy_count = 0
  for item in iterable:
    if item:
      truthy_count += 1
  return truthy_count == 1
127
128
def get_mem_macro_name(insn, addr_mode = None):
  """Build the macro name for an instruction from its 'asm' stem and args.

  The name starts with insn['asm'] (with a trailing 'ByCl' removed) and gets
  one suffix per argument, in order:
    - 'Reg' for x87, general and implicit registers,
    - 'XReg' for XMM-like registers,
    - 'Imm' for immediates,
    - 'Mem' + addr_mode for memory operands when addr_mode is given,
      otherwise 'Op'.
  FLAGS, condition and label arguments add nothing.

  Raises:
    Exception: if an argument class is not one of the supported kinds.
  """
  stem = insn.get('asm')
  if stem.endswith('ByCl'):
    stem = stem[:-len('ByCl')]

  suffixes = []
  for operand in insn['args']:
    operand_class = operand['class']
    # FLAGS, conditions and labels never distinguish two instructions from
    # each other, so they are left out of the name.
    if (operand_class == 'FLAGS' or is_cond(operand_class)
        or is_label(operand_class)):
      continue
    if (is_x87reg(operand_class) or is_greg(operand_class)
        or is_implicit_reg(operand_class)):
      suffixes.append('Reg')
    elif is_xreg(operand_class):
      suffixes.append('XReg')
    elif is_imm(operand_class):
      suffixes.append('Imm')
    elif is_mem_op(operand_class):
      suffixes.append('Op' if addr_mode is None else 'Mem' + addr_mode)
    else:
      raise Exception('arg type %s is not supported' % operand_class)
  return stem + ''.join(suffixes)
153
154
def _expand_name(insn, stem, encoding = None):
  """Return a standalone copy of insn specialized for the given stem.

  Args:
    insn: instruction definition (dict) to specialize; it is not modified.
    stem: assembler mnemonic; stored as 'asm' and, upper-cased, as 'mnemo'.
    encoding: optional dict of extra fields merged into the copy last, so its
      keys take precedence over the generated ones.

  Returns:
    A deep copy of insn with 'asm', 'name' and 'mnemo' filled in and the
    encoding fields applied.
  """
  # Make deep copy of the instruction to make sure consumers could treat them
  # as independent entities and add/remove marks freely.
  #
  # JSON never has "merged" objects, thus having them in the result would
  # violate expectations.
  expanded_insn = copy.deepcopy(insn)
  expanded_insn['asm'] = stem
  expanded_insn['name'] = get_mem_macro_name(expanded_insn)
  expanded_insn['mnemo'] = stem.upper()
  # None (rather than a shared mutable {} default) means "no extra fields".
  if encoding is not None:
    expanded_insn.update(encoding)
  return expanded_insn
167
168
169def _expand_insn_by_encodings(insns):
170  expanded_insns = []
171  for insn in insns:
172    if insn.get('encodings'):
173      assert all((f not in insn) for f in ['stems', 'name', 'asm', 'mnemo'])
174      # If we have encoding then we must have at least opcodes
175      assert all('opcodes' in encoding for _, encoding in insn['encodings'].items())
176      expanded_insns.extend([_expand_name(insn, stem, encoding)
177                            for stem, encoding in insn['encodings'].items()])
178    elif insn.get('stems'):
179      assert all((f not in insn) for f in ['encoding', 'name', 'asm', 'mnemo'])
180      expanded_insns.extend([_expand_name(insn, stem)
181                            for stem in insn['stems']])
182    else:
183      assert all((f in insn) for f in ['name', 'asm', 'mnemo'])
184      expanded_insns.append(insn)
185  return expanded_insns
186
187
188def _expand_insns_by_operands(insns):
189  expanded_insns = []
190  for insn in insns:
191    split_done = False
192    for arg in insn['args']:
193      if '/' in arg['class']:
194        assert not split_done
195        operand_classes = arg['class'].split('/')
196        for subclass in operand_classes:
197          arg['class'] = subclass
198          expanded_insn = copy.deepcopy(insn)
199          expanded_insns.append(expanded_insn)
200        split_done = True
201    if not split_done:
202      expanded_insns.append(insn)
203  return expanded_insns
204
205
def load_asm_defs(asm_def):
  """Load an assembler definition JSON file.

  The 'insns' list is expanded first by operand class alternatives, then by
  'encodings'/'stems', and finally sorted by the 'asm' field.

  Args:
    asm_def: path of the JSON definition file.

  Returns:
    Tuple (arch, insns): the value of the 'arch' field and the list of
    expanded instruction definitions.
  """
  with open(asm_def) as def_file:
    definitions = json.load(def_file)
    by_operands = _expand_insns_by_operands(definitions.get('insns'))
    by_encodings = _expand_insn_by_encodings(by_operands)
    ordered = sorted(by_encodings, key=lambda insn: insn.get('asm'))
  return definitions.get('arch'), ordered
216