1 /*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "assembler_x86_64.h"
18
19 #include "base/casts.h"
20 #include "base/memory_region.h"
21 #include "entrypoints/quick/quick_entrypoints.h"
22 #include "thread.h"
23
24 namespace art HIDDEN {
25 namespace x86_64 {
26
// Debug-printing helpers: stream a register wrapper as its underlying enum value.
std::ostream& operator<<(std::ostream& os, const CpuRegister& reg) {
  return os << reg.AsRegister();
}

std::ostream& operator<<(std::ostream& os, const XmmRegister& reg) {
  return os << reg.AsFloatRegister();
}

// x87 stack registers print as "ST" followed by the numeric slot.
std::ostream& operator<<(std::ostream& os, const X87Register& reg) {
  return os << "ST" << static_cast<int>(reg);
}
38
// Pretty-prints an Address operand in AT&T-like syntax for debugging output.
// Dispatches on the ModRM 'mod' field: 0 = no displacement, 1 = 8-bit
// displacement, 2 = 32-bit displacement. (mod == 3 is a register, not memory.)
std::ostream& operator<<(std::ostream& os, const Address& addr) {
  switch (addr.mod()) {
    case 0:
      // rm != RSP means no SIB byte; an index of RSP inside a SIB means
      // "no index" — either way this is a plain (%reg) operand.
      if (addr.rm() != RSP || addr.cpu_index().AsRegister() == RSP) {
        return os << "(%" << addr.cpu_rm() << ")";
      } else if (addr.base() == RBP) {
        // SIB with base == RBP and mod == 0 encodes disp32 with no base register.
        return os << static_cast<int>(addr.disp32()) << "(,%" << addr.cpu_index()
                  << "," << (1 << addr.scale()) << ")";
      }
      return os << "(%" << addr.cpu_base() << ",%"
                << addr.cpu_index() << "," << (1 << addr.scale()) << ")";
    case 1:
      // 8-bit displacement, with or without a SIB index.
      if (addr.rm() != RSP || addr.cpu_index().AsRegister() == RSP) {
        return os << static_cast<int>(addr.disp8()) << "(%" << addr.cpu_rm() << ")";
      }
      return os << static_cast<int>(addr.disp8()) << "(%" << addr.cpu_base() << ",%"
                << addr.cpu_index() << "," << (1 << addr.scale()) << ")";
    case 2:
      // 32-bit displacement, with or without a SIB index.
      if (addr.rm() != RSP || addr.cpu_index().AsRegister() == RSP) {
        return os << static_cast<int>(addr.disp32()) << "(%" << addr.cpu_rm() << ")";
      }
      return os << static_cast<int>(addr.disp32()) << "(%" << addr.cpu_base() << ",%"
                << addr.cpu_index() << "," << (1 << addr.scale()) << ")";
    default:
      return os << "<address?>";
  }
}
66
CpuHasAVXorAVX2FeatureFlag()67 bool X86_64Assembler::CpuHasAVXorAVX2FeatureFlag() {
68 if (has_AVX_ || has_AVX2_) {
69 return true;
70 }
71 return false;
72 }
73
74
// call *reg — indirect near call through a register: FF /2.
void X86_64Assembler::call(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xFF);
  EmitRegisterOperand(2, reg.LowBits());  // /2 opcode extension.
}


// call *mem — indirect near call through memory: FF /2.
void X86_64Assembler::call(const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0xFF);
  EmitOperand(2, address);  // /2 opcode extension.
}


// call rel32 — direct near call: E8 cd, displacement fixed up via the label.
void X86_64Assembler::call(Label* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xE8);
  static const int kSize = 5;  // Total instruction size: opcode + rel32.
  // Offset by one because we already have emitted the opcode.
  EmitLabel(label, kSize - 1);
}
98
// push r64: single-byte opcode 50 + rd (REX.B extends to R8-R15).
void X86_64Assembler::pushq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0x50 + reg.LowBits());
}


// push m64: FF /6.
void X86_64Assembler::pushq(const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0xFF);
  EmitOperand(6, address);  // /6 opcode extension.
}


// push imm: 6A ib (sign-extended 8-bit) or 68 id (sign-extended 32-bit).
void X86_64Assembler::pushq(const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // pushq only supports 32b immediate.
  if (imm.is_int8()) {
    EmitUint8(0x6A);
    EmitUint8(imm.value() & 0xFF);
  } else {
    EmitUint8(0x68);
    EmitImmediate(imm);
  }
}


// pop r64: single-byte opcode 58 + rd.
void X86_64Assembler::popq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0x58 + reg.LowBits());
}


// pop m64: 8F /0.
void X86_64Assembler::popq(const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0x8F);
  EmitOperand(0, address);  // /0 opcode extension.
}
140
141
// mov r64, imm — picks the compact sign-extending form when possible.
void X86_64Assembler::movq(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (imm.is_int32()) {
    // 32 bit. Note: sign-extends. Encoding: REX.W C7 /0 id.
    EmitRex64(dst);
    EmitUint8(0xC7);
    EmitRegisterOperand(0, dst.LowBits());
    EmitInt32(static_cast<int32_t>(imm.value()));
  } else {
    // Full 64-bit immediate ("movabs"): REX.W B8+rd io.
    EmitRex64(dst);
    EmitUint8(0xB8 + dst.LowBits());
    EmitInt64(imm.value());
  }
}


// mov r32, imm32: B8+rd id (upper 32 bits of dst are zeroed by hardware).
void X86_64Assembler::movl(CpuRegister dst, const Immediate& imm) {
  CHECK(imm.is_int32());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitUint8(0xB8 + dst.LowBits());
  EmitImmediate(imm);
}


// mov m64, imm32 (sign-extended): REX.W C7 /0 id.
void X86_64Assembler::movq(const Address& dst, const Immediate& imm) {
  CHECK(imm.is_int32());  // No 64-bit immediate-to-memory form exists.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst);
  EmitUint8(0xC7);
  EmitOperand(0, dst);  // /0 opcode extension.
  EmitImmediate(imm);
}
175
176
// mov r64, r64 — uses the MR form (89 /r), so the REX operands are reversed.
void X86_64Assembler::movq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // 0x89 is movq r/m64 <- r64, with op1 in r/m and op2 in reg: so reverse EmitRex64
  EmitRex64(src, dst);
  EmitUint8(0x89);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}


// mov r32, r32 — RM form: 8B /r.
void X86_64Assembler::movl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x8B);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


// mov r64, m64: REX.W 8B /r.
void X86_64Assembler::movq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x8B);
  EmitOperand(dst.LowBits(), src);
}


// mov r32, m32: 8B /r.
void X86_64Assembler::movl(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x8B);
  EmitOperand(dst.LowBits(), src);
}


// mov m64, r64: REX.W 89 /r.
void X86_64Assembler::movq(const Address& dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(src, dst);
  EmitUint8(0x89);
  EmitOperand(src.LowBits(), dst);
}


// mov m32, r32: 89 /r.
void X86_64Assembler::movl(const Address& dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x89);
  EmitOperand(src.LowBits(), dst);
}

// mov m32, imm32: C7 /0 id.
void X86_64Assembler::movl(const Address& dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitUint8(0xC7);
  EmitOperand(0, dst);  // /0 opcode extension.
  EmitImmediate(imm);
}
232
// movnti m32, r32 — non-temporal (cache-bypassing) store: 0F C3 /r.
void X86_64Assembler::movntl(const Address& dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0xC3);
  EmitOperand(src.LowBits(), dst);
}

// movnti m64, r64 — non-temporal store, 64-bit: REX.W 0F C3 /r.
void X86_64Assembler::movntq(const Address& dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0xC3);
  EmitOperand(src.LowBits(), dst);
}
248
// cmovcc r, r — defaults to the 64-bit form.
void X86_64Assembler::cmov(Condition c, CpuRegister dst, CpuRegister src) {
  cmov(c, dst, src, /*is64bit=*/ true);
}

// cmovcc r, r: 0F 40+cc /r; REX.W selects the 64-bit variant.
void X86_64Assembler::cmov(Condition c, CpuRegister dst, CpuRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex(false, is64bit, dst.NeedsRex(), false, src.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x40 + c);  // The condition code is folded into the opcode.
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


// cmovcc r, m: 0F 40+cc /r; REX.W selects the 64-bit variant.
void X86_64Assembler::cmov(Condition c, CpuRegister dst, const Address& src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (is64bit) {
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x40 + c);  // The condition code is folded into the opcode.
  EmitOperand(dst.LowBits(), src);
}
273
274
// movzx r32, r8 — zero-extend byte: 0F B6 /r. The byte-reg-normalizing REX
// helper handles the byte-register encoding of the source.
void X86_64Assembler::movzxb(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalByteRegNormalizingRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB6);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


// movzx r32, m8 — zero-extend byte from memory: 0F B6 /r.
void X86_64Assembler::movzxb(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Byte register is only in the source register form, so we don't use
  // EmitOptionalByteRegNormalizingRex32(dst, src);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB6);
  EmitOperand(dst.LowBits(), src);
}


// movsx r32, r8 — sign-extend byte: 0F BE /r.
void X86_64Assembler::movsxb(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalByteRegNormalizingRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBE);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


// movsx r32, m8 — sign-extend byte from memory: 0F BE /r.
void X86_64Assembler::movsxb(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Byte register is only in the source register form, so we don't use
  // EmitOptionalByteRegNormalizingRex32(dst, src);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBE);
  EmitOperand(dst.LowBits(), src);
}


// Deliberately unimplemented: a plain byte load is ambiguous about extension.
void X86_64Assembler::movb(CpuRegister /*dst*/, const Address& /*src*/) {
  LOG(FATAL) << "Use movzxb or movsxb instead.";
}


// mov m8, r8: 88 /r.
void X86_64Assembler::movb(const Address& dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalByteRegNormalizingRex32(src, dst);
  EmitUint8(0x88);
  EmitOperand(src.LowBits(), dst);
}


// mov m8, imm8: C6 /0 ib.
void X86_64Assembler::movb(const Address& dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitUint8(0xC6);
  // Register::RAX is used here as the ModRM reg-field digit (/0), not an operand.
  EmitOperand(Register::RAX, dst);
  CHECK(imm.is_int8());
  EmitUint8(imm.value() & 0xFF);
}
336
337
// movzx r32, r16 — zero-extend word: 0F B7 /r.
void X86_64Assembler::movzxw(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB7);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


// movzx r32, m16 — zero-extend word from memory: 0F B7 /r.
void X86_64Assembler::movzxw(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB7);
  EmitOperand(dst.LowBits(), src);
}


// movsx r32, r16 — sign-extend word: 0F BF /r.
void X86_64Assembler::movsxw(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBF);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


// movsx r32, m16 — sign-extend word from memory: 0F BF /r.
void X86_64Assembler::movsxw(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBF);
  EmitOperand(dst.LowBits(), src);
}


// Deliberately unimplemented: a plain word load is ambiguous about extension.
void X86_64Assembler::movw(CpuRegister /*dst*/, const Address& /*src*/) {
  LOG(FATAL) << "Use movzxw or movsxw instead.";
}


// mov m16, r16: 66 89 /r (operand-size override selects the 16-bit form).
void X86_64Assembler::movw(const Address& dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();
  EmitOptionalRex32(src, dst);
  EmitUint8(0x89);
  EmitOperand(src.LowBits(), dst);
}


// mov m16, imm16: 66 C7 /0 iw.
void X86_64Assembler::movw(const Address& dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();
  EmitOptionalRex32(dst);
  EmitUint8(0xC7);
  // Register::RAX is used here as the ModRM reg-field digit (/0), not an operand.
  EmitOperand(Register::RAX, dst);
  CHECK(imm.is_uint16() || imm.is_int16());
  // 16-bit immediate emitted little-endian, byte by byte.
  EmitUint8(imm.value() & 0xFF);
  EmitUint8(imm.value() >> 8);
}
398
399
// lea r64, m — compute effective address, 64-bit: REX.W 8D /r.
void X86_64Assembler::leaq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x8D);
  EmitOperand(dst.LowBits(), src);
}


// lea r32, m — compute effective address, 32-bit: 8D /r.
void X86_64Assembler::leal(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x8D);
  EmitOperand(dst.LowBits(), src);
}
414
415
// movaps xmm, xmm: 0F 28 /r. Delegates to the VEX form when AVX is available.
void X86_64Assembler::movaps(XmmRegister dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovaps(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x28);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
427
428
429 /**VEX.128.0F.WIG 28 /r VMOVAPS xmm1, xmm2 */
vmovaps(XmmRegister dst,XmmRegister src)430 void X86_64Assembler::vmovaps(XmmRegister dst, XmmRegister src) {
431 DCHECK(CpuHasAVXorAVX2FeatureFlag());
432 uint8_t byte_zero, byte_one, byte_two;
433 bool is_twobyte_form = true;
434 bool load = dst.NeedsRex();
435 bool store = !load;
436
437 if (src.NeedsRex()&& dst.NeedsRex()) {
438 is_twobyte_form = false;
439 }
440 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
441 // Instruction VEX Prefix
442 byte_zero = EmitVexPrefixByteZero(is_twobyte_form);
443 X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
444 if (is_twobyte_form) {
445 bool rex_bit = (load) ? dst.NeedsRex() : src.NeedsRex();
446 byte_one = EmitVexPrefixByteOne(rex_bit,
447 vvvv_reg,
448 SET_VEX_L_128,
449 SET_VEX_PP_NONE);
450 } else {
451 byte_one = EmitVexPrefixByteOne(dst.NeedsRex(),
452 /*X=*/ false,
453 src.NeedsRex(),
454 SET_VEX_M_0F);
455 byte_two = EmitVexPrefixByteTwo(/*W=*/ false,
456 SET_VEX_L_128,
457 SET_VEX_PP_NONE);
458 }
459 EmitUint8(byte_zero);
460 EmitUint8(byte_one);
461 if (!is_twobyte_form) {
462 EmitUint8(byte_two);
463 }
464 // Instruction Opcode
465 if (is_twobyte_form && store) {
466 EmitUint8(0x29);
467 } else {
468 EmitUint8(0x28);
469 }
470 // Instruction Operands
471 if (is_twobyte_form && store) {
472 EmitXmmRegisterOperand(src.LowBits(), dst);
473 } else {
474 EmitXmmRegisterOperand(dst.LowBits(), src);
475 }
476 }
477
// movaps xmm, m128 — aligned 128-bit load: 0F 28 /r. VEX form when AVX is on.
void X86_64Assembler::movaps(XmmRegister dst, const Address& src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovaps(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x28);
  EmitOperand(dst.LowBits(), src);
}
489
490 /**VEX.128.0F.WIG 28 /r VMOVAPS xmm1, m128 */
void X86_64Assembler::vmovaps(XmmRegister dst, const Address& src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = false;
  // Instruction VEX Prefix. The two-byte form is usable only when the
  // address needs neither the X nor the B REX extension bit.
  uint8_t rex = src.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  if (!Rex_b && !Rex_x) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    // vvvv is unused for moves.
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode: 28 /r, RM (load) form.
  EmitUint8(0x28);
  // Instruction Operands
  EmitOperand(dst.LowBits(), src);
}
529
// movups xmm, m128 — unaligned 128-bit load: 0F 10 /r. VEX form when AVX is on.
void X86_64Assembler::movups(XmmRegister dst, const Address& src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovups(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x10);
  EmitOperand(dst.LowBits(), src);
}
541
542 /** VEX.128.0F.WIG 10 /r VMOVUPS xmm1, m128 */
void X86_64Assembler::vmovups(XmmRegister dst, const Address& src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = false;
  // Instruction VEX Prefix. The two-byte form is usable only when the
  // address needs neither the X nor the B REX extension bit.
  uint8_t rex = src.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  if (!Rex_x && !Rex_b) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    // vvvv is unused for moves.
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode: 10 /r, RM (load) form.
  EmitUint8(0x10);
  // Instruction Operands
  EmitOperand(dst.LowBits(), src);
}
581
582
// movaps m128, xmm — aligned 128-bit store: 0F 29 /r. VEX form when AVX is on.
void X86_64Assembler::movaps(const Address& dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovaps(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x29);
  EmitOperand(src.LowBits(), dst);
}
594
595 /** VEX.128.0F.WIG 29 /r VMOVAPS m128, xmm1 */
void X86_64Assembler::vmovaps(const Address& dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = false;

  // Instruction VEX Prefix. The two-byte form is usable only when the
  // address needs neither the X nor the B REX extension bit.
  uint8_t rex = dst.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  if (!Rex_b && !Rex_x) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    // vvvv is unused for moves.
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode: 29 /r, MR (store) form.
  EmitUint8(0x29);
  // Instruction Operands
  EmitOperand(src.LowBits(), dst);
}
635
// movups m128, xmm — unaligned 128-bit store: 0F 11 /r. VEX form when AVX is on.
void X86_64Assembler::movups(const Address& dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovups(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitOperand(src.LowBits(), dst);
}
647
648 /** VEX.128.0F.WIG 11 /r VMOVUPS m128, xmm1 */
void X86_64Assembler::vmovups(const Address& dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = false;

  // Instruction VEX Prefix. The two-byte form is usable only when the
  // address needs neither the X nor the B REX extension bit.
  uint8_t rex = dst.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  if (!Rex_b && !Rex_x) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    // vvvv is unused for moves.
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode: 11 /r, MR (store) form.
  EmitUint8(0x11);
  // Instruction Operands
  EmitOperand(src.LowBits(), dst);
}
688
689
// movss xmm, m32 — scalar single load: F3 0F 10 /r.
void X86_64Assembler::movss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x10);
  EmitOperand(dst.LowBits(), src);
}


// movss m32, xmm — scalar single store: F3 0F 11 /r.
void X86_64Assembler::movss(const Address& dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitOperand(src.LowBits(), dst);
}


// movss xmm, xmm: F3 0F 11 /r.
void X86_64Assembler::movss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(src, dst);  // Movss is MR encoding instead of the usual RM.
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitXmmRegisterOperand(src.LowBits(), dst);
}
718
719
// movsxd r64, r32 — sign-extend 32 -> 64: REX.W 63 /r.
void X86_64Assembler::movsxd(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x63);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


// movsxd r64, m32 — sign-extend 32 -> 64 from memory: REX.W 63 /r.
void X86_64Assembler::movsxd(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x63);
  EmitOperand(dst.LowBits(), src);
}
734
735
// GPR -> XMM move; defaults to the 64-bit (movq) form.
void X86_64Assembler::movd(XmmRegister dst, CpuRegister src) {
  movd(dst, src, /*is64bit=*/ true);
}

// XMM -> GPR move; defaults to the 64-bit (movq) form.
void X86_64Assembler::movd(CpuRegister dst, XmmRegister src) {
  movd(dst, src, /*is64bit=*/ true);
}

// movd/movq xmm, r32/r64: 66 [REX.W] 0F 6E /r. REX.W selects the 64-bit form.
void X86_64Assembler::movd(XmmRegister dst, CpuRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, is64bit, dst.NeedsRex(), false, src.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x6E);
  EmitOperand(dst.LowBits(), Operand(src));
}

// movd/movq r32/r64, xmm: 66 [REX.W] 0F 7E /r. REX.W selects the 64-bit form.
void X86_64Assembler::movd(CpuRegister dst, XmmRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, is64bit, src.NeedsRex(), false, dst.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x7E);
  EmitOperand(src.LowBits(), Operand(dst));
}
761
// addss xmm, xmm — scalar single add: F3 0F 58 /r.
void X86_64Assembler::addss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// addss xmm, m32: F3 0F 58 /r.
void X86_64Assembler::addss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitOperand(dst.LowBits(), src);
}


// subss xmm, xmm — scalar single subtract: F3 0F 5C /r.
void X86_64Assembler::subss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// subss xmm, m32: F3 0F 5C /r.
void X86_64Assembler::subss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitOperand(dst.LowBits(), src);
}


// mulss xmm, xmm — scalar single multiply: F3 0F 59 /r.
void X86_64Assembler::mulss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// mulss xmm, m32: F3 0F 59 /r.
void X86_64Assembler::mulss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitOperand(dst.LowBits(), src);
}


// divss xmm, xmm — scalar single divide: F3 0F 5E /r.
void X86_64Assembler::divss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// divss xmm, m32: F3 0F 5E /r.
void X86_64Assembler::divss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitOperand(dst.LowBits(), src);
}
839
840
// addps xmm, xmm — packed single add: 0F 58 /r.
void X86_64Assembler::addps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// subps xmm, xmm — packed single subtract: 0F 5C /r.
void X86_64Assembler::subps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
857
// vaddps xmm, xmm, xmm: VEX.128.0F 58 /r.
void X86_64Assembler::vaddps(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  if (!add_right.NeedsRex()) {
    is_twobyte_form = true;
  } else if (!add_left.NeedsRex()) {
    // Addition is commutative: swap the sources so the r/m operand does not
    // need REX.B, which allows the shorter two-byte VEX prefix.
    return vaddps(dst, add_right, add_left);
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // add_left is carried in the VEX vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   add_right.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x58);
  EmitXmmRegisterOperand(dst.LowBits(), add_right);
}
888
vsubps(XmmRegister dst,XmmRegister src1,XmmRegister src2)889 void X86_64Assembler::vsubps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
890 DCHECK(CpuHasAVXorAVX2FeatureFlag());
891 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
892 bool is_twobyte_form = false;
893 uint8_t byte_zero = 0x00, byte_one = 0x00, byte_two = 0x00;
894 if (!src2.NeedsRex()) {
895 is_twobyte_form = true;
896 }
897 byte_zero = EmitVexPrefixByteZero(is_twobyte_form);
898 X86_64ManagedRegister vvvv_reg = X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
899 if (is_twobyte_form) {
900 byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
901 } else {
902 byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), /*X=*/ false, src2.NeedsRex(), SET_VEX_M_0F);
903 byte_two = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
904 }
905 EmitUint8(byte_zero);
906 EmitUint8(byte_one);
907 if (!is_twobyte_form) {
908 EmitUint8(byte_two);
909 }
910 EmitUint8(0x5C);
911 EmitXmmRegisterOperand(dst.LowBits(), src2);
912 }
913
914
// mulps xmm, xmm — packed single multiply: 0F 59 /r.
void X86_64Assembler::mulps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
922
// vmulps xmm, xmm, xmm: VEX.128.0F 59 /r.
void X86_64Assembler::vmulps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  } else if (!src1.NeedsRex()) {
    // Multiplication is commutative: swap sources so the r/m operand does not
    // need REX.B, which allows the shorter two-byte VEX prefix.
    return vmulps(dst, src2, src1);
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  // src1 travels in the VEX vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
953
/** NP 0F 5E /r DIVPS xmm1, xmm2: packed single-precision FP divide. */
void X86_64Assembler::divps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
961
vdivps(XmmRegister dst,XmmRegister src1,XmmRegister src2)962 void X86_64Assembler::vdivps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
963 DCHECK(CpuHasAVXorAVX2FeatureFlag());
964 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
965 bool is_twobyte_form = false;
966 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
967 if (!src2.NeedsRex()) {
968 is_twobyte_form = true;
969 }
970 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
971 X86_64ManagedRegister vvvv_reg =
972 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
973 if (is_twobyte_form) {
974 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
975 } else {
976 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
977 /*X=*/ false,
978 src2.NeedsRex(),
979 SET_VEX_M_0F);
980 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
981 }
982 EmitUint8(ByteZero);
983 EmitUint8(ByteOne);
984 if (!is_twobyte_form) {
985 EmitUint8(ByteTwo);
986 }
987 EmitUint8(0x5E);
988 EmitXmmRegisterOperand(dst.LowBits(), src2);
989 }
990
/** VEX.128.66.0F38.W0 A9 /r VFMADD213SS xmm1, xmm2, xmm3 (fused multiply-add, scalar single). */
void X86_64Assembler::vfmadd213ss(XmmRegister acc, XmmRegister left, XmmRegister right) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // Map 0F38 cannot be expressed with the two-byte VEX prefix, so the
  // three-byte form is always emitted.
  ByteZero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(left.AsFloatRegister());
  ByteOne = EmitVexPrefixByteOne(acc.NeedsRex(),
                                 /*X=*/ false,
                                 right.NeedsRex(),
                                 SET_VEX_M_0F_38);
  ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  EmitUint8(ByteTwo);
  EmitUint8(0xA9);
  EmitXmmRegisterOperand(acc.LowBits(), right);
}
1009
/** VEX.128.66.0F38.W1 A9 /r VFMADD213SD xmm1, xmm2, xmm3 (fused multiply-add, scalar double). */
void X86_64Assembler::vfmadd213sd(XmmRegister acc, XmmRegister left, XmmRegister right) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // Map 0F38 requires the three-byte VEX prefix; W=1 distinguishes the
  // double-precision form from vfmadd213ss (W=0).
  ByteZero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(left.AsFloatRegister());
  ByteOne = EmitVexPrefixByteOne(acc.NeedsRex(),
                                 /*X=*/ false,
                                 right.NeedsRex(),
                                 SET_VEX_M_0F_38);
  ByteTwo = EmitVexPrefixByteTwo(/*W=*/ true, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  EmitUint8(ByteTwo);
  EmitUint8(0xA9);
  EmitXmmRegisterOperand(acc.LowBits(), right);
}
/** D9 /0 FLD m32fp: push a single-precision memory operand onto the x87 stack. */
void X86_64Assembler::flds(const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(0, src);
}
1033
1034
/** D9 /2 FST m32fp: store ST(0) to memory as single precision (no pop). */
void X86_64Assembler::fsts(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(2, dst);
}
1040
1041
/** D9 /3 FSTP m32fp: store ST(0) to memory as single precision and pop. */
void X86_64Assembler::fstps(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(3, dst);
}
1047
1048
/** 66 0F 28 /r MOVAPD xmm1, xmm2; dispatches to the VEX encoding when AVX is available. */
void X86_64Assembler::movapd(XmmRegister dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovapd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x28);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1061
/** VEX.128.66.0F.WIG 28 /r VMOVAPD xmm1, xmm2 (28 = load form, 29 = store form). */
void X86_64Assembler::vmovapd(XmmRegister dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = true;

  // The two-byte VEX prefix carries only one extended-register bit, so when
  // both registers need a REX bit the three-byte prefix is required.
  if (src.NeedsRex() && dst.NeedsRex()) {
    is_twobyte_form = false;
  }
  // Instruction VEX Prefix
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  // The load form (0x28) places dst in the reg field; it is required whenever
  // dst needs the extended bit.  Otherwise the store form (0x29) is used with
  // operands swapped so src's extended bit fits the two-byte prefix.
  bool load = dst.NeedsRex();
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    bool rex_bit = load ? dst.NeedsRex() : src.NeedsRex();
    ByteOne = EmitVexPrefixByteOne(rex_bit,
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  if (is_twobyte_form && !load) {
    EmitUint8(0x29);  // Store form: src goes in the reg field.
  } else {
    EmitUint8(0x28);  // Load form: dst goes in the reg field.
  }
  // Instruction Operands
  if (is_twobyte_form && !load) {
    EmitXmmRegisterOperand(src.LowBits(), dst);
  } else {
    EmitXmmRegisterOperand(dst.LowBits(), src);
  }
}
1109
/** 66 0F 28 /r MOVAPD xmm1, m128 (aligned load); VEX encoding when AVX is available. */
void X86_64Assembler::movapd(XmmRegister dst, const Address& src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovapd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x28);
  EmitOperand(dst.LowBits(), src);
}
1122
1123 /** VEX.128.66.0F.WIG 28 /r VMOVAPD xmm1, m128 */
vmovapd(XmmRegister dst,const Address & src)1124 void X86_64Assembler::vmovapd(XmmRegister dst, const Address& src) {
1125 DCHECK(CpuHasAVXorAVX2FeatureFlag());
1126 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1127 uint8_t ByteZero, ByteOne, ByteTwo;
1128 bool is_twobyte_form = false;
1129
1130 // Instruction VEX Prefix
1131 uint8_t rex = src.rex();
1132 bool Rex_x = rex & GET_REX_X;
1133 bool Rex_b = rex & GET_REX_B;
1134 if (!Rex_b && !Rex_x) {
1135 is_twobyte_form = true;
1136 }
1137 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1138 if (is_twobyte_form) {
1139 X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
1140 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1141 vvvv_reg,
1142 SET_VEX_L_128,
1143 SET_VEX_PP_66);
1144 } else {
1145 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1146 Rex_x,
1147 Rex_b,
1148 SET_VEX_M_0F);
1149 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
1150 SET_VEX_L_128,
1151 SET_VEX_PP_66);
1152 }
1153 EmitUint8(ByteZero);
1154 EmitUint8(ByteOne);
1155 if (!is_twobyte_form) {
1156 EmitUint8(ByteTwo);
1157 }
1158 // Instruction Opcode
1159 EmitUint8(0x28);
1160 // Instruction Operands
1161 EmitOperand(dst.LowBits(), src);
1162 }
1163
/** 66 0F 10 /r MOVUPD xmm1, m128 (unaligned load); VEX encoding when AVX is available. */
void X86_64Assembler::movupd(XmmRegister dst, const Address& src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovupd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x10);
  EmitOperand(dst.LowBits(), src);
}
1176
1177 /** VEX.128.66.0F.WIG 10 /r VMOVUPD xmm1, m128 */
vmovupd(XmmRegister dst,const Address & src)1178 void X86_64Assembler::vmovupd(XmmRegister dst, const Address& src) {
1179 DCHECK(CpuHasAVXorAVX2FeatureFlag());
1180 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1181 bool is_twobyte_form = false;
1182 uint8_t ByteZero, ByteOne, ByteTwo;
1183
1184 // Instruction VEX Prefix
1185 uint8_t rex = src.rex();
1186 bool Rex_x = rex & GET_REX_X;
1187 bool Rex_b = rex & GET_REX_B;
1188 if (!Rex_b && !Rex_x) {
1189 is_twobyte_form = true;
1190 }
1191 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1192 if (is_twobyte_form) {
1193 X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
1194 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1195 vvvv_reg,
1196 SET_VEX_L_128,
1197 SET_VEX_PP_66);
1198 } else {
1199 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1200 Rex_x,
1201 Rex_b,
1202 SET_VEX_M_0F);
1203 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
1204 SET_VEX_L_128,
1205 SET_VEX_PP_66);
1206 }
1207 EmitUint8(ByteZero);
1208 EmitUint8(ByteOne);
1209 if (!is_twobyte_form)
1210 EmitUint8(ByteTwo);
1211 // Instruction Opcode
1212 EmitUint8(0x10);
1213 // Instruction Operands
1214 EmitOperand(dst.LowBits(), src);
1215 }
1216
/** 66 0F 29 /r MOVAPD m128, xmm1 (aligned store); VEX encoding when AVX is available. */
void X86_64Assembler::movapd(const Address& dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovapd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x29);
  EmitOperand(src.LowBits(), dst);
}
1229
1230 /** VEX.128.66.0F.WIG 29 /r VMOVAPD m128, xmm1 */
vmovapd(const Address & dst,XmmRegister src)1231 void X86_64Assembler::vmovapd(const Address& dst, XmmRegister src) {
1232 DCHECK(CpuHasAVXorAVX2FeatureFlag());
1233 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1234 bool is_twobyte_form = false;
1235 uint8_t ByteZero, ByteOne, ByteTwo;
1236 // Instruction VEX Prefix
1237 uint8_t rex = dst.rex();
1238 bool Rex_x = rex & GET_REX_X;
1239 bool Rex_b = rex & GET_REX_B;
1240 if (!Rex_x && !Rex_b) {
1241 is_twobyte_form = true;
1242 }
1243 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1244 if (is_twobyte_form) {
1245 X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
1246 ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
1247 vvvv_reg,
1248 SET_VEX_L_128,
1249 SET_VEX_PP_66);
1250 } else {
1251 ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
1252 Rex_x,
1253 Rex_b,
1254 SET_VEX_M_0F);
1255 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
1256 SET_VEX_L_128,
1257 SET_VEX_PP_66);
1258 }
1259 EmitUint8(ByteZero);
1260 EmitUint8(ByteOne);
1261 if (!is_twobyte_form) {
1262 EmitUint8(ByteTwo);
1263 }
1264 // Instruction Opcode
1265 EmitUint8(0x29);
1266 // Instruction Operands
1267 EmitOperand(src.LowBits(), dst);
1268 }
1269
/** 66 0F 11 /r MOVUPD m128, xmm1 (unaligned store); VEX encoding when AVX is available. */
void X86_64Assembler::movupd(const Address& dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovupd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitOperand(src.LowBits(), dst);
}
1282
/** VEX.128.66.0F.WIG 11 /r VMOVUPD m128, xmm1 (unaligned store). */
void X86_64Assembler::vmovupd(const Address& dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero, ByteOne, ByteTwo;

  // Instruction VEX Prefix
  // The two-byte VEX prefix cannot encode the X/B bits of the memory operand.
  uint8_t rex = dst.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  if (!Rex_x && !Rex_b) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x11);
  // Instruction Operands
  EmitOperand(src.LowBits(), dst);
}
1323
1324
/** F2 0F 10 /r MOVSD xmm1, m64: load scalar double from memory. */
void X86_64Assembler::movsd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x10);
  EmitOperand(dst.LowBits(), src);
}
1333
1334
/** F2 0F 11 /r MOVSD m64, xmm1: store scalar double to memory. */
void X86_64Assembler::movsd(const Address& dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitOperand(src.LowBits(), dst);
}
1343
1344
/** F2 0F 11 /r MOVSD xmm1, xmm2 (register form, MR-encoded). */
void X86_64Assembler::movsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(src, dst);  // Movsd is MR encoding instead of the usual RM.
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitXmmRegisterOperand(src.LowBits(), dst);
}
1353
1354
/** F2 0F 58 /r ADDSD xmm1, xmm2: scalar double-precision add. */
void X86_64Assembler::addsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1363
1364
/** F2 0F 58 /r ADDSD xmm1, m64: scalar double-precision add from memory. */
void X86_64Assembler::addsd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitOperand(dst.LowBits(), src);
}
1373
1374
/** F2 0F 5C /r SUBSD xmm1, xmm2: scalar double-precision subtract. */
void X86_64Assembler::subsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1383
1384
/** F2 0F 5C /r SUBSD xmm1, m64: scalar double-precision subtract from memory. */
void X86_64Assembler::subsd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitOperand(dst.LowBits(), src);
}
1393
1394
/** F2 0F 59 /r MULSD xmm1, xmm2: scalar double-precision multiply. */
void X86_64Assembler::mulsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1403
1404
/** F2 0F 59 /r MULSD xmm1, m64: scalar double-precision multiply from memory. */
void X86_64Assembler::mulsd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitOperand(dst.LowBits(), src);
}
1413
1414
/** F2 0F 5E /r DIVSD xmm1, xmm2: scalar double-precision divide. */
void X86_64Assembler::divsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1423
1424
/** F2 0F 5E /r DIVSD xmm1, m64: scalar double-precision divide by memory operand. */
void X86_64Assembler::divsd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitOperand(dst.LowBits(), src);
}
1433
1434
/** 66 0F 58 /r ADDPD xmm1, xmm2: packed double-precision add. */
void X86_64Assembler::addpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1443
1444
vaddpd(XmmRegister dst,XmmRegister add_left,XmmRegister add_right)1445 void X86_64Assembler::vaddpd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
1446 bool is_twobyte_form = false;
1447 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
1448 if (!add_right.NeedsRex()) {
1449 is_twobyte_form = true;
1450 } else if (!add_left.NeedsRex()) {
1451 return vaddpd(dst, add_right, add_left);
1452 }
1453 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1454 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1455 X86_64ManagedRegister vvvv_reg =
1456 X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
1457 if (is_twobyte_form) {
1458 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1459 } else {
1460 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1461 /*X=*/ false,
1462 add_right.NeedsRex(),
1463 SET_VEX_M_0F);
1464 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1465 }
1466 EmitUint8(ByteZero);
1467 EmitUint8(ByteOne);
1468 if (!is_twobyte_form) {
1469 EmitUint8(ByteTwo);
1470 }
1471 EmitUint8(0x58);
1472 EmitXmmRegisterOperand(dst.LowBits(), add_right);
1473 }
1474
1475
/** 66 0F 5C /r SUBPD xmm1, xmm2: packed double-precision subtract. */
void X86_64Assembler::subpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1484
1485
vsubpd(XmmRegister dst,XmmRegister src1,XmmRegister src2)1486 void X86_64Assembler::vsubpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
1487 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1488 bool is_twobyte_form = false;
1489 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
1490 if (!src2.NeedsRex()) {
1491 is_twobyte_form = true;
1492 }
1493 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1494 X86_64ManagedRegister vvvv_reg =
1495 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
1496 if (is_twobyte_form) {
1497 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1498 } else {
1499 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1500 /*X=*/ false,
1501 src2.NeedsRex(),
1502 SET_VEX_M_0F);
1503 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1504 }
1505 EmitUint8(ByteZero);
1506 EmitUint8(ByteOne);
1507 if (!is_twobyte_form) {
1508 EmitUint8(ByteTwo);
1509 }
1510 EmitUint8(0x5C);
1511 EmitXmmRegisterOperand(dst.LowBits(), src2);
1512 }
1513
1514
/** 66 0F 59 /r MULPD xmm1, xmm2: packed double-precision multiply. */
void X86_64Assembler::mulpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1523
vmulpd(XmmRegister dst,XmmRegister src1,XmmRegister src2)1524 void X86_64Assembler::vmulpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
1525 DCHECK(CpuHasAVXorAVX2FeatureFlag());
1526 bool is_twobyte_form = false;
1527 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
1528 if (!src2.NeedsRex()) {
1529 is_twobyte_form = true;
1530 } else if (!src1.NeedsRex()) {
1531 return vmulpd(dst, src2, src1);
1532 }
1533 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1534 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1535 X86_64ManagedRegister vvvv_reg =
1536 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
1537 if (is_twobyte_form) {
1538 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1539 } else {
1540 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1541 /*X=*/ false,
1542 src2.NeedsRex(),
1543 SET_VEX_M_0F);
1544 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1545 }
1546 EmitUint8(ByteZero);
1547 EmitUint8(ByteOne);
1548 if (!is_twobyte_form) {
1549 EmitUint8(ByteTwo);
1550 }
1551 EmitUint8(0x59);
1552 EmitXmmRegisterOperand(dst.LowBits(), src2);
1553 }
1554
/** 66 0F 5E /r DIVPD xmm1, xmm2: packed double-precision divide. */
void X86_64Assembler::divpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1563
1564
vdivpd(XmmRegister dst,XmmRegister src1,XmmRegister src2)1565 void X86_64Assembler::vdivpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
1566 DCHECK(CpuHasAVXorAVX2FeatureFlag());
1567 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1568 bool is_twobyte_form = false;
1569 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
1570 if (!src2.NeedsRex()) {
1571 is_twobyte_form = true;
1572 }
1573 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1574 X86_64ManagedRegister vvvv_reg =
1575 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
1576 if (is_twobyte_form) {
1577 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1578 } else {
1579 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1580 /*X=*/ false,
1581 src2.NeedsRex(),
1582 SET_VEX_M_0F);
1583 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1584 }
1585 EmitUint8(ByteZero);
1586 EmitUint8(ByteOne);
1587 if (!is_twobyte_form) {
1588 EmitUint8(ByteTwo);
1589 }
1590 EmitUint8(0x5E);
1591 EmitXmmRegisterOperand(dst.LowBits(), src2);
1592 }
1593
1594
/** 66 0F 6F /r MOVDQA xmm1, xmm2; dispatches to the VEX encoding when AVX is available. */
void X86_64Assembler::movdqa(XmmRegister dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqa(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6F);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1607
/** VEX.128.66.0F.WIG 6F /r VMOVDQA xmm1, xmm2 (6F = load form, 7F = store form). */
void X86_64Assembler::vmovdqa(XmmRegister dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = true;

  // Instruction VEX Prefix
  // The two-byte VEX prefix carries only one extended-register bit, so when
  // both registers need a REX bit the three-byte prefix is required.
  if (src.NeedsRex() && dst.NeedsRex()) {
    is_twobyte_form = false;
  }
  // The load form (0x6F) places dst in the reg field; it is required whenever
  // dst needs the extended bit.  Otherwise the store form (0x7F) is used with
  // operands swapped so src's extended bit fits the two-byte prefix.
  bool load = dst.NeedsRex();
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    bool rex_bit = load ? dst.NeedsRex() : src.NeedsRex();
    ByteOne = EmitVexPrefixByteOne(rex_bit,
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  if (is_twobyte_form && !load) {
    EmitUint8(0x7F);  // Store form: src goes in the reg field.
  } else {
    EmitUint8(0x6F);  // Load form: dst goes in the reg field.
  }
  // Instruction Operands
  if (is_twobyte_form && !load) {
    EmitXmmRegisterOperand(src.LowBits(), dst);
  } else {
    EmitXmmRegisterOperand(dst.LowBits(), src);
  }
}
1655
/** 66 0F 6F /r MOVDQA xmm1, m128 (aligned load); VEX encoding when AVX is available. */
void X86_64Assembler::movdqa(XmmRegister dst, const Address& src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqa(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6F);
  EmitOperand(dst.LowBits(), src);
}
1668
1669 /** VEX.128.66.0F.WIG 6F /r VMOVDQA xmm1, m128 */
vmovdqa(XmmRegister dst,const Address & src)1670 void X86_64Assembler::vmovdqa(XmmRegister dst, const Address& src) {
1671 DCHECK(CpuHasAVXorAVX2FeatureFlag());
1672 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1673 uint8_t ByteZero, ByteOne, ByteTwo;
1674 bool is_twobyte_form = false;
1675
1676 // Instruction VEX Prefix
1677 uint8_t rex = src.rex();
1678 bool Rex_x = rex & GET_REX_X;
1679 bool Rex_b = rex & GET_REX_B;
1680 if (!Rex_x && !Rex_b) {
1681 is_twobyte_form = true;
1682 }
1683 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1684 if (is_twobyte_form) {
1685 X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
1686 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1687 vvvv_reg,
1688 SET_VEX_L_128,
1689 SET_VEX_PP_66);
1690 } else {
1691 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1692 Rex_x,
1693 Rex_b,
1694 SET_VEX_M_0F);
1695 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
1696 SET_VEX_L_128,
1697 SET_VEX_PP_66);
1698 }
1699 EmitUint8(ByteZero);
1700 EmitUint8(ByteOne);
1701 if (!is_twobyte_form) {
1702 EmitUint8(ByteTwo);
1703 }
1704 // Instruction Opcode
1705 EmitUint8(0x6F);
1706 // Instruction Operands
1707 EmitOperand(dst.LowBits(), src);
1708 }
1709
/** F3 0F 6F /r MOVDQU xmm1, m128 (unaligned load); VEX encoding when AVX is available. */
void X86_64Assembler::movdqu(XmmRegister dst, const Address& src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqu(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6F);
  EmitOperand(dst.LowBits(), src);
}
1722
/** VEX.128.F3.0F.WIG 6F /r VMOVDQU xmm1, m128 (unaligned load). */
void X86_64Assembler::vmovdqu(XmmRegister dst, const Address& src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = false;

  // Instruction VEX Prefix
  // The two-byte VEX prefix cannot encode the X/B bits of the memory operand.
  uint8_t rex = src.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  if (!Rex_x && !Rex_b) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_F3);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_F3);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x6F);
  // Instruction Operands
  EmitOperand(dst.LowBits(), src);
}
1764
/** 66 0F 7F /r MOVDQA m128, xmm1 (aligned store); VEX encoding when AVX is available. */
void X86_64Assembler::movdqa(const Address& dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqa(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x7F);
  EmitOperand(src.LowBits(), dst);
}
1777
/** VEX.128.66.0F.WIG 7F /r VMOVDQA m128, xmm1 (aligned store). */
void X86_64Assembler::vmovdqa(const Address& dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero, ByteOne, ByteTwo;
  // Instruction VEX Prefix
  // The two-byte VEX prefix cannot encode the X/B bits of the memory operand.
  uint8_t rex = dst.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  if (!Rex_x && !Rex_b) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x7F);
  // Instruction Operands
  EmitOperand(src.LowBits(), dst);
}
1817
/** F3 0F 7F /r MOVDQU m128, xmm1 (unaligned store); VEX encoding when AVX is available. */
void X86_64Assembler::movdqu(const Address& dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqu(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x7F);
  EmitOperand(src.LowBits(), dst);
}
1830
/** VEX.128.F3.0F.WIG 7F /r VMOVDQU m128, xmm1 (unaligned store). */
void X86_64Assembler::vmovdqu(const Address& dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = false;

  // Instruction VEX Prefix
  // The two-byte VEX prefix cannot encode the X/B bits of the memory operand.
  uint8_t rex = dst.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  if (!Rex_b && !Rex_x) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_F3);
  } else {
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_F3);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x7F);
  // Instruction Operands
  EmitOperand(src.LowBits(), dst);
}
1871
/** 66 0F FC /r PADDB xmm1, xmm2: packed byte add. */
void X86_64Assembler::paddb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xFC);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1880
1881
vpaddb(XmmRegister dst,XmmRegister add_left,XmmRegister add_right)1882 void X86_64Assembler::vpaddb(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
1883 DCHECK(CpuHasAVXorAVX2FeatureFlag());
1884 uint8_t ByteOne = 0x00, ByteZero = 0x00, ByteTwo = 0x00;
1885 bool is_twobyte_form = false;
1886 if (!add_right.NeedsRex()) {
1887 is_twobyte_form = true;
1888 } else if (!add_left.NeedsRex()) {
1889 return vpaddb(dst, add_right, add_left);
1890 }
1891 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1892 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1893 X86_64ManagedRegister vvvv_reg =
1894 X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
1895 if (is_twobyte_form) {
1896 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1897 } else {
1898 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1899 /*X=*/ false,
1900 add_right.NeedsRex(),
1901 SET_VEX_M_0F);
1902 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1903 }
1904 EmitUint8(ByteZero);
1905 EmitUint8(ByteOne);
1906 if (!is_twobyte_form) {
1907 EmitUint8(ByteTwo);
1908 }
1909 EmitUint8(0xFC);
1910 EmitXmmRegisterOperand(dst.LowBits(), add_right);
1911 }
1912
1913
/** 66 0F F8 /r PSUBB xmm1, xmm2: packed byte subtract. */
void X86_64Assembler::psubb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xF8);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1922
1923
// VPSUBB: VEX.128.66.0F.WIG F8 /r — dst = add_left - add_right.
// NOTE: parameter names mirror vpaddb's; subtraction is NOT commutative, so unlike
// vpaddb no operand swap is performed — the two-byte VEX form is only used when
// add_right (the r/m operand) happens to need no REX bit.
void X86_64Assembler::vpsubb(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  if (!add_right.NeedsRex()) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  // add_left is encoded in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   add_right.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0xF8);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), add_right);
}
1952
1953
void X86_64Assembler::paddw(XmmRegister dst, XmmRegister src) {
  // PADDW: packed add of 16-bit words. Encoding: 66 [REX] 0F FD /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xFD);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1962
// VPADDW: VEX.128.66.0F.WIG FD /r — non-destructive three-operand packed word add.
void X86_64Assembler::vpaddw(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  if (!add_right.NeedsRex()) {
    // r/m operand needs no REX.B → compact two-byte VEX prefix.
    is_twobyte_form = true;
  } else if (!add_left.NeedsRex()) {
    // Commutative: swap sources so the shorter prefix can be used.
    return vpaddw(dst, add_right, add_left);
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  // add_left goes in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   add_right.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0xFD);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), add_right);
}
1993
1994
void X86_64Assembler::psubw(XmmRegister dst, XmmRegister src) {
  // PSUBW: packed subtract of 16-bit words. Encoding: 66 [REX] 0F F9 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xF9);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2003
// VPSUBW: VEX.128.66.0F.WIG F9 /r — dst = add_left - add_right.
// Non-commutative, so no operand swap (contrast with vpaddw).
void X86_64Assembler::vpsubw(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  if (!add_right.NeedsRex()) {
    is_twobyte_form = true;  // r/m operand needs no REX.B → two-byte VEX prefix.
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  // add_left goes in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   add_right.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0xF9);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), add_right);
}
2032
2033
void X86_64Assembler::pmullw(XmmRegister dst, XmmRegister src) {
  // PMULLW: packed multiply of words, low 16 bits of each product. 66 [REX] 0F D5 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xD5);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2042
// VPMULLW: VEX.128.66.0F.WIG D5 /r — three-operand packed low-word multiply.
void X86_64Assembler::vpmullw(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  if (!src2.NeedsRex()) {
    // r/m operand needs no REX.B → compact two-byte VEX prefix.
    is_twobyte_form = true;
  } else if (!src1.NeedsRex()) {
    // Multiplication is commutative: swap sources to enable the shorter prefix.
    return vpmullw(dst, src2, src1);
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  // src1 goes in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0xD5);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
2073
void X86_64Assembler::paddd(XmmRegister dst, XmmRegister src) {
  // PADDD: packed add of 32-bit dwords. Encoding: 66 [REX] 0F FE /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xFE);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2082
// VPADDD: VEX.128.66.0F.WIG FE /r — non-destructive three-operand packed dword add.
void X86_64Assembler::vpaddd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  if (!add_right.NeedsRex()) {
    // r/m operand needs no REX.B → compact two-byte VEX prefix.
    is_twobyte_form = true;
  } else if (!add_left.NeedsRex()) {
    // Commutative: swap sources so the shorter prefix can be used.
    return vpaddd(dst, add_right, add_left);
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  // add_left goes in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   add_right.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0xFE);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), add_right);
}
2113
void X86_64Assembler::psubd(XmmRegister dst, XmmRegister src) {
  // PSUBD: packed subtract of 32-bit dwords. Encoding: 66 [REX] 0F FA /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xFA);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2122
2123
void X86_64Assembler::pmulld(XmmRegister dst, XmmRegister src) {
  // PMULLD (SSE4.1): packed low-dword multiply. Three-byte opcode: 66 [REX] 0F 38 40 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x40);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2133
// VPMULLD: VEX.128.66.0F38.WIG 40 /r. The 0F 38 opcode map can only be selected
// via the three-byte VEX prefix, so the two-byte form is never usable here.
void X86_64Assembler::vpmulld(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  ByteZero = EmitVexPrefixByteZero(/*is_twobyte_form*/ false);
  // src1 goes in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                 /*X=*/ false,
                                 src2.NeedsRex(),
                                 SET_VEX_M_0F_38);
  ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  EmitUint8(ByteTwo);
  EmitUint8(0x40);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
2152
void X86_64Assembler::paddq(XmmRegister dst, XmmRegister src) {
  // PADDQ: packed add of 64-bit qwords. Encoding: 66 [REX] 0F D4 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xD4);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2161
2162
// VPADDQ: VEX.128.66.0F.WIG D4 /r — non-destructive three-operand packed qword add.
void X86_64Assembler::vpaddq(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  if (!add_right.NeedsRex()) {
    // r/m operand needs no REX.B → compact two-byte VEX prefix.
    is_twobyte_form = true;
  } else if (!add_left.NeedsRex()) {
    // Commutative: swap sources so the shorter prefix can be used.
    return vpaddq(dst, add_right, add_left);
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  // add_left goes in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   add_right.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0xD4);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), add_right);
}
2193
2194
void X86_64Assembler::psubq(XmmRegister dst, XmmRegister src) {
  // PSUBQ: packed subtract of 64-bit qwords. Encoding: 66 [REX] 0F FB /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xFB);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2203
// VPSUBQ: VEX.128.66.0F.WIG FB /r — dst = add_left - add_right.
// Non-commutative, so no operand swap (contrast with vpaddq).
void X86_64Assembler::vpsubq(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  if (!add_right.NeedsRex()) {
    is_twobyte_form = true;  // r/m operand needs no REX.B → two-byte VEX prefix.
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  // add_left goes in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   add_right.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0xFB);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), add_right);
}
2232
2233
void X86_64Assembler::paddusb(XmmRegister dst, XmmRegister src) {
  // PADDUSB: packed add of bytes with unsigned saturation. 66 [REX] 0F DC /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDC);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2242
2243
void X86_64Assembler::paddsb(XmmRegister dst, XmmRegister src) {
  // PADDSB: packed add of bytes with signed saturation. 66 [REX] 0F EC /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEC);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2252
2253
void X86_64Assembler::paddusw(XmmRegister dst, XmmRegister src) {
  // PADDUSW: packed add of words with unsigned saturation. 66 [REX] 0F DD /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDD);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2262
2263
void X86_64Assembler::paddsw(XmmRegister dst, XmmRegister src) {
  // PADDSW: packed add of words with signed saturation. 66 [REX] 0F ED /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xED);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2272
2273
void X86_64Assembler::psubusb(XmmRegister dst, XmmRegister src) {
  // PSUBUSB: packed subtract of bytes with unsigned saturation. 66 [REX] 0F D8 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xD8);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2282
2283
void X86_64Assembler::psubsb(XmmRegister dst, XmmRegister src) {
  // PSUBSB: packed subtract of bytes with signed saturation. 66 [REX] 0F E8 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE8);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2292
2293
// VPSUBD: VEX.128.66.0F.WIG FA /r — dst = add_left - add_right.
// Non-commutative, so no operand swap (contrast with vpaddd).
void X86_64Assembler::vpsubd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  if (!add_right.NeedsRex()) {
    is_twobyte_form = true;  // r/m operand needs no REX.B → two-byte VEX prefix.
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  // add_left goes in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   add_right.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0xFA);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), add_right);
}
2322
2323
void X86_64Assembler::psubusw(XmmRegister dst, XmmRegister src) {
  // PSUBUSW: packed subtract of words with unsigned saturation. 66 [REX] 0F D9 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xD9);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2332
2333
void X86_64Assembler::psubsw(XmmRegister dst, XmmRegister src) {
  // PSUBSW: packed subtract of words with signed saturation. 66 [REX] 0F E9 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE9);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2342
2343
// Convenience overload: convert a 32-bit GP register to scalar single.
void X86_64Assembler::cvtsi2ss(XmmRegister dst, CpuRegister src) {
  cvtsi2ss(dst, src, /*is64bit=*/ false);
}
2347
2348
// CVTSI2SS: signed integer (32- or 64-bit GP register) to scalar single.
// Encoding: F3 [REX.W] 0F 2A /r.
void X86_64Assembler::cvtsi2ss(XmmRegister dst, CpuRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2A);
  EmitOperand(dst.LowBits(), Operand(src));
}
2362
2363
// CVTSI2SS, memory-source form: F3 [REX.W] 0F 2A /r.
void X86_64Assembler::cvtsi2ss(XmmRegister dst, const Address& src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2A);
  EmitOperand(dst.LowBits(), src);
}
2377
2378
// Convenience overload: convert a 32-bit GP register to scalar double.
void X86_64Assembler::cvtsi2sd(XmmRegister dst, CpuRegister src) {
  cvtsi2sd(dst, src, /*is64bit=*/ false);
}
2382
2383
// CVTSI2SD: signed integer (32- or 64-bit GP register) to scalar double.
// Encoding: F2 [REX.W] 0F 2A /r.
void X86_64Assembler::cvtsi2sd(XmmRegister dst, CpuRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2A);
  EmitOperand(dst.LowBits(), Operand(src));
}
2397
2398
// CVTSI2SD, memory-source form: F2 [REX.W] 0F 2A /r.
void X86_64Assembler::cvtsi2sd(XmmRegister dst, const Address& src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2A);
  EmitOperand(dst.LowBits(), src);
}
2412
2413
void X86_64Assembler::cvtss2si(CpuRegister dst, XmmRegister src) {
  // CVTSS2SI: scalar single to 32-bit signed integer (rounded). F3 [REX] 0F 2D /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x2D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2422
2423
void X86_64Assembler::cvtss2sd(XmmRegister dst, XmmRegister src) {
  // CVTSS2SD: scalar single to scalar double. F3 [REX] 0F 5A /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5A);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2432
2433
void X86_64Assembler::cvtss2sd(XmmRegister dst, const Address& src) {
  // CVTSS2SD, memory-source form: F3 [REX] 0F 5A /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5A);
  EmitOperand(dst.LowBits(), src);
}
2442
2443
void X86_64Assembler::cvtsd2si(CpuRegister dst, XmmRegister src) {
  // CVTSD2SI: scalar double to 32-bit signed integer (rounded). F2 [REX] 0F 2D /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x2D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2452
2453
// Convenience overload: truncating convert to a 32-bit integer destination.
void X86_64Assembler::cvttss2si(CpuRegister dst, XmmRegister src) {
  cvttss2si(dst, src, /*is64bit=*/ false);
}
2457
2458
// CVTTSS2SI: scalar single to signed integer with truncation.
// Encoding: F3 [REX.W] 0F 2C /r.
void X86_64Assembler::cvttss2si(CpuRegister dst, XmmRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2472
2473
// Convenience overload: truncating convert to a 32-bit integer destination.
void X86_64Assembler::cvttsd2si(CpuRegister dst, XmmRegister src) {
  cvttsd2si(dst, src, /*is64bit=*/ false);
}
2477
2478
// CVTTSD2SI: scalar double to signed integer with truncation.
// Encoding: F2 [REX.W] 0F 2C /r.
void X86_64Assembler::cvttsd2si(CpuRegister dst, XmmRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2492
2493
void X86_64Assembler::cvtsd2ss(XmmRegister dst, XmmRegister src) {
  // CVTSD2SS: scalar double to scalar single. F2 [REX] 0F 5A /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5A);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2502
2503
void X86_64Assembler::cvtsd2ss(XmmRegister dst, const Address& src) {
  // CVTSD2SS, memory-source form: F2 [REX] 0F 5A /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5A);
  EmitOperand(dst.LowBits(), src);
}
2512
2513
void X86_64Assembler::cvtdq2ps(XmmRegister dst, XmmRegister src) {
  // CVTDQ2PS: packed signed dwords to packed singles. No mandatory prefix: [REX] 0F 5B /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5B);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2521
2522
void X86_64Assembler::cvtdq2pd(XmmRegister dst, XmmRegister src) {
  // CVTDQ2PD: low two packed signed dwords to packed doubles. F3 [REX] 0F E6 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE6);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2531
2532
void X86_64Assembler::comiss(XmmRegister a, XmmRegister b) {
  // COMISS: ordered scalar single compare, sets EFLAGS. [REX] 0F 2F /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2F);
  EmitXmmRegisterOperand(a.LowBits(), b);
}
2540
2541
void X86_64Assembler::comiss(XmmRegister a, const Address& b) {
  // COMISS, memory-operand form: [REX] 0F 2F /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2F);
  EmitOperand(a.LowBits(), b);
}
2549
2550
void X86_64Assembler::comisd(XmmRegister a, XmmRegister b) {
  // COMISD: ordered scalar double compare, sets EFLAGS. 66 [REX] 0F 2F /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2F);
  EmitXmmRegisterOperand(a.LowBits(), b);
}
2559
2560
void X86_64Assembler::comisd(XmmRegister a, const Address& b) {
  // COMISD, memory-operand form: 66 [REX] 0F 2F /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2F);
  EmitOperand(a.LowBits(), b);
}
2569
2570
void X86_64Assembler::ucomiss(XmmRegister a, XmmRegister b) {
  // UCOMISS: unordered scalar single compare, sets EFLAGS. [REX] 0F 2E /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2E);
  EmitXmmRegisterOperand(a.LowBits(), b);
}
2578
2579
void X86_64Assembler::ucomiss(XmmRegister a, const Address& b) {
  // UCOMISS, memory-operand form: [REX] 0F 2E /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2E);
  EmitOperand(a.LowBits(), b);
}
2587
2588
void X86_64Assembler::ucomisd(XmmRegister a, XmmRegister b) {
  // UCOMISD: unordered scalar double compare, sets EFLAGS. 66 [REX] 0F 2E /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2E);
  EmitXmmRegisterOperand(a.LowBits(), b);
}
2597
2598
void X86_64Assembler::ucomisd(XmmRegister a, const Address& b) {
  // UCOMISD, memory-operand form: 66 [REX] 0F 2E /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2E);
  EmitOperand(a.LowBits(), b);
}
2607
2608
void X86_64Assembler::roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  // ROUNDSD (SSE4.1): round scalar double, mode in imm8. 66 [REX] 0F 3A 0B /r ib.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x3A);
  EmitUint8(0x0B);
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());  // Rounding-control immediate.
}
2619
2620
void X86_64Assembler::roundss(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  // ROUNDSS (SSE4.1): round scalar single, mode in imm8. 66 [REX] 0F 3A 0A /r ib.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x3A);
  EmitUint8(0x0A);
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());  // Rounding-control immediate.
}
2631
2632
void X86_64Assembler::sqrtsd(XmmRegister dst, XmmRegister src) {
  // SQRTSD: scalar double square root. F2 [REX] 0F 51 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x51);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2641
2642
void X86_64Assembler::sqrtss(XmmRegister dst, XmmRegister src) {
  // SQRTSS: scalar single square root. F3 [REX] 0F 51 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x51);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2651
2652
void X86_64Assembler::xorpd(XmmRegister dst, const Address& src) {
  // XORPD, memory-operand form: bitwise XOR of packed doubles. 66 [REX] 0F 57 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x57);
  EmitOperand(dst.LowBits(), src);
}
2661
2662
void X86_64Assembler::xorpd(XmmRegister dst, XmmRegister src) {
  // XORPD: bitwise XOR of packed doubles. 66 [REX] 0F 57 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x57);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2671
2672
void X86_64Assembler::xorps(XmmRegister dst, const Address& src) {
  // XORPS, memory-operand form: bitwise XOR of packed singles. [REX] 0F 57 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x57);
  EmitOperand(dst.LowBits(), src);
}
2680
2681
void X86_64Assembler::xorps(XmmRegister dst, XmmRegister src) {
  // XORPS: bitwise XOR of packed singles. [REX] 0F 57 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x57);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2689
void X86_64Assembler::pxor(XmmRegister dst, XmmRegister src) {
  // PXOR: bitwise XOR of the full 128-bit registers. 66 [REX] 0F EF /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEF);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2698
2699 /* VEX.128.66.0F.WIG EF /r VPXOR xmm1, xmm2, xmm3/m128 */
void X86_64Assembler::vpxor(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  if (!src2.NeedsRex()) {
    // r/m operand needs no REX.B → compact two-byte VEX prefix.
    is_twobyte_form = true;
  } else if (!src1.NeedsRex()) {
    // XOR is commutative: swap sources so the shorter prefix can be used.
    return vpxor(dst, src2, src1);
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // src1 goes in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0xEF);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
2730
2731 /* VEX.128.0F.WIG 57 /r VXORPS xmm1,xmm2, xmm3/m128 */
void X86_64Assembler::vxorps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  if (!src2.NeedsRex()) {
    // r/m operand needs no REX.B → compact two-byte VEX prefix.
    is_twobyte_form = true;
  } else if (!src1.NeedsRex()) {
    // XOR is commutative: swap sources so the shorter prefix can be used.
    return vxorps(dst, src2, src1);
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // src1 goes in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    // No mandatory prefix (PP=NONE) distinguishes VXORPS from VXORPD/VPXOR.
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x57);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
2762
2763 /* VEX.128.66.0F.WIG 57 /r VXORPD xmm1,xmm2, xmm3/m128 */
void X86_64Assembler::vxorpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  if (!src2.NeedsRex()) {
    // r/m operand needs no REX.B → compact two-byte VEX prefix.
    is_twobyte_form = true;
  } else if (!src1.NeedsRex()) {
    // XOR is commutative: swap sources so the shorter prefix can be used.
    return vxorpd(dst, src2, src1);
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // src1 goes in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x57);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
2794
void X86_64Assembler::andpd(XmmRegister dst, const Address& src) {
  // ANDPD, memory-operand form: bitwise AND of packed doubles. 66 [REX] 0F 54 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x54);
  EmitOperand(dst.LowBits(), src);
}
2803
void X86_64Assembler::andpd(XmmRegister dst, XmmRegister src) {
  // ANDPD: bitwise AND of packed doubles. 66 [REX] 0F 54 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x54);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2812
// ANDPS xmm1, xmm2 — bitwise AND of packed singles. Encoding: [REX] 0F 54 /r
// (no 66 prefix distinguishes PS from PD).
void X86_64Assembler::andps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x54);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2820
// PAND xmm1, xmm2 — bitwise AND of the full 128-bit registers.
// Encoding: 66 [REX] 0F DB /r.
void X86_64Assembler::pand(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDB);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2829
2830 /* VEX.128.66.0F.WIG DB /r VPAND xmm1, xmm2, xmm3/m128 */
vpand(XmmRegister dst,XmmRegister src1,XmmRegister src2)2831 void X86_64Assembler::vpand(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
2832 DCHECK(CpuHasAVXorAVX2FeatureFlag());
2833 bool is_twobyte_form = false;
2834 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2835 if (!src2.NeedsRex()) {
2836 is_twobyte_form = true;
2837 } else if (!src1.NeedsRex()) {
2838 return vpand(dst, src2, src1);
2839 }
2840 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2841 X86_64ManagedRegister vvvv_reg =
2842 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
2843 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2844 if (is_twobyte_form) {
2845 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2846 } else {
2847 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2848 /*X=*/ false,
2849 src2.NeedsRex(),
2850 SET_VEX_M_0F);
2851 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2852 }
2853 EmitUint8(ByteZero);
2854 EmitUint8(ByteOne);
2855 if (!is_twobyte_form) {
2856 EmitUint8(ByteTwo);
2857 }
2858 EmitUint8(0xDB);
2859 EmitXmmRegisterOperand(dst.LowBits(), src2);
2860 }
2861
2862 /* VEX.128.0F 54 /r VANDPS xmm1,xmm2, xmm3/m128 */
vandps(XmmRegister dst,XmmRegister src1,XmmRegister src2)2863 void X86_64Assembler::vandps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
2864 DCHECK(CpuHasAVXorAVX2FeatureFlag());
2865 bool is_twobyte_form = false;
2866 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2867 if (!src2.NeedsRex()) {
2868 is_twobyte_form = true;
2869 } else if (!src1.NeedsRex()) {
2870 return vandps(dst, src2, src1);
2871 }
2872 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2873 X86_64ManagedRegister vvvv_reg =
2874 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
2875 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2876 if (is_twobyte_form) {
2877 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
2878 } else {
2879 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2880 /*X=*/ false,
2881 src2.NeedsRex(),
2882 SET_VEX_M_0F);
2883 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
2884 }
2885 EmitUint8(ByteZero);
2886 EmitUint8(ByteOne);
2887 if (!is_twobyte_form) {
2888 EmitUint8(ByteTwo);
2889 }
2890 EmitUint8(0x54);
2891 EmitXmmRegisterOperand(dst.LowBits(), src2);
2892 }
2893
2894 /* VEX.128.66.0F 54 /r VANDPD xmm1, xmm2, xmm3/m128 */
vandpd(XmmRegister dst,XmmRegister src1,XmmRegister src2)2895 void X86_64Assembler::vandpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
2896 DCHECK(CpuHasAVXorAVX2FeatureFlag());
2897 bool is_twobyte_form = false;
2898 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2899 if (!src2.NeedsRex()) {
2900 is_twobyte_form = true;
2901 } else if (!src1.NeedsRex()) {
2902 return vandpd(dst, src2, src1);
2903 }
2904 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2905 X86_64ManagedRegister vvvv_reg =
2906 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
2907 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2908 if (is_twobyte_form) {
2909 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2910 } else {
2911 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2912 /*X=*/ false,
2913 src2.NeedsRex(),
2914 SET_VEX_M_0F);
2915 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2916 }
2917 EmitUint8(ByteZero);
2918 EmitUint8(ByteOne);
2919 if (!is_twobyte_form) {
2920 EmitUint8(ByteTwo);
2921 }
2922 EmitUint8(0x54);
2923 EmitXmmRegisterOperand(dst.LowBits(), src2);
2924 }
2925
andn(CpuRegister dst,CpuRegister src1,CpuRegister src2)2926 void X86_64Assembler::andn(CpuRegister dst, CpuRegister src1, CpuRegister src2) {
2927 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2928 uint8_t byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
2929 uint8_t byte_one = EmitVexPrefixByteOne(dst.NeedsRex(),
2930 /*X=*/ false,
2931 src2.NeedsRex(),
2932 SET_VEX_M_0F_38);
2933 uint8_t byte_two = EmitVexPrefixByteTwo(/*W=*/ true,
2934 X86_64ManagedRegister::FromCpuRegister(src1.AsRegister()),
2935 SET_VEX_L_128,
2936 SET_VEX_PP_NONE);
2937 EmitUint8(byte_zero);
2938 EmitUint8(byte_one);
2939 EmitUint8(byte_two);
2940 // Opcode field
2941 EmitUint8(0xF2);
2942 EmitRegisterOperand(dst.LowBits(), src2.LowBits());
2943 }
2944
// ANDNPD xmm1, xmm2 — dst = ~dst & src on packed doubles.
// Encoding: 66 [REX] 0F 55 /r.
void X86_64Assembler::andnpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x55);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2953
// ANDNPS xmm1, xmm2 — dst = ~dst & src on packed singles.
// Encoding: [REX] 0F 55 /r.
void X86_64Assembler::andnps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x55);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2961
// PANDN xmm1, xmm2 — dst = ~dst & src over the full 128 bits.
// Encoding: 66 [REX] 0F DF /r.
void X86_64Assembler::pandn(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDF);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2970
2971 /* VEX.128.66.0F.WIG DF /r VPANDN xmm1, xmm2, xmm3/m128 */
vpandn(XmmRegister dst,XmmRegister src1,XmmRegister src2)2972 void X86_64Assembler::vpandn(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
2973 DCHECK(CpuHasAVXorAVX2FeatureFlag());
2974 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2975 bool is_twobyte_form = false;
2976 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2977 if (!src2.NeedsRex()) {
2978 is_twobyte_form = true;
2979 }
2980 X86_64ManagedRegister vvvv_reg =
2981 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
2982 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2983 if (is_twobyte_form) {
2984 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2985 } else {
2986 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2987 /*X=*/ false,
2988 src2.NeedsRex(),
2989 SET_VEX_M_0F);
2990 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2991 }
2992 EmitUint8(ByteZero);
2993 EmitUint8(ByteOne);
2994 if (!is_twobyte_form) {
2995 EmitUint8(ByteTwo);
2996 }
2997 EmitUint8(0xDF);
2998 EmitXmmRegisterOperand(dst.LowBits(), src2);
2999 }
3000
3001 /* VEX.128.0F 55 /r VANDNPS xmm1, xmm2, xmm3/m128 */
vandnps(XmmRegister dst,XmmRegister src1,XmmRegister src2)3002 void X86_64Assembler::vandnps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
3003 DCHECK(CpuHasAVXorAVX2FeatureFlag());
3004 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3005 bool is_twobyte_form = false;
3006 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
3007 if (!src2.NeedsRex()) {
3008 is_twobyte_form = true;
3009 }
3010 X86_64ManagedRegister vvvv_reg =
3011 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
3012 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
3013 if (is_twobyte_form) {
3014 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
3015 } else {
3016 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
3017 /*X=*/ false,
3018 src2.NeedsRex(),
3019 SET_VEX_M_0F);
3020 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
3021 }
3022 EmitUint8(ByteZero);
3023 EmitUint8(ByteOne);
3024 if (!is_twobyte_form) {
3025 EmitUint8(ByteTwo);
3026 }
3027 EmitUint8(0x55);
3028 EmitXmmRegisterOperand(dst.LowBits(), src2);
3029 }
3030
3031 /* VEX.128.66.0F 55 /r VANDNPD xmm1, xmm2, xmm3/m128 */
vandnpd(XmmRegister dst,XmmRegister src1,XmmRegister src2)3032 void X86_64Assembler::vandnpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
3033 DCHECK(CpuHasAVXorAVX2FeatureFlag());
3034 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3035 bool is_twobyte_form = false;
3036 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
3037 if (!src2.NeedsRex()) {
3038 is_twobyte_form = true;
3039 }
3040 X86_64ManagedRegister vvvv_reg =
3041 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
3042 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
3043 if (is_twobyte_form) {
3044 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
3045 } else {
3046 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
3047 /*X=*/ false,
3048 src2.NeedsRex(),
3049 SET_VEX_M_0F);
3050 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
3051 }
3052 EmitUint8(ByteZero);
3053 EmitUint8(ByteOne);
3054 if (!is_twobyte_form) {
3055 EmitUint8(ByteTwo);
3056 }
3057 EmitUint8(0x55);
3058 EmitXmmRegisterOperand(dst.LowBits(), src2);
3059 }
3060
// ORPD xmm1, xmm2 — bitwise OR of packed doubles. Encoding: 66 [REX] 0F 56 /r.
void X86_64Assembler::orpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x56);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3069
// ORPS xmm1, xmm2 — bitwise OR of packed singles. Encoding: [REX] 0F 56 /r.
void X86_64Assembler::orps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x56);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3077
// POR xmm1, xmm2 — bitwise OR of the full 128-bit registers.
// Encoding: 66 [REX] 0F EB /r.
void X86_64Assembler::por(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEB);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3086
3087 /* VEX.128.66.0F.WIG EB /r VPOR xmm1, xmm2, xmm3/m128 */
vpor(XmmRegister dst,XmmRegister src1,XmmRegister src2)3088 void X86_64Assembler::vpor(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
3089 DCHECK(CpuHasAVXorAVX2FeatureFlag());
3090 bool is_twobyte_form = false;
3091 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
3092 if (!src2.NeedsRex()) {
3093 is_twobyte_form = true;
3094 } else if (!src1.NeedsRex()) {
3095 return vpor(dst, src2, src1);
3096 }
3097 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3098 X86_64ManagedRegister vvvv_reg =
3099 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
3100 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
3101 if (is_twobyte_form) {
3102 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
3103 } else {
3104 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
3105 /*X=*/ false,
3106 src2.NeedsRex(),
3107 SET_VEX_M_0F);
3108 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
3109 }
3110 EmitUint8(ByteZero);
3111 EmitUint8(ByteOne);
3112 if (!is_twobyte_form) {
3113 EmitUint8(ByteTwo);
3114 }
3115 EmitUint8(0xEB);
3116 EmitXmmRegisterOperand(dst.LowBits(), src2);
3117 }
3118
3119 /* VEX.128.0F 56 /r VORPS xmm1,xmm2, xmm3/m128 */
vorps(XmmRegister dst,XmmRegister src1,XmmRegister src2)3120 void X86_64Assembler::vorps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
3121 DCHECK(CpuHasAVXorAVX2FeatureFlag());
3122 bool is_twobyte_form = false;
3123 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
3124 if (!src2.NeedsRex()) {
3125 is_twobyte_form = true;
3126 } else if (!src1.NeedsRex()) {
3127 return vorps(dst, src2, src1);
3128 }
3129 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3130 X86_64ManagedRegister vvvv_reg =
3131 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
3132 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
3133 if (is_twobyte_form) {
3134 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
3135 } else {
3136 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
3137 /*X=*/ false,
3138 src2.NeedsRex(),
3139 SET_VEX_M_0F);
3140 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
3141 }
3142 EmitUint8(ByteZero);
3143 EmitUint8(ByteOne);
3144 if (!is_twobyte_form) {
3145 EmitUint8(ByteTwo);
3146 }
3147 EmitUint8(0x56);
3148 EmitXmmRegisterOperand(dst.LowBits(), src2);
3149 }
3150
3151 /* VEX.128.66.0F 56 /r VORPD xmm1,xmm2, xmm3/m128 */
vorpd(XmmRegister dst,XmmRegister src1,XmmRegister src2)3152 void X86_64Assembler::vorpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
3153 DCHECK(CpuHasAVXorAVX2FeatureFlag());
3154 bool is_twobyte_form = false;
3155 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
3156 if (!src2.NeedsRex()) {
3157 is_twobyte_form = true;
3158 } else if (!src1.NeedsRex()) {
3159 return vorpd(dst, src2, src1);
3160 }
3161 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3162 X86_64ManagedRegister vvvv_reg =
3163 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
3164 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
3165 if (is_twobyte_form) {
3166 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
3167 } else {
3168 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
3169 /*X=*/ false,
3170 src2.NeedsRex(),
3171 SET_VEX_M_0F);
3172 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
3173 }
3174 EmitUint8(ByteZero);
3175 EmitUint8(ByteOne);
3176 if (!is_twobyte_form) {
3177 EmitUint8(ByteTwo);
3178 }
3179 EmitUint8(0x56);
3180 EmitXmmRegisterOperand(dst.LowBits(), src2);
3181 }
3182
// PAVGB xmm1, xmm2 — average of packed unsigned bytes.
// Encoding: 66 [REX] 0F E0 /r.
void X86_64Assembler::pavgb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE0);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3191
// PAVGW xmm1, xmm2 — average of packed unsigned words.
// Encoding: 66 [REX] 0F E3 /r.
void X86_64Assembler::pavgw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE3);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3200
// PSADBW xmm1, xmm2 — sum of absolute differences of packed unsigned bytes.
// Encoding: 66 [REX] 0F F6 /r.
void X86_64Assembler::psadbw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xF6);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3209
// PMADDWD xmm1, xmm2 — multiply packed signed words, add adjacent products
// into packed dwords. Encoding: 66 [REX] 0F F5 /r.
void X86_64Assembler::pmaddwd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xF5);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3218
vpmaddwd(XmmRegister dst,XmmRegister src1,XmmRegister src2)3219 void X86_64Assembler::vpmaddwd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
3220 DCHECK(CpuHasAVXorAVX2FeatureFlag());
3221 bool is_twobyte_form = false;
3222 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
3223 if (!src2.NeedsRex()) {
3224 is_twobyte_form = true;
3225 } else if (!src1.NeedsRex()) {
3226 return vpmaddwd(dst, src2, src1);
3227 }
3228 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3229 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
3230 X86_64ManagedRegister vvvv_reg =
3231 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
3232 if (is_twobyte_form) {
3233 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
3234 } else {
3235 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
3236 /*X=*/ false,
3237 src2.NeedsRex(),
3238 SET_VEX_M_0F);
3239 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
3240 }
3241 EmitUint8(ByteZero);
3242 EmitUint8(ByteOne);
3243 if (!is_twobyte_form) {
3244 EmitUint8(ByteTwo);
3245 }
3246 EmitUint8(0xF5);
3247 EmitXmmRegisterOperand(dst.LowBits(), src2);
3248 }
3249
// PHADDW xmm1, xmm2 (SSSE3) — horizontal add of packed words.
// Encoding: 66 [REX] 0F 38 01 /r.
void X86_64Assembler::phaddw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x01);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3259
// PHADDD xmm1, xmm2 (SSSE3) — horizontal add of packed dwords.
// Encoding: 66 [REX] 0F 38 02 /r.
void X86_64Assembler::phaddd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x02);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3269
// HADDPS xmm1, xmm2 (SSE3) — horizontal add of packed singles.
// Encoding: F2 [REX] 0F 7C /r.
void X86_64Assembler::haddps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);  // F2 (not 66) prefix selects the PS variant of HADD.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x7C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3278
// HADDPD xmm1, xmm2 (SSE3) — horizontal add of packed doubles.
// Encoding: 66 [REX] 0F 7C /r.
void X86_64Assembler::haddpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x7C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3287
// PHSUBW xmm1, xmm2 (SSSE3) — horizontal subtract of packed words.
// Encoding: 66 [REX] 0F 38 05 /r.
void X86_64Assembler::phsubw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x05);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3297
// PHSUBD xmm1, xmm2 (SSSE3) — horizontal subtract of packed dwords.
// Encoding: 66 [REX] 0F 38 06 /r.
void X86_64Assembler::phsubd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x06);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3307
// HSUBPS xmm1, xmm2 (SSE3) — horizontal subtract of packed singles.
// Encoding: F2 [REX] 0F 7D /r.
void X86_64Assembler::hsubps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);  // F2 (not 66) prefix selects the PS variant of HSUB.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x7D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3316
// HSUBPD xmm1, xmm2 (SSE3) — horizontal subtract of packed doubles.
// Encoding: 66 [REX] 0F 7D /r.
void X86_64Assembler::hsubpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x7D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3325
// PMINSB xmm1, xmm2 (SSE4.1) — minimum of packed signed bytes.
// Encoding: 66 [REX] 0F 38 38 /r.
void X86_64Assembler::pminsb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x38);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3335
// PMAXSB xmm1, xmm2 (SSE4.1) — maximum of packed signed bytes.
// Encoding: 66 [REX] 0F 38 3C /r.
void X86_64Assembler::pmaxsb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3345
// PMINSW xmm1, xmm2 — minimum of packed signed words.
// Encoding: 66 [REX] 0F EA /r.
void X86_64Assembler::pminsw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEA);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3354
// PMAXSW xmm1, xmm2 — maximum of packed signed words.
// Encoding: 66 [REX] 0F EE /r.
void X86_64Assembler::pmaxsw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEE);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3363
// PMINSD xmm1, xmm2 (SSE4.1) — minimum of packed signed dwords.
// Encoding: 66 [REX] 0F 38 39 /r.
void X86_64Assembler::pminsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x39);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3373
// PMAXSD xmm1, xmm2 (SSE4.1) — maximum of packed signed dwords.
// Encoding: 66 [REX] 0F 38 3D /r.
void X86_64Assembler::pmaxsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3383
// PMINUB xmm1, xmm2 — minimum of packed unsigned bytes.
// Encoding: 66 [REX] 0F DA /r.
void X86_64Assembler::pminub(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDA);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3392
// PMAXUB xmm1, xmm2 — maximum of packed unsigned bytes.
// Encoding: 66 [REX] 0F DE /r.
void X86_64Assembler::pmaxub(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDE);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3401
// PMINUW xmm1, xmm2 (SSE4.1) — minimum of packed unsigned words.
// Encoding: 66 [REX] 0F 38 3A /r.
void X86_64Assembler::pminuw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3A);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3411
// PMAXUW xmm1, xmm2 (SSE4.1) — maximum of packed unsigned words.
// Encoding: 66 [REX] 0F 38 3E /r.
void X86_64Assembler::pmaxuw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3421
// PMINUD xmm1, xmm2 (SSE4.1) — minimum of packed unsigned dwords.
// Encoding: 66 [REX] 0F 38 3B /r.
void X86_64Assembler::pminud(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3B);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3431
// PMAXUD xmm1, xmm2 (SSE4.1) — maximum of packed unsigned dwords.
// Encoding: 66 [REX] 0F 38 3F /r.
void X86_64Assembler::pmaxud(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3F);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3441
// MINPS xmm1, xmm2 — minimum of packed singles. Encoding: [REX] 0F 5D /r.
void X86_64Assembler::minps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3449
// MAXPS xmm1, xmm2 — maximum of packed singles. Encoding: [REX] 0F 5F /r.
void X86_64Assembler::maxps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5F);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3457
// MINPD xmm1, xmm2 — minimum of packed doubles. Encoding: 66 [REX] 0F 5D /r.
void X86_64Assembler::minpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3466
// MAXPD xmm1, xmm2 — maximum of packed doubles. Encoding: 66 [REX] 0F 5F /r.
void X86_64Assembler::maxpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5F);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3475
// PCMPEQB xmm1, xmm2 — compare packed bytes for equality (all-ones mask per
// equal element). Encoding: 66 [REX] 0F 74 /r.
void X86_64Assembler::pcmpeqb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x74);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3484
// PCMPEQW xmm1, xmm2 — compare packed words for equality.
// Encoding: 66 [REX] 0F 75 /r.
void X86_64Assembler::pcmpeqw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x75);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3493
// PCMPEQD xmm1, xmm2 — compare packed dwords for equality.
// Encoding: 66 [REX] 0F 76 /r.
void X86_64Assembler::pcmpeqd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x76);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3502
// PCMPEQQ xmm1, xmm2 (SSE4.1) — compare packed qwords for equality.
// Encoding: 66 [REX] 0F 38 29 /r.
void X86_64Assembler::pcmpeqq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x29);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3512
// PCMPGTB xmm1, xmm2 — signed greater-than compare of packed bytes.
// Encoding: 66 [REX] 0F 64 /r.
void X86_64Assembler::pcmpgtb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x64);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3521
// PCMPGTW xmm1, xmm2 — signed greater-than compare of packed words.
// Encoding: 66 [REX] 0F 65 /r.
void X86_64Assembler::pcmpgtw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x65);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3530
// PCMPGTD xmm1, xmm2 — signed greater-than compare of packed dwords.
// Encoding: 66 [REX] 0F 66 /r.
void X86_64Assembler::pcmpgtd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x66);  // Opcode byte (coincides with the 66 prefix value).
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3539
// PCMPGTQ xmm1, xmm2 (SSE4.2) — signed greater-than compare of packed qwords.
// Encoding: 66 [REX] 0F 38 37 /r.
void X86_64Assembler::pcmpgtq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x37);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3549
// SHUFPD xmm1, xmm2, imm8 — shuffle packed doubles selected by imm8.
// Encoding: 66 [REX] 0F C6 /r ib.
void X86_64Assembler::shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xC6);
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());  // Selector byte follows the ModRM.
}
3559
3560
// SHUFPS xmm1, xmm2, imm8 — shuffle packed singles selected by imm8.
// Encoding: [REX] 0F C6 /r ib.
void X86_64Assembler::shufps(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xC6);
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());  // Selector byte follows the ModRM.
}
3569
3570
// PSHUFD xmm1, xmm2, imm8 — shuffle packed dwords selected by imm8.
// Encoding: 66 [REX] 0F 70 /r ib.
void X86_64Assembler::pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x70);
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());  // Selector byte follows the ModRM.
}
3580
3581
// PUNPCKLBW xmm1, xmm2 — interleave low bytes. Encoding: 66 [REX] 0F 60 /r.
void X86_64Assembler::punpcklbw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x60);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3590
3591
// PUNPCKLWD xmm1, xmm2 — interleave low words. Encoding: 66 [REX] 0F 61 /r.
void X86_64Assembler::punpcklwd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x61);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3600
3601
// PUNPCKLDQ xmm1, xmm2 — interleave low dwords. Encoding: 66 [REX] 0F 62 /r.
void X86_64Assembler::punpckldq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x62);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3610
3611
// PUNPCKLQDQ xmm1, xmm2 — interleave low qwords. Encoding: 66 [REX] 0F 6C /r.
void X86_64Assembler::punpcklqdq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3620
3621
// PUNPCKHBW xmm1, xmm2 — interleave high bytes. Encoding: 66 [REX] 0F 68 /r.
void X86_64Assembler::punpckhbw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x68);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3630
3631
// PUNPCKHWD xmm1, xmm2 — interleave high words. Encoding: 66 [REX] 0F 69 /r.
void X86_64Assembler::punpckhwd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x69);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3640
3641
// PUNPCKHDQ xmm1, xmm2 — interleave high dwords. Encoding: 66 [REX] 0F 6A /r.
void X86_64Assembler::punpckhdq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6A);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3650
3651
// PUNPCKHQDQ xmm1, xmm2 — interleave high qwords. Encoding: 66 [REX] 0F 6D /r.
void X86_64Assembler::punpckhqdq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3660
3661
// PSLLW xmm, imm8 — shift packed words left by an immediate count.
// Encoding: 66 [REX] 0F 71 /6 ib (the ModRM reg field, not a register,
// selects the operation; only REX.B may be needed, for the xmm operand).
void X86_64Assembler::psllw(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x71);
  EmitXmmRegisterOperand(6, reg);  // /6 selects shift-left-logical.
  EmitUint8(shift_count.value());
}
3672
3673
// PSLLD xmm, imm8 — shift packed dwords left by an immediate count.
// Encoding: 66 [REX] 0F 72 /6 ib.
void X86_64Assembler::pslld(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x72);
  EmitXmmRegisterOperand(6, reg);  // /6 selects shift-left-logical.
  EmitUint8(shift_count.value());
}
3684
3685
psllq(XmmRegister reg,const Immediate & shift_count)3686 void X86_64Assembler::psllq(XmmRegister reg, const Immediate& shift_count) {
3687 DCHECK(shift_count.is_uint8());
3688 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3689 EmitUint8(0x66);
3690 EmitOptionalRex(false, false, false, false, reg.NeedsRex());
3691 EmitUint8(0x0F);
3692 EmitUint8(0x73);
3693 EmitXmmRegisterOperand(6, reg);
3694 EmitUint8(shift_count.value());
3695 }
3696
3697
psraw(XmmRegister reg,const Immediate & shift_count)3698 void X86_64Assembler::psraw(XmmRegister reg, const Immediate& shift_count) {
3699 DCHECK(shift_count.is_uint8());
3700 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3701 EmitUint8(0x66);
3702 EmitOptionalRex(false, false, false, false, reg.NeedsRex());
3703 EmitUint8(0x0F);
3704 EmitUint8(0x71);
3705 EmitXmmRegisterOperand(4, reg);
3706 EmitUint8(shift_count.value());
3707 }
3708
3709
psrad(XmmRegister reg,const Immediate & shift_count)3710 void X86_64Assembler::psrad(XmmRegister reg, const Immediate& shift_count) {
3711 DCHECK(shift_count.is_uint8());
3712 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3713 EmitUint8(0x66);
3714 EmitOptionalRex(false, false, false, false, reg.NeedsRex());
3715 EmitUint8(0x0F);
3716 EmitUint8(0x72);
3717 EmitXmmRegisterOperand(4, reg);
3718 EmitUint8(shift_count.value());
3719 }
3720
3721
psrlw(XmmRegister reg,const Immediate & shift_count)3722 void X86_64Assembler::psrlw(XmmRegister reg, const Immediate& shift_count) {
3723 DCHECK(shift_count.is_uint8());
3724 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3725 EmitUint8(0x66);
3726 EmitOptionalRex(false, false, false, false, reg.NeedsRex());
3727 EmitUint8(0x0F);
3728 EmitUint8(0x71);
3729 EmitXmmRegisterOperand(2, reg);
3730 EmitUint8(shift_count.value());
3731 }
3732
3733
psrld(XmmRegister reg,const Immediate & shift_count)3734 void X86_64Assembler::psrld(XmmRegister reg, const Immediate& shift_count) {
3735 DCHECK(shift_count.is_uint8());
3736 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3737 EmitUint8(0x66);
3738 EmitOptionalRex(false, false, false, false, reg.NeedsRex());
3739 EmitUint8(0x0F);
3740 EmitUint8(0x72);
3741 EmitXmmRegisterOperand(2, reg);
3742 EmitUint8(shift_count.value());
3743 }
3744
3745
psrlq(XmmRegister reg,const Immediate & shift_count)3746 void X86_64Assembler::psrlq(XmmRegister reg, const Immediate& shift_count) {
3747 DCHECK(shift_count.is_uint8());
3748 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3749 EmitUint8(0x66);
3750 EmitOptionalRex(false, false, false, false, reg.NeedsRex());
3751 EmitUint8(0x0F);
3752 EmitUint8(0x73);
3753 EmitXmmRegisterOperand(2, reg);
3754 EmitUint8(shift_count.value());
3755 }
3756
3757
psrldq(XmmRegister reg,const Immediate & shift_count)3758 void X86_64Assembler::psrldq(XmmRegister reg, const Immediate& shift_count) {
3759 DCHECK(shift_count.is_uint8());
3760 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3761 EmitUint8(0x66);
3762 EmitOptionalRex(false, false, false, false, reg.NeedsRex());
3763 EmitUint8(0x0F);
3764 EmitUint8(0x73);
3765 EmitXmmRegisterOperand(3, reg);
3766 EmitUint8(shift_count.value());
3767 }
3768
3769
// x87 floating-point instructions. Memory forms encode the operation as an
// opcode digit in the ModRM reg field (EmitOperand's first argument).

// fldl — load m64 onto the FP stack; DD /0.
void X86_64Assembler::fldl(const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDD);
  EmitOperand(0, src);
}


// fstl — store ST0 to m64 (no pop); DD /2.
void X86_64Assembler::fstl(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDD);
  EmitOperand(2, dst);
}


// fstpl — store ST0 to m64 and pop; DD /3.
void X86_64Assembler::fstpl(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDD);
  EmitOperand(3, dst);
}


// fstsw — store FP status word to AX, with a WAIT (0x9B) prefix: 9B DF E0.
void X86_64Assembler::fstsw() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x9B);
  EmitUint8(0xDF);
  EmitUint8(0xE0);
}


// fnstcw — store FP control word (no wait); D9 /7.
void X86_64Assembler::fnstcw(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(7, dst);
}


// fldcw — load FP control word; D9 /5.
void X86_64Assembler::fldcw(const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(5, src);
}


// fistpl — store ST0 as 64-bit integer and pop; DF /7.
void X86_64Assembler::fistpl(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDF);
  EmitOperand(7, dst);
}


// fistps — store ST0 as 32-bit integer and pop; DB /3.
void X86_64Assembler::fistps(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDB);
  EmitOperand(3, dst);
}


// fildl — load 64-bit integer onto the FP stack; DF /5.
void X86_64Assembler::fildl(const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDF);
  EmitOperand(5, src);
}


// filds — load 32-bit integer onto the FP stack; DB /0.
void X86_64Assembler::filds(const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDB);
  EmitOperand(0, src);
}


// fincstp — increment the FP stack-top pointer; D9 F7.
void X86_64Assembler::fincstp() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xF7);
}


// ffree ST(i) — mark a stack register free; DD C0+i.
// NOTE(review): CHECK_LT(..., 7) rejects index 7, yet ST7 is encodable
// (DD C7). Looks like an off-by-one (should arguably be < 8) — TODO confirm
// whether callers deliberately never free ST7.
void X86_64Assembler::ffree(const Immediate& index) {
  CHECK_LT(index.value(), 7);
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDD);
  EmitUint8(0xC0 + index.value());
}


// fsin — D9 FE.
void X86_64Assembler::fsin() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xFE);
}


// fcos — D9 FF.
void X86_64Assembler::fcos() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xFF);
}


// fptan — D9 F2.
void X86_64Assembler::fptan() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xF2);
}

// fucompp — unordered compare ST0 with ST1 and pop both; DA E9.
void X86_64Assembler::fucompp() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDA);
  EmitUint8(0xE9);
}


// fprem — partial remainder; D9 F8.
void X86_64Assembler::fprem() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xF8);
}
3888
3889
// Helper for the xchg emitters: if either operand is RAX, emit the one-byte
// short form XCHG rAX, r (0x90+reg) — preceded by the caller-supplied prefix
// emitter (EmitOptionalRex32 or EmitRex64) when needed — and return true.
// Returns false without emitting anything if neither operand is RAX.
// Caller is responsible for EnsureCapacity.
bool X86_64Assembler::try_xchg_rax(CpuRegister dst,
                                   CpuRegister src,
                                   void (X86_64Assembler::*prefix_fn)(CpuRegister)) {
  Register src_reg = src.AsRegister();
  Register dst_reg = dst.AsRegister();
  if (src_reg != RAX && dst_reg != RAX) {
    return false;
  }
  // Normalize so that src_reg is RAX and dst_reg is the other operand.
  if (dst_reg == RAX) {
    std::swap(src_reg, dst_reg);
  }
  if (dst_reg != RAX) {
    // Prefix is needed only if one of the registers is not RAX, otherwise it's a pure NOP.
    (this->*prefix_fn)(CpuRegister(dst_reg));
  }
  EmitUint8(0x90 + CpuRegister(dst_reg).LowBits());
  return true;
}
3908
3909
// xchg instructions. Byte form uses opcode 0x86; 16/32/64-bit forms use 0x87,
// with the short 0x90+r encoding taken via try_xchg_rax when one operand is
// the accumulator.

// xchgb r8, r8 — [REX] 86 /r.
void X86_64Assembler::xchgb(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // There is no short version for AL.
  EmitOptionalByteRegNormalizingRex32(dst, src, /*normalize_both=*/ true);
  EmitUint8(0x86);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


// xchgb r8, m8 — [REX] 86 /r.
void X86_64Assembler::xchgb(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalByteRegNormalizingRex32(reg, address);
  EmitUint8(0x86);
  EmitOperand(reg.LowBits(), address);
}


// xchgw r16, r16 — 66 [REX] 87 /r, or 66 [REX] 90+r when one side is AX.
void X86_64Assembler::xchgw(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();
  if (try_xchg_rax(dst, src, &X86_64Assembler::EmitOptionalRex32)) {
    // A short version for AX.
    return;
  }
  // General case.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x87);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


// xchgw r16, m16 — 66 [REX] 87 /r.
void X86_64Assembler::xchgw(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();
  EmitOptionalRex32(reg, address);
  EmitUint8(0x87);
  EmitOperand(reg.LowBits(), address);
}


// xchgl r32, r32 — [REX] 87 /r, or [REX] 90+r when one side is EAX.
void X86_64Assembler::xchgl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (try_xchg_rax(dst, src, &X86_64Assembler::EmitOptionalRex32)) {
    // A short version for EAX.
    return;
  }
  // General case.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x87);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


// xchgl r32, m32 — [REX] 87 /r.
void X86_64Assembler::xchgl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x87);
  EmitOperand(reg.LowBits(), address);
}


// xchgq r64, r64 — REX.W 87 /r, or REX.W 90+r when one side is RAX.
void X86_64Assembler::xchgq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (try_xchg_rax(dst, src, &X86_64Assembler::EmitRex64)) {
    // A short version for RAX.
    return;
  }
  // General case.
  EmitRex64(dst, src);
  EmitUint8(0x87);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


// xchgq r64, m64 — REX.W 87 /r.
void X86_64Assembler::xchgq(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x87);
  EmitOperand(reg.LowBits(), address);
}
3990
3991
// xadd instructions — 0F C0 (byte) / 0F C1 (word/dword/qword) with the source
// operand in the ModRM reg field, hence the (src, dst) argument order in the
// register-register prefix/ModRM emission.

// xaddb r8, r8 — [REX] 0F C0 /r.
void X86_64Assembler::xaddb(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalByteRegNormalizingRex32(src, dst, /*normalize_both=*/ true);
  EmitUint8(0x0F);
  EmitUint8(0xC0);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}


// xaddb m8, r8 — [REX] 0F C0 /r.
void X86_64Assembler::xaddb(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalByteRegNormalizingRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xC0);
  EmitOperand(reg.LowBits(), address);
}


// xaddw r16, r16 — 66 [REX] 0F C1 /r.
void X86_64Assembler::xaddw(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0xC1);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}


// xaddw m16, r16 — 66 [REX] 0F C1 /r.
void X86_64Assembler::xaddw(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xC1);
  EmitOperand(reg.LowBits(), address);
}


// xaddl r32, r32 — [REX] 0F C1 /r.
void X86_64Assembler::xaddl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0xC1);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}


// xaddl m32, r32 — [REX] 0F C1 /r.
void X86_64Assembler::xaddl(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xC1);
  EmitOperand(reg.LowBits(), address);
}


// xaddq r64, r64 — REX.W 0F C1 /r.
void X86_64Assembler::xaddq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0xC1);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}


// xaddq m64, r64 — REX.W 0F C1 /r.
void X86_64Assembler::xaddq(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xC1);
  EmitOperand(reg.LowBits(), address);
}
4064
4065
// cmp instructions. Immediate forms go through EmitComplex with opcode digit
// /7; register/memory forms use 3B (reg <- cmp with r/m) or 39 (r/m <- cmp
// with reg).

// cmpb m8, imm8 — [REX] 80 /7 ib.
// NOTE(review): the CHECK accepts any int32 but only the low byte of imm is
// emitted; values outside int8/uint8 range are silently truncated — TODO
// confirm whether a tighter is_int8()/is_uint8() check is intended.
void X86_64Assembler::cmpb(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());
  EmitOptionalRex32(address);
  EmitUint8(0x80);
  EmitOperand(7, address);
  EmitUint8(imm.value() & 0xFF);
}


// cmpw m16, imm — 66 [REX] then 83 /7 ib or 81 /7 iw via EmitComplex.
void X86_64Assembler::cmpw(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());
  EmitOperandSizeOverride();
  EmitOptionalRex32(address);
  EmitComplex(7, address, imm, /* is_16_op= */ true);
}


// cmpl r32, imm — [REX] 83 /7 ib or 81 /7 id via EmitComplex.
void X86_64Assembler::cmpl(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());
  EmitOptionalRex32(reg);
  EmitComplex(7, Operand(reg), imm);
}


// cmpl r32, r32 — [REX] 3B /r.
void X86_64Assembler::cmpl(CpuRegister reg0, CpuRegister reg1) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg0, reg1);
  EmitUint8(0x3B);
  EmitOperand(reg0.LowBits(), Operand(reg1));
}


// cmpl r32, m32 — [REX] 3B /r.
void X86_64Assembler::cmpl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x3B);
  EmitOperand(reg.LowBits(), address);
}


// cmpl m32, r32 — [REX] 39 /r.
void X86_64Assembler::cmpl(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x39);
  EmitOperand(reg.LowBits(), address);
}


// cmpl m32, imm — [REX] 83/81 /7 via EmitComplex.
void X86_64Assembler::cmpl(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());
  EmitOptionalRex32(address);
  EmitComplex(7, address, imm);
}


// cmpq r64, r64 — REX.W 3B /r.
void X86_64Assembler::cmpq(CpuRegister reg0, CpuRegister reg1) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg0, reg1);
  EmitUint8(0x3B);
  EmitOperand(reg0.LowBits(), Operand(reg1));
}


// cmpq r64, imm32 (sign-extended) — REX.W 83/81 /7 via EmitComplex.
void X86_64Assembler::cmpq(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // cmpq only supports 32b immediate.
  EmitRex64(reg);
  EmitComplex(7, Operand(reg), imm);
}


// cmpq r64, m64 — REX.W 3B /r.
void X86_64Assembler::cmpq(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x3B);
  EmitOperand(reg.LowBits(), address);
}


// cmpq m64, imm32 (sign-extended) — REX.W 83/81 /7 via EmitComplex.
void X86_64Assembler::cmpq(const Address& address, const Immediate& imm) {
  CHECK(imm.is_int32());  // cmpq only supports 32b immediate.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(address);
  EmitComplex(7, address, imm);
}
4155
4156
// addl r32, r32 — [REX] 03 /r (dst in the reg field).
void X86_64Assembler::addl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x03);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


// addl r32, m32 — [REX] 03 /r.
void X86_64Assembler::addl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x03);
  EmitOperand(reg.LowBits(), address);
}
4171
4172
// test instructions — opcode 85 (r/m, r); immediate forms use A8/A9 (AL/EAX
// shortcuts) or F6/F7 /0.

// testl r32, r32 — [REX] 85 /r.
void X86_64Assembler::testl(CpuRegister reg1, CpuRegister reg2) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg1, reg2);
  EmitUint8(0x85);
  EmitRegisterOperand(reg1.LowBits(), reg2.LowBits());
}


// testl r32, m32 — [REX] 85 /r.
void X86_64Assembler::testl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x85);
  EmitOperand(reg.LowBits(), address);
}


// testl r32, imm — uses the shortest available encoding:
//   A8 ib for AL, F6 C0+r ib for CL/DL/BL (byte test suffices for a uint8
//   immediate), A9 id for EAX, otherwise [REX] F7 /0 id.
void X86_64Assembler::testl(CpuRegister reg, const Immediate& immediate) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // For registers that have a byte variant (RAX, RBX, RCX, and RDX)
  // we only test the byte CpuRegister to keep the encoding short.
  if (immediate.is_uint8() && reg.AsRegister() < 4) {
    // Use zero-extended 8-bit immediate.
    if (reg.AsRegister() == RAX) {
      EmitUint8(0xA8);
    } else {
      EmitUint8(0xF6);
      EmitUint8(0xC0 + reg.AsRegister());
    }
    EmitUint8(immediate.value() & 0xFF);
  } else if (reg.AsRegister() == RAX) {
    // Use short form if the destination is RAX.
    EmitUint8(0xA9);
    EmitImmediate(immediate);
  } else {
    EmitOptionalRex32(reg);
    EmitUint8(0xF7);
    EmitOperand(0, Operand(reg));
    EmitImmediate(immediate);
  }
}


// testq r64, r64 — REX.W 85 /r.
void X86_64Assembler::testq(CpuRegister reg1, CpuRegister reg2) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg1, reg2);
  EmitUint8(0x85);
  EmitRegisterOperand(reg1.LowBits(), reg2.LowBits());
}


// testq r64, m64 — REX.W 85 /r.
void X86_64Assembler::testq(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x85);
  EmitOperand(reg.LowBits(), address);
}


// testb m8, imm8 — [REX] F6 /0 ib. Register::RAX (0) is used here purely as
// the /0 opcode-extension digit, not as a register operand.
void X86_64Assembler::testb(const Address& dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitUint8(0xF6);
  EmitOperand(Register::RAX, dst);
  CHECK(imm.is_int8());
  EmitUint8(imm.value() & 0xFF);
}


// testl m32, imm32 — [REX] F7 /0 id.
void X86_64Assembler::testl(const Address& dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitUint8(0xF7);
  EmitOperand(0, dst);
  EmitImmediate(imm);
}
4248
4249
// and instructions — opcode 23 (reg <- reg AND r/m); immediate forms use
// EmitComplex with digit /4.

// andl r32, r32 — [REX] 23 /r.
void X86_64Assembler::andl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x23);
  EmitOperand(dst.LowBits(), Operand(src));
}


// andl r32, m32 — [REX] 23 /r.
void X86_64Assembler::andl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x23);
  EmitOperand(reg.LowBits(), address);
}


// andl r32, imm — [REX] 83/81 /4 via EmitComplex.
void X86_64Assembler::andl(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitComplex(4, Operand(dst), imm);
}


// andq r64, imm32 (sign-extended) — REX.W 83/81 /4 via EmitComplex.
void X86_64Assembler::andq(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // andq only supports 32b immediate.
  EmitRex64(reg);
  EmitComplex(4, Operand(reg), imm);
}


// andq r64, r64 — REX.W 23 /r.
void X86_64Assembler::andq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x23);
  EmitOperand(dst.LowBits(), Operand(src));
}


// andq r64, m64 — REX.W 23 /r.
void X86_64Assembler::andq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x23);
  EmitOperand(dst.LowBits(), src);
}


// andw m16, imm — 66 [REX] 83/81 /4 via EmitComplex (16-bit immediate).
void X86_64Assembler::andw(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_uint16() || imm.is_int16()) << imm.value();
  EmitUint8(0x66);
  EmitOptionalRex32(address);
  EmitComplex(4, address, imm, /* is_16_op= */ true);
}
4304
4305
// or instructions — opcode 0B (reg <- reg OR r/m); immediate forms use
// EmitComplex with digit /1.

// orl r32, r32 — [REX] 0B /r.
void X86_64Assembler::orl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0B);
  EmitOperand(dst.LowBits(), Operand(src));
}


// orl r32, m32 — [REX] 0B /r.
void X86_64Assembler::orl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0B);
  EmitOperand(reg.LowBits(), address);
}


// orl r32, imm — [REX] 83/81 /1 via EmitComplex.
void X86_64Assembler::orl(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitComplex(1, Operand(dst), imm);
}


// orq r64, imm32 (sign-extended) — REX.W 83/81 /1 via EmitComplex.
void X86_64Assembler::orq(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // orq only supports 32b immediate.
  EmitRex64(dst);
  EmitComplex(1, Operand(dst), imm);
}


// orq r64, r64 — REX.W 0B /r.
void X86_64Assembler::orq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0B);
  EmitOperand(dst.LowBits(), Operand(src));
}


// orq r64, m64 — REX.W 0B /r.
void X86_64Assembler::orq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0B);
  EmitOperand(dst.LowBits(), src);
}
4351
4352
// xor instructions — opcode 33 (reg <- reg XOR r/m); immediate forms use
// EmitComplex with digit /6.

// xorl r32, r32 — [REX] 33 /r.
void X86_64Assembler::xorl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x33);
  EmitOperand(dst.LowBits(), Operand(src));
}


// xorl r32, m32 — [REX] 33 /r.
void X86_64Assembler::xorl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x33);
  EmitOperand(reg.LowBits(), address);
}


// xorl r32, imm — [REX] 83/81 /6 via EmitComplex.
void X86_64Assembler::xorl(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitComplex(6, Operand(dst), imm);
}


// xorq r64, r64 — REX.W 33 /r.
void X86_64Assembler::xorq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x33);
  EmitOperand(dst.LowBits(), Operand(src));
}


// xorq r64, imm32 (sign-extended) — REX.W 83/81 /6 via EmitComplex.
void X86_64Assembler::xorq(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // xorq only supports 32b immediate.
  EmitRex64(dst);
  EmitComplex(6, Operand(dst), imm);
}

// xorq r64, m64 — REX.W 33 /r.
void X86_64Assembler::xorq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x33);
  EmitOperand(dst.LowBits(), src);
}
4397
4398
4399 #if 0
4400 void X86_64Assembler::rex(bool force, bool w, Register* r, Register* x, Register* b) {
4401 // REX.WRXB
4402 // W - 64-bit operand
4403 // R - MODRM.reg
4404 // X - SIB.index
4405 // B - MODRM.rm/SIB.base
4406 uint8_t rex = force ? 0x40 : 0;
4407 if (w) {
4408 rex |= 0x48; // REX.W000
4409 }
4410 if (r != nullptr && *r >= Register::R8 && *r < Register::kNumberOfCpuRegisters) {
4411 rex |= 0x44; // REX.0R00
4412 *r = static_cast<Register>(*r - 8);
4413 }
4414 if (x != nullptr && *x >= Register::R8 && *x < Register::kNumberOfCpuRegisters) {
4415 rex |= 0x42; // REX.00X0
4416 *x = static_cast<Register>(*x - 8);
4417 }
4418 if (b != nullptr && *b >= Register::R8 && *b < Register::kNumberOfCpuRegisters) {
4419 rex |= 0x41; // REX.000B
4420 *b = static_cast<Register>(*b - 8);
4421 }
4422 if (rex != 0) {
4423 EmitUint8(rex);
4424 }
4425 }
4426
4427 void X86_64Assembler::rex_reg_mem(bool force, bool w, Register* dst, const Address& mem) {
4428 // REX.WRXB
4429 // W - 64-bit operand
4430 // R - MODRM.reg
4431 // X - SIB.index
4432 // B - MODRM.rm/SIB.base
4433 uint8_t rex = mem->rex();
4434 if (force) {
4435 rex |= 0x40; // REX.0000
4436 }
4437 if (w) {
4438 rex |= 0x48; // REX.W000
4439 }
4440 if (dst != nullptr && *dst >= Register::R8 && *dst < Register::kNumberOfCpuRegisters) {
4441 rex |= 0x44; // REX.0R00
4442 *dst = static_cast<Register>(*dst - 8);
4443 }
4444 if (rex != 0) {
4445 EmitUint8(rex);
4446 }
4447 }
4448
4449 void rex_mem_reg(bool force, bool w, Address* mem, Register* src);
4450 #endif
4451
// add immediate and memory-destination forms. Immediate forms use EmitComplex
// with digit /0; the memory-destination register forms use opcode 01 (r/m <-
// r/m + reg, source in the ModRM reg field).

// addl r32, imm — [REX] 83/81 /0 via EmitComplex.
void X86_64Assembler::addl(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitComplex(0, Operand(reg), imm);
}


// addw r16, imm — 66 [REX] 83/81 /0 via EmitComplex (16-bit immediate).
void X86_64Assembler::addw(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_uint16() || imm.is_int16()) << imm.value();
  EmitUint8(0x66);
  EmitOptionalRex32(reg);
  EmitComplex(0, Operand(reg), imm, /* is_16_op= */ true);
}


// addq r64, imm32 (sign-extended) — REX.W 83/81 /0 via EmitComplex.
void X86_64Assembler::addq(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // addq only supports 32b immediate.
  EmitRex64(reg);
  EmitComplex(0, Operand(reg), imm);
}


// addq r64, m64 — REX.W 03 /r.
void X86_64Assembler::addq(CpuRegister dst, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, address);
  EmitUint8(0x03);
  EmitOperand(dst.LowBits(), address);
}


// addq r64, r64 — REX.W 01 /r.
void X86_64Assembler::addq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // 0x01 is addq r/m64 <- r/m64 + r64, with op1 in r/m and op2 in reg: so reverse EmitRex64
  EmitRex64(src, dst);
  EmitUint8(0x01);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}


// addl m32, r32 — [REX] 01 /r.
void X86_64Assembler::addl(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x01);
  EmitOperand(reg.LowBits(), address);
}


// addl m32, imm — [REX] 83/81 /0 via EmitComplex.
void X86_64Assembler::addl(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitComplex(0, address, imm);
}


// addw m16, imm — 66 [REX] 83/81 /0 via EmitComplex (16-bit immediate).
void X86_64Assembler::addw(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_uint16() || imm.is_int16()) << imm.value();
  EmitUint8(0x66);
  EmitOptionalRex32(address);
  EmitComplex(0, address, imm, /* is_16_op= */ true);
}


// addw m16, r16 — 66 [REX] 01 /r.
void X86_64Assembler::addw(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();
  EmitOptionalRex32(reg, address);
  EmitUint8(0x01);
  EmitOperand(reg.LowBits(), address);
}
4524
4525
subl(CpuRegister dst,CpuRegister src)4526 void X86_64Assembler::subl(CpuRegister dst, CpuRegister src) {
4527 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4528 EmitOptionalRex32(dst, src);
4529 EmitUint8(0x2B);
4530 EmitOperand(dst.LowBits(), Operand(src));
4531 }
4532
4533
subl(CpuRegister reg,const Immediate & imm)4534 void X86_64Assembler::subl(CpuRegister reg, const Immediate& imm) {
4535 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4536 EmitOptionalRex32(reg);
4537 EmitComplex(5, Operand(reg), imm);
4538 }
4539
4540
subq(CpuRegister reg,const Immediate & imm)4541 void X86_64Assembler::subq(CpuRegister reg, const Immediate& imm) {
4542 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4543 CHECK(imm.is_int32()); // subq only supports 32b immediate.
4544 EmitRex64(reg);
4545 EmitComplex(5, Operand(reg), imm);
4546 }
4547
4548
subq(CpuRegister dst,CpuRegister src)4549 void X86_64Assembler::subq(CpuRegister dst, CpuRegister src) {
4550 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4551 EmitRex64(dst, src);
4552 EmitUint8(0x2B);
4553 EmitRegisterOperand(dst.LowBits(), src.LowBits());
4554 }
4555
4556
subq(CpuRegister reg,const Address & address)4557 void X86_64Assembler::subq(CpuRegister reg, const Address& address) {
4558 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4559 EmitRex64(reg, address);
4560 EmitUint8(0x2B);
4561 EmitOperand(reg.LowBits() & 7, address);
4562 }
4563
4564
subl(CpuRegister reg,const Address & address)4565 void X86_64Assembler::subl(CpuRegister reg, const Address& address) {
4566 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4567 EmitOptionalRex32(reg, address);
4568 EmitUint8(0x2B);
4569 EmitOperand(reg.LowBits(), address);
4570 }
4571
4572
// Sign-extension and division instructions. The div/idiv register forms
// encode the operand directly in the second opcode byte (F8|reg for idiv,
// F0|reg for div) rather than via EmitOperand.

// cdq — sign-extend EAX into EDX:EAX; opcode 99.
void X86_64Assembler::cdq() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x99);
}


// cqo — sign-extend RAX into RDX:RAX; REX.W 99.
void X86_64Assembler::cqo() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64();
  EmitUint8(0x99);
}


// idivl r32 — signed divide EDX:EAX by r32; [REX] F7 /7 (F8|reg).
void X86_64Assembler::idivl(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitUint8(0xF8 | reg.LowBits());
}


// idivq r64 — signed divide RDX:RAX by r64; REX.W F7 /7 (F8|reg).
void X86_64Assembler::idivq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg);
  EmitUint8(0xF7);
  EmitUint8(0xF8 | reg.LowBits());
}


// divl r32 — unsigned divide EDX:EAX by r32; [REX] F7 /6 (F0|reg).
void X86_64Assembler::divl(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitUint8(0xF0 | reg.LowBits());
}


// divq r64 — unsigned divide RDX:RAX by r64; REX.W F7 /6 (F0|reg).
void X86_64Assembler::divq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg);
  EmitUint8(0xF7);
  EmitUint8(0xF0 | reg.LowBits());
}
4616
4617
// Two-operand "imul r32, r/m32" (0F AF /r): dst = dst * src.
void X86_64Assembler::imull(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xAF);
  EmitOperand(dst.LowBits(), Operand(src));
}

// Three-operand "imul r32, r/m32, imm" — dst = src * imm. Picks the
// imm8 form (6B) when the value fits sign-extended in a byte, else the
// imm32 form (69).
void X86_64Assembler::imull(CpuRegister dst, CpuRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // imull only supports 32b immediate.

  EmitOptionalRex32(dst, src);

  // See whether imm can be represented as a sign-extended 8bit value.
  int32_t v32 = static_cast<int32_t>(imm.value());
  if (IsInt<8>(v32)) {
    // Sign-extension works.
    EmitUint8(0x6B);
    EmitOperand(dst.LowBits(), Operand(src));
    EmitUint8(static_cast<uint8_t>(v32 & 0xFF));
  } else {
    // Not representable, use full immediate.
    EmitUint8(0x69);
    EmitOperand(dst.LowBits(), Operand(src));
    EmitImmediate(imm);
  }
}


// Convenience form: reg = reg * imm.
void X86_64Assembler::imull(CpuRegister reg, const Immediate& imm) {
  imull(reg, reg, imm);
}


// "imul r32, m32" (0F AF /r): reg = reg * [address].
void X86_64Assembler::imull(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xAF);
  EmitOperand(reg.LowBits(), address);
}
4660
4661
// Two-operand "imul r64, r/m64" (REX.W + 0F AF /r): dst = dst * src.
void X86_64Assembler::imulq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xAF);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


// Convenience form: reg = reg * imm (delegates to the three-operand form).
void X86_64Assembler::imulq(CpuRegister reg, const Immediate& imm) {
  imulq(reg, reg, imm);
}

// Three-operand "imul r64, r/m64, imm" — dst = reg * imm. Uses the imm8
// form (6B) when possible, else the imm32 form (69); there is no imm64 form.
void X86_64Assembler::imulq(CpuRegister dst, CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // imulq only supports 32b immediate.

  EmitRex64(dst, reg);

  // See whether imm can be represented as a sign-extended 8bit value.
  int64_t v64 = imm.value();
  if (IsInt<8>(v64)) {
    // Sign-extension works.
    EmitUint8(0x6B);
    EmitOperand(dst.LowBits(), Operand(reg));
    EmitUint8(static_cast<uint8_t>(v64 & 0xFF));
  } else {
    // Not representable, use full immediate.
    EmitUint8(0x69);
    EmitOperand(dst.LowBits(), Operand(reg));
    EmitImmediate(imm);
  }
}

// "imul r64, m64" (REX.W + 0F AF /r): reg = reg * [address].
void X86_64Assembler::imulq(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xAF);
  EmitOperand(reg.LowBits(), address);
}
4703
4704
// One-operand F7-family multiplies. ModRM.reg is the opcode extension
// (/5 = IMUL, /4 = MUL); result goes to (E|R)DX:(E|R)AX.

void X86_64Assembler::imull(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitOperand(5, Operand(reg));  // /5 = signed multiply.
}


void X86_64Assembler::imulq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg);
  EmitUint8(0xF7);
  EmitOperand(5, Operand(reg));  // /5 = signed multiply.
}


void X86_64Assembler::imull(const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0xF7);
  EmitOperand(5, address);  // /5 = signed multiply.
}


void X86_64Assembler::mull(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitOperand(4, Operand(reg));  // /4 = unsigned multiply.
}


void X86_64Assembler::mull(const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0xF7);
  EmitOperand(4, address);  // /4 = unsigned multiply.
}
4743
4744
// Shift/rotate wrappers. All delegate to EmitGenericShift; the integer
// argument is the C1/D1/D3-family opcode extension (ModRM.reg field):
//   /0 = ROL, /1 = ROR, /4 = SHL, /5 = SHR, /7 = SAR.
// The bool selects 64-bit (REX.W) vs 32-bit operand size, and the
// CpuRegister-shifter overloads require the count in CL (checked inside
// EmitGenericShift).

void X86_64Assembler::shll(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 4, reg, imm);
}


void X86_64Assembler::shlq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 4, reg, imm);
}


void X86_64Assembler::shll(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 4, operand, shifter);
}


void X86_64Assembler::shlq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 4, operand, shifter);
}


void X86_64Assembler::shrl(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 5, reg, imm);
}


void X86_64Assembler::shrq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 5, reg, imm);
}


void X86_64Assembler::shrl(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 5, operand, shifter);
}


void X86_64Assembler::shrq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 5, operand, shifter);
}


void X86_64Assembler::sarl(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 7, reg, imm);
}


void X86_64Assembler::sarl(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 7, operand, shifter);
}


void X86_64Assembler::sarq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 7, reg, imm);
}


void X86_64Assembler::sarq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 7, operand, shifter);
}


void X86_64Assembler::roll(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 0, reg, imm);
}


void X86_64Assembler::roll(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 0, operand, shifter);
}


void X86_64Assembler::rorl(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 1, reg, imm);
}


void X86_64Assembler::rorl(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 1, operand, shifter);
}


void X86_64Assembler::rolq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 0, reg, imm);
}


void X86_64Assembler::rolq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 0, operand, shifter);
}


void X86_64Assembler::rorq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 1, reg, imm);
}


void X86_64Assembler::rorq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 1, operand, shifter);
}
4843
4844
// F7-family unary ops: /3 = NEG (two's complement), /2 = NOT (one's
// complement). Note that notl encodes the ModRM byte directly (0xD0 | reg
// is mod=11, reg=/2) while the others go through EmitOperand — the emitted
// bytes are equivalent.

void X86_64Assembler::negl(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitOperand(3, Operand(reg));  // /3 = NEG.
}


void X86_64Assembler::negq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg);
  EmitUint8(0xF7);
  EmitOperand(3, Operand(reg));  // /3 = NEG.
}


void X86_64Assembler::notl(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitUint8(0xD0 | reg.LowBits());  // ModRM: mod=11, reg=/2 (NOT), rm=reg.
}


void X86_64Assembler::notq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg);
  EmitUint8(0xF7);
  EmitOperand(2, Operand(reg));  // /2 = NOT.
}
4875
4876
// ENTER imm16, 0: allocates a stack frame of imm bytes with nesting
// level 0 (the trailing 0x00 byte).
void X86_64Assembler::enter(const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xC8);
  CHECK(imm.is_uint16()) << imm.value();
  EmitUint8(imm.value() & 0xFF);         // imm16, little-endian.
  EmitUint8((imm.value() >> 8) & 0xFF);
  EmitUint8(0x00);                       // Nesting level 0.
}


// LEAVE: tears down the current stack frame (mov rsp, rbp; pop rbp).
void X86_64Assembler::leave() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xC9);
}


// RET: near return.
void X86_64Assembler::ret() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xC3);
}


// RET imm16: near return, then pop imm bytes of arguments off the stack.
void X86_64Assembler::ret(const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xC2);
  CHECK(imm.is_uint16());
  EmitUint8(imm.value() & 0xFF);         // imm16, little-endian.
  EmitUint8((imm.value() >> 8) & 0xFF);
}
4906
4907
4908
// NOP (0x90).
void X86_64Assembler::nop() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x90);
}


// INT3 (0xCC): software breakpoint.
void X86_64Assembler::int3() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xCC);
}


// HLT (0xF4): privileged halt; faults in user mode.
void X86_64Assembler::hlt() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF4);
}
4925
4926
// Conditional jump to a Label. Bound labels get the shortest encoding that
// reaches backwards (70+cc rel8, else 0F 80+cc rel32); unbound labels always
// use the rel32 form so the 4-byte slot can hold the fixup link.
void X86_64Assembler::j(Condition condition, Label* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;   // Opcode + rel8.
    static const int kLongSize = 6;    // 0F + opcode + rel32.
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);               // Bound labels are always behind us.
    if (IsInt<8>(offset - kShortSize)) {
      EmitUint8(0x70 + condition);
      EmitUint8((offset - kShortSize) & 0xFF);
    } else {
      EmitUint8(0x0F);
      EmitUint8(0x80 + condition);
      EmitInt32(offset - kLongSize);
    }
  } else {
    EmitUint8(0x0F);
    EmitUint8(0x80 + condition);
    EmitLabelLink(label);
  }
}


// Conditional jump to a NearLabel: always the 2-byte rel8 form; the
// caller guarantees the target is within range (checked when bound).
void X86_64Assembler::j(Condition condition, NearLabel* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);
    CHECK(IsInt<8>(offset - kShortSize));
    EmitUint8(0x70 + condition);
    EmitUint8((offset - kShortSize) & 0xFF);
  } else {
    EmitUint8(0x70 + condition);
    EmitLabelLink(label);
  }
}


// JRCXZ (0xE3 rel8): jump if RCX is zero; rel8-only, hence NearLabel.
void X86_64Assembler::jrcxz(NearLabel* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);
    CHECK(IsInt<8>(offset - kShortSize));
    EmitUint8(0xE3);
    EmitUint8((offset - kShortSize) & 0xFF);
  } else {
    EmitUint8(0xE3);
    EmitLabelLink(label);
  }
}
4980
4981
// Indirect jump through a register: FF /4.
void X86_64Assembler::jmp(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xFF);
  EmitRegisterOperand(4, reg.LowBits());
}

// Indirect jump through memory: FF /4.
void X86_64Assembler::jmp(const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0xFF);
  EmitOperand(4, address);
}

// Direct jump to a Label: EB rel8 when a bound target is in byte range,
// otherwise E9 rel32; unbound labels use rel32 to hold the fixup link.
void X86_64Assembler::jmp(Label* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;   // EB + rel8.
    static const int kLongSize = 5;    // E9 + rel32.
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);               // Bound labels are always behind us.
    if (IsInt<8>(offset - kShortSize)) {
      EmitUint8(0xEB);
      EmitUint8((offset - kShortSize) & 0xFF);
    } else {
      EmitUint8(0xE9);
      EmitInt32(offset - kLongSize);
    }
  } else {
    EmitUint8(0xE9);
    EmitLabelLink(label);
  }
}


// Direct jump to a NearLabel: always the 2-byte EB rel8 form.
void X86_64Assembler::jmp(NearLabel* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);
    CHECK(IsInt<8>(offset - kShortSize));
    EmitUint8(0xEB);
    EmitUint8((offset - kShortSize) & 0xFF);
  } else {
    EmitUint8(0xEB);
    EmitLabelLink(label);
  }
}
5031
5032
// REP MOVSW: 66 (operand-size) F3 (REP) A5 — copy RCX words RSI->RDI.
void X86_64Assembler::rep_movsw() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitUint8(0xF3);
  EmitUint8(0xA5);
}

// REP MOVSB: F3 A4 — copy RCX bytes RSI->RDI.
void X86_64Assembler::rep_movsb() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitUint8(0xA4);
}

// REP MOVSD: F3 A5 — copy RCX dwords RSI->RDI.
void X86_64Assembler::rep_movsl() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitUint8(0xA5);
}

// Emits the LOCK prefix (F0); returns this so callers can chain the locked
// instruction, e.g. lock()->cmpxchgl(...).
X86_64Assembler* X86_64Assembler::lock() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF0);
  return this;
}
5057
5058
// CMPXCHG m8, r8 (0F B0 /r). The byte-reg-normalizing REX keeps SPL/BPL/
// SIL/DIL from being encoded as AH/CH/DH/BH.
void X86_64Assembler::cmpxchgb(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalByteRegNormalizingRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xB0);
  EmitOperand(reg.LowBits(), address);
}


// CMPXCHG m16, r16: 66 prefix (before any REX) + 0F B1 /r.
void X86_64Assembler::cmpxchgw(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xB1);
  EmitOperand(reg.LowBits(), address);
}


// CMPXCHG m32, r32 (0F B1 /r).
void X86_64Assembler::cmpxchgl(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xB1);
  EmitOperand(reg.LowBits(), address);
}


// CMPXCHG m64, r64 (REX.W + 0F B1 /r).
void X86_64Assembler::cmpxchgq(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xB1);
  EmitOperand(reg.LowBits(), address);
}


// MFENCE (0F AE F0): full memory barrier.
void X86_64Assembler::mfence() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x0F);
  EmitUint8(0xAE);
  EmitUint8(0xF0);
}
5102
5103
// Emits the GS segment-override prefix (0x65); returns this so callers can
// chain the prefixed instruction, e.g. gs()->movq(...).
X86_64Assembler* X86_64Assembler::gs() {
  // TODO: gs is a prefix and not an instruction
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x65);
  return this;
}
5110
5111
AddImmediate(CpuRegister reg,const Immediate & imm)5112 void X86_64Assembler::AddImmediate(CpuRegister reg, const Immediate& imm) {
5113 int value = imm.value();
5114 if (value != 0) {
5115 if (value > 0) {
5116 addl(reg, imm);
5117 } else {
5118 subl(reg, Immediate(value));
5119 }
5120 }
5121 }
5122
5123
// SETcc r8 (0F 90+cc /r): sets the low byte of dst to 0/1 from the flags.
void X86_64Assembler::setcc(Condition condition, CpuRegister dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // RSP, RBP, RDI, RSI need rex prefix (else the pattern encodes ah/bh/ch/dh).
  if (dst.NeedsRex() || dst.AsRegister() > 3) {
    // Any REX prefix switches the byte-register encoding from AH/CH/DH/BH
    // to SPL/BPL/SIL/DIL.
    EmitOptionalRex(true, false, false, false, dst.NeedsRex());
  }
  EmitUint8(0x0F);
  EmitUint8(0x90 + condition);
  EmitUint8(0xC0 + dst.LowBits());  // ModRM: mod=11, rm=dst.
}
5134
// BMI1 VEX-encoded bit ops (three-byte VEX, map 0F 38, opcode F3). The
// destination goes in VEX.vvvv; the ModRM.reg field is the opcode
// extension selecting the operation (/3 = BLSI, /2 = BLSMSK, /1 = BLSR).
// VEX.W=1 gives the 64-bit forms.

// BLSI: dst = -src & src (isolate lowest set bit).
void X86_64Assembler::blsi(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
  uint8_t byte_one = EmitVexPrefixByteOne(/*R=*/ false,
                                          /*X=*/ false,
                                          src.NeedsRex(),
                                          SET_VEX_M_0F_38);
  uint8_t byte_two = EmitVexPrefixByteTwo(/*W=*/true,
                                          X86_64ManagedRegister::FromCpuRegister(dst.AsRegister()),
                                          SET_VEX_L_128,
                                          SET_VEX_PP_NONE);
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  EmitUint8(byte_two);
  EmitUint8(0xF3);
  EmitRegisterOperand(3, src.LowBits());  // /3 = BLSI.
}

// BLSMSK: dst = (src - 1) ^ src (mask up to lowest set bit).
void X86_64Assembler::blsmsk(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
  uint8_t byte_one = EmitVexPrefixByteOne(/*R=*/ false,
                                          /*X=*/ false,
                                          src.NeedsRex(),
                                          SET_VEX_M_0F_38);
  uint8_t byte_two = EmitVexPrefixByteTwo(/*W=*/ true,
                                          X86_64ManagedRegister::FromCpuRegister(dst.AsRegister()),
                                          SET_VEX_L_128,
                                          SET_VEX_PP_NONE);
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  EmitUint8(byte_two);
  EmitUint8(0xF3);
  EmitRegisterOperand(2, src.LowBits());  // /2 = BLSMSK.
}

// BLSR: dst = (src - 1) & src (clear lowest set bit).
void X86_64Assembler::blsr(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/false);
  uint8_t byte_one = EmitVexPrefixByteOne(/*R=*/ false,
                                          /*X=*/ false,
                                          src.NeedsRex(),
                                          SET_VEX_M_0F_38);
  uint8_t byte_two = EmitVexPrefixByteTwo(/*W=*/ true,
                                          X86_64ManagedRegister::FromCpuRegister(dst.AsRegister()),
                                          SET_VEX_L_128,
                                          SET_VEX_PP_NONE);
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  EmitUint8(byte_two);
  EmitUint8(0xF3);
  EmitRegisterOperand(1, src.LowBits());  // /1 = BLSR.
}
5188
// BSWAP r32 (0F C8+rd): byte-reverses the 32-bit register.
void X86_64Assembler::bswapl(CpuRegister dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex(false, false, false, false, dst.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0xC8 + dst.LowBits());  // Register is encoded in the opcode.
}

// BSWAP r64 (REX.W + 0F C8+rd): byte-reverses the 64-bit register.
void X86_64Assembler::bswapq(CpuRegister dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex(false, true, false, false, dst.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0xC8 + dst.LowBits());
}
5202
// Bit scans: BSF (0F BC /r) finds the lowest set bit, BSR (0F BD /r) the
// highest; the 'q' variants add REX.W for 64-bit operands.

void X86_64Assembler::bsfl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBC);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}

void X86_64Assembler::bsfl(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBC);
  EmitOperand(dst.LowBits(), src);
}

void X86_64Assembler::bsfq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBC);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}

void X86_64Assembler::bsfq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBC);
  EmitOperand(dst.LowBits(), src);
}

void X86_64Assembler::bsrl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBD);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}

void X86_64Assembler::bsrl(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBD);
  EmitOperand(dst.LowBits(), src);
}

void X86_64Assembler::bsrq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBD);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}

void X86_64Assembler::bsrq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBD);
  EmitOperand(dst.LowBits(), src);
}
5266
// POPCNT (F3 0F B8 /r): counts set bits. The mandatory F3 prefix must
// precede the (optional) REX prefix.

void X86_64Assembler::popcntl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB8);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}

void X86_64Assembler::popcntl(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB8);
  EmitOperand(dst.LowBits(), src);
}

void X86_64Assembler::popcntq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB8);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}

void X86_64Assembler::popcntq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB8);
  EmitOperand(dst.LowBits(), src);
}
5302
// RDTSC (0F 31): reads the time-stamp counter into EDX:EAX.
void X86_64Assembler::rdtsc() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x0F);
  EmitUint8(0x31);
}

// REPNE SCASB (F2 AE): scan bytes at RDI for AL, up to RCX bytes.
void X86_64Assembler::repne_scasb() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitUint8(0xAE);
}

// REPNE SCASW (66 F2 AF): scan words at RDI for AX.
void X86_64Assembler::repne_scasw() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitUint8(0xF2);
  EmitUint8(0xAF);
}

// REPE CMPSW (66 F3 A7): compare words RSI vs RDI while equal.
void X86_64Assembler::repe_cmpsw() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitUint8(0xF3);
  EmitUint8(0xA7);
}


// REPE CMPSD (F3 A7): compare dwords RSI vs RDI while equal.
void X86_64Assembler::repe_cmpsl() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitUint8(0xA7);
}


// REPE CMPSQ (F3 REX.W A7): compare qwords RSI vs RDI while equal.
void X86_64Assembler::repe_cmpsq() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitRex64();
  EmitUint8(0xA7);
}

// UD2 (0F 0B): guaranteed-undefined opcode; raises #UD.
void X86_64Assembler::ud2() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x0F);
  EmitUint8(0x0B);
}
5349
// Materializes a double constant: pushes its bit pattern onto the stack,
// loads it into dst with movsd, then pops the scratch space.
void X86_64Assembler::LoadDoubleConstant(XmmRegister dst, double value) {
  // TODO: Need to have a code constants table.
  // NOTE(review): in 64-bit mode each pushq of an imm32 pushes a
  // sign-extended 8-byte slot, so the two 32-bit halves end up 8 bytes
  // apart rather than forming one contiguous 8-byte double at (RSP, 0).
  // This mirrors the 32-bit X86Assembler sequence (where pushl pushes
  // 4 bytes and the layout is contiguous) — verify the 64-bit layout
  // against callers before relying on this helper.
  int64_t constant = bit_cast<int64_t, double>(value);
  pushq(Immediate(High32Bits(constant)));
  pushq(Immediate(Low32Bits(constant)));
  movsd(dst, Address(CpuRegister(RSP), 0));
  addq(CpuRegister(RSP), Immediate(2 * sizeof(intptr_t)));
}
5358
5359
// Pads the buffer with single-byte NOPs until (offset + current position)
// is a multiple of alignment (which must be a power of two).
void X86_64Assembler::Align(int alignment, int offset) {
  CHECK(IsPowerOfTwo(alignment));
  // Emit nop instruction until the real position is aligned.
  while (((offset + buffer_.GetPosition()) & (alignment-1)) != 0) {
    nop();
  }
}
5367
5368
// Binds a Label at the current buffer position and patches every linked
// forward reference. Unresolved rel32 slots form a chain: each slot holds
// the position of the previous link, which we read before overwriting the
// slot with the real PC-relative displacement.
void X86_64Assembler::Bind(Label* label) {
  int bound = buffer_.Size();
  CHECK(!label->IsBound());  // Labels can only be bound once.
  while (label->IsLinked()) {
    int position = label->LinkPosition();
    int next = buffer_.Load<int32_t>(position);      // Previous link in chain.
    buffer_.Store<int32_t>(position, bound - (position + 4));  // rel32 fixup.
    label->position_ = next;
  }
  label->BindTo(bound);
}


// Binds a NearLabel: same idea as Bind(Label*), but the chain is threaded
// through single rel8 bytes, each holding the delta to the previous link
// (0 terminates the chain).
void X86_64Assembler::Bind(NearLabel* label) {
  int bound = buffer_.Size();
  CHECK(!label->IsBound());  // Labels can only be bound once.
  while (label->IsLinked()) {
    int position = label->LinkPosition();
    uint8_t delta = buffer_.Load<uint8_t>(position);  // Delta to previous link.
    int offset = bound - (position + 1);
    CHECK(IsInt<8>(offset));                          // Must fit in rel8.
    buffer_.Store<int8_t>(position, offset);
    label->position_ = delta != 0u ? label->position_ - delta : 0;
  }
  label->BindTo(bound);
}
5395
5396
// Emits a pre-encoded operand (ModRM [+ SIB] [+ disp]), splicing the given
// register number or opcode extension into the ModRM.reg field (bits 5:3).
void X86_64Assembler::EmitOperand(uint8_t reg_or_opcode, const Operand& operand) {
  CHECK_GE(reg_or_opcode, 0);
  CHECK_LT(reg_or_opcode, 8);   // Must fit in the 3-bit reg field.
  const int length = operand.length_;
  CHECK_GT(length, 0);
  // Emit the ModRM byte updated with the given reg value.
  CHECK_EQ(operand.encoding_[0] & 0x38, 0);  // reg field must be pre-zeroed.
  EmitUint8(operand.encoding_[0] + (reg_or_opcode << 3));
  // Emit the rest of the encoded operand.
  for (int i = 1; i < length; i++) {
    EmitUint8(operand.encoding_[i]);
  }
  AssemblerFixup* fixup = operand.GetFixup();
  if (fixup != nullptr) {
    EmitFixup(fixup);  // e.g. for thread-offset or literal patching.
  }
}


// Emits an immediate: 2 bytes for 16-bit ops, else 4 bytes when the value
// fits in int32, else the full 8 bytes.
void X86_64Assembler::EmitImmediate(const Immediate& imm, bool is_16_op) {
  if (is_16_op) {
    EmitUint8(imm.value() & 0xFF);   // imm16, little-endian.
    EmitUint8(imm.value() >> 8);
  } else if (imm.is_int32()) {
    EmitInt32(static_cast<int32_t>(imm.value()));
  } else {
    EmitInt64(imm.value());
  }
}
5426
5427
// Emits an 80-family ALU op (ADD/OR/ADC/SBB/AND/SUB/XOR/CMP selected by
// reg_or_opcode) against an immediate, choosing the shortest encoding:
// 83 /op imm8 (sign-extended) > 05-style RAX short form > 81 /op imm32.
void X86_64Assembler::EmitComplex(uint8_t reg_or_opcode,
                                  const Operand& operand,
                                  const Immediate& immediate,
                                  bool is_16_op) {
  CHECK_GE(reg_or_opcode, 0);
  CHECK_LT(reg_or_opcode, 8);   // Must fit in ModRM.reg.
  if (immediate.is_int8()) {
    // Use sign-extended 8-bit immediate.
    EmitUint8(0x83);
    EmitOperand(reg_or_opcode, operand);
    EmitUint8(immediate.value() & 0xFF);
  } else if (operand.IsRegister(CpuRegister(RAX))) {
    // Use short form if the destination is eax.
    EmitUint8(0x05 + (reg_or_opcode << 3));  // Accumulator forms: 05, 0D, ...
    EmitImmediate(immediate, is_16_op);
  } else {
    EmitUint8(0x81);
    EmitOperand(reg_or_opcode, operand);
    EmitImmediate(immediate, is_16_op);
  }
}
5449
5450
// Emits a rel32 reference to a label: the real displacement when bound,
// otherwise a link slot to be patched at Bind time.
void X86_64Assembler::EmitLabel(Label* label, int instruction_size) {
  if (label->IsBound()) {
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);   // Bound labels are always behind us.
    EmitInt32(offset - instruction_size);
  } else {
    EmitLabelLink(label);
  }
}


// Links an unbound Label: the 4-byte slot stores the previous link position
// (the chain head lives in label->position_; Bind walks and patches it).
void X86_64Assembler::EmitLabelLink(Label* label) {
  CHECK(!label->IsBound());
  int position = buffer_.Size();
  EmitInt32(label->position_);
  label->LinkTo(position);
}


// Links an unbound NearLabel: only one byte is available, so it stores the
// delta to the previous link (0 marks the end of the chain).
void X86_64Assembler::EmitLabelLink(NearLabel* label) {
  CHECK(!label->IsBound());
  int position = buffer_.Size();
  if (label->IsLinked()) {
    // Save the delta in the byte that we have to play with.
    uint32_t delta = position - label->LinkPosition();
    CHECK(IsUint<8>(delta));   // Links too far apart cannot be chained.
    EmitUint8(delta & 0xFF);
  } else {
    EmitUint8(0);
  }
  label->LinkTo(position);
}
5483
5484
// Shared emitter for immediate-count shifts/rotates. reg_or_opcode is the
// C1/D1-family opcode extension (/0 ROL ... /7 SAR); wide selects REX.W.
// Shift-by-1 uses the dedicated D1 form, otherwise C1 ib.
void X86_64Assembler::EmitGenericShift(bool wide,
                                       int reg_or_opcode,
                                       CpuRegister reg,
                                       const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int8());   // Shift counts are at most a byte.
  if (wide) {
    EmitRex64(reg);
  } else {
    EmitOptionalRex32(reg);
  }
  if (imm.value() == 1) {
    EmitUint8(0xD1);      // Shift-by-1 form, no immediate byte.
    EmitOperand(reg_or_opcode, Operand(reg));
  } else {
    EmitUint8(0xC1);      // Shift-by-imm8 form.
    EmitOperand(reg_or_opcode, Operand(reg));
    EmitUint8(imm.value() & 0xFF);
  }
}


// Shared emitter for CL-count shifts/rotates (D3 /op); the hardware only
// takes the count from CL, hence the RCX check.
void X86_64Assembler::EmitGenericShift(bool wide,
                                       int reg_or_opcode,
                                       CpuRegister operand,
                                       CpuRegister shifter) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK_EQ(shifter.AsRegister(), RCX);
  if (wide) {
    EmitRex64(operand);
  } else {
    EmitOptionalRex32(operand);
  }
  EmitUint8(0xD3);
  EmitOperand(reg_or_opcode, Operand(operand));
}
5521
// Emits a REX prefix (0100WRXB) if any bit is set, or if 'force' requests
// a bare 0x40 (needed e.g. to address SPL/BPL/SIL/DIL as byte registers).
void X86_64Assembler::EmitOptionalRex(bool force, bool w, bool r, bool x, bool b) {
  // REX.WRXB
  // W - 64-bit operand
  // R - MODRM.reg
  // X - SIB.index
  // B - MODRM.rm/SIB.base
  uint8_t rex = force ? 0x40 : 0;
  if (w) {
    rex |= 0x48;  // REX.W000
  }
  if (r) {
    rex |= 0x44;  // REX.0R00
  }
  if (x) {
    rex |= 0x42;  // REX.00X0
  }
  if (b) {
    rex |= 0x41;  // REX.000B
  }
  if (rex != 0) {
    EmitUint8(rex);
  }
}
5545
// EmitOptionalRex32 overloads: emit a REX prefix only when one of the
// operands uses an extended register (R8-R15 / XMM8-XMM15). For two
// register operands the first maps to REX.R (ModRM.reg) and the second to
// REX.B (ModRM.rm); for memory operands the Operand supplies its own
// X/B bits via operand.rex().

void X86_64Assembler::EmitOptionalRex32(CpuRegister reg) {
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
}

void X86_64Assembler::EmitOptionalRex32(CpuRegister dst, CpuRegister src) {
  EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
}

void X86_64Assembler::EmitOptionalRex32(XmmRegister dst, XmmRegister src) {
  EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
}

void X86_64Assembler::EmitOptionalRex32(CpuRegister dst, XmmRegister src) {
  EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
}

void X86_64Assembler::EmitOptionalRex32(XmmRegister dst, CpuRegister src) {
  EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
}

void X86_64Assembler::EmitOptionalRex32(const Operand& operand) {
  uint8_t rex = operand.rex();
  if (rex != 0) {
    EmitUint8(rex);
  }
}

void X86_64Assembler::EmitOptionalRex32(CpuRegister dst, const Operand& operand) {
  uint8_t rex = operand.rex();
  if (dst.NeedsRex()) {
    rex |= 0x44;  // REX.0R00
  }
  if (rex != 0) {
    EmitUint8(rex);
  }
}

void X86_64Assembler::EmitOptionalRex32(XmmRegister dst, const Operand& operand) {
  uint8_t rex = operand.rex();
  if (dst.NeedsRex()) {
    rex |= 0x44;  // REX.0R00
  }
  if (rex != 0) {
    EmitUint8(rex);
  }
}
5592
EmitRex64()5593 void X86_64Assembler::EmitRex64() {
5594 EmitOptionalRex(false, true, false, false, false);
5595 }
5596
EmitRex64(CpuRegister reg)5597 void X86_64Assembler::EmitRex64(CpuRegister reg) {
5598 EmitOptionalRex(false, true, false, false, reg.NeedsRex());
5599 }
5600
EmitRex64(const Operand & operand)5601 void X86_64Assembler::EmitRex64(const Operand& operand) {
5602 uint8_t rex = operand.rex();
5603 rex |= 0x48; // REX.W000
5604 EmitUint8(rex);
5605 }
5606
EmitRex64(CpuRegister dst,CpuRegister src)5607 void X86_64Assembler::EmitRex64(CpuRegister dst, CpuRegister src) {
5608 EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
5609 }
5610
EmitRex64(XmmRegister dst,CpuRegister src)5611 void X86_64Assembler::EmitRex64(XmmRegister dst, CpuRegister src) {
5612 EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
5613 }
5614
EmitRex64(CpuRegister dst,XmmRegister src)5615 void X86_64Assembler::EmitRex64(CpuRegister dst, XmmRegister src) {
5616 EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
5617 }
5618
EmitRex64(CpuRegister dst,const Operand & operand)5619 void X86_64Assembler::EmitRex64(CpuRegister dst, const Operand& operand) {
5620 uint8_t rex = 0x48 | operand.rex(); // REX.W000
5621 if (dst.NeedsRex()) {
5622 rex |= 0x44; // REX.0R00
5623 }
5624 EmitUint8(rex);
5625 }
5626
EmitRex64(XmmRegister dst,const Operand & operand)5627 void X86_64Assembler::EmitRex64(XmmRegister dst, const Operand& operand) {
5628 uint8_t rex = 0x48 | operand.rex(); // REX.W000
5629 if (dst.NeedsRex()) {
5630 rex |= 0x44; // REX.0R00
5631 }
5632 EmitUint8(rex);
5633 }
5634
EmitOptionalByteRegNormalizingRex32(CpuRegister dst,CpuRegister src,bool normalize_both)5635 void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst,
5636 CpuRegister src,
5637 bool normalize_both) {
5638 // SPL, BPL, SIL, DIL need the REX prefix.
5639 bool force = src.AsRegister() > 3;
5640 if (normalize_both) {
5641 // Some instructions take two byte registers, such as `xchg bpl, al`, so they need the REX
5642 // prefix if either `src` or `dst` needs it.
5643 force |= dst.AsRegister() > 3;
5644 } else {
5645 // Other instructions take one byte register and one full register, such as `movzxb rax, bpl`.
5646 // They need REX prefix only if `src` needs it, but not `dst`.
5647 }
5648 EmitOptionalRex(force, false, dst.NeedsRex(), false, src.NeedsRex());
5649 }
5650
EmitOptionalByteRegNormalizingRex32(CpuRegister dst,const Operand & operand)5651 void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const Operand& operand) {
5652 uint8_t rex = operand.rex();
5653 // For dst, SPL, BPL, SIL, DIL need the rex prefix.
5654 bool force = dst.AsRegister() > 3;
5655 if (force) {
5656 rex |= 0x40; // REX.0000
5657 }
5658 if (dst.NeedsRex()) {
5659 rex |= 0x44; // REX.0R00
5660 }
5661 if (rex != 0) {
5662 EmitUint8(rex);
5663 }
5664 }
5665
AddConstantArea()5666 void X86_64Assembler::AddConstantArea() {
5667 ArrayRef<const int32_t> area = constant_area_.GetBuffer();
5668 for (size_t i = 0, e = area.size(); i < e; i++) {
5669 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
5670 EmitInt32(area[i]);
5671 }
5672 }
5673
AppendInt32(int32_t v)5674 size_t ConstantArea::AppendInt32(int32_t v) {
5675 size_t result = buffer_.size() * elem_size_;
5676 buffer_.push_back(v);
5677 return result;
5678 }
5679
AddInt32(int32_t v)5680 size_t ConstantArea::AddInt32(int32_t v) {
5681 // Look for an existing match.
5682 for (size_t i = 0, e = buffer_.size(); i < e; i++) {
5683 if (v == buffer_[i]) {
5684 return i * elem_size_;
5685 }
5686 }
5687
5688 // Didn't match anything.
5689 return AppendInt32(v);
5690 }
5691
AddInt64(int64_t v)5692 size_t ConstantArea::AddInt64(int64_t v) {
5693 int32_t v_low = v;
5694 int32_t v_high = v >> 32;
5695 if (buffer_.size() > 1) {
5696 // Ensure we don't pass the end of the buffer.
5697 for (size_t i = 0, e = buffer_.size() - 1; i < e; i++) {
5698 if (v_low == buffer_[i] && v_high == buffer_[i + 1]) {
5699 return i * elem_size_;
5700 }
5701 }
5702 }
5703
5704 // Didn't match anything.
5705 size_t result = buffer_.size() * elem_size_;
5706 buffer_.push_back(v_low);
5707 buffer_.push_back(v_high);
5708 return result;
5709 }
5710
AddDouble(double v)5711 size_t ConstantArea::AddDouble(double v) {
5712 // Treat the value as a 64-bit integer value.
5713 return AddInt64(bit_cast<int64_t, double>(v));
5714 }
5715
AddFloat(float v)5716 size_t ConstantArea::AddFloat(float v) {
5717 // Treat the value as a 32-bit integer value.
5718 return AddInt32(bit_cast<int32_t, float>(v));
5719 }
5720
EmitVexPrefixByteZero(bool is_twobyte_form)5721 uint8_t X86_64Assembler::EmitVexPrefixByteZero(bool is_twobyte_form) {
5722 // Vex Byte 0,
5723 // Bits [7:0] must contain the value 11000101b (0xC5) for 2-byte Vex
5724 // Bits [7:0] must contain the value 11000100b (0xC4) for 3-byte Vex
5725 uint8_t vex_prefix = 0xC0;
5726 if (is_twobyte_form) {
5727 vex_prefix |= TWO_BYTE_VEX; // 2-Byte Vex
5728 } else {
5729 vex_prefix |= THREE_BYTE_VEX; // 3-Byte Vex
5730 }
5731 return vex_prefix;
5732 }
5733
EmitVexPrefixByteOne(bool R,bool X,bool B,int SET_VEX_M)5734 uint8_t X86_64Assembler::EmitVexPrefixByteOne(bool R, bool X, bool B, int SET_VEX_M) {
5735 // Vex Byte 1,
5736 uint8_t vex_prefix = VEX_INIT;
5737 /** Bit[7] This bit needs to be set to '1'
5738 otherwise the instruction is LES or LDS */
5739 if (!R) {
5740 // R .
5741 vex_prefix |= SET_VEX_R;
5742 }
5743 /** Bit[6] This bit needs to be set to '1'
5744 otherwise the instruction is LES or LDS */
5745 if (!X) {
5746 // X .
5747 vex_prefix |= SET_VEX_X;
5748 }
5749 /** Bit[5] This bit needs to be set to '1' */
5750 if (!B) {
5751 // B .
5752 vex_prefix |= SET_VEX_B;
5753 }
5754 /** Bits[4:0], Based on the instruction documentaion */
5755 vex_prefix |= SET_VEX_M;
5756 return vex_prefix;
5757 }
5758
EmitVexPrefixByteOne(bool R,X86_64ManagedRegister operand,int SET_VEX_L,int SET_VEX_PP)5759 uint8_t X86_64Assembler::EmitVexPrefixByteOne(bool R,
5760 X86_64ManagedRegister operand,
5761 int SET_VEX_L,
5762 int SET_VEX_PP) {
5763 // Vex Byte 1,
5764 uint8_t vex_prefix = VEX_INIT;
5765 /** Bit[7] This bit needs to be set to '1'
5766 otherwise the instruction is LES or LDS */
5767 if (!R) {
5768 // R .
5769 vex_prefix |= SET_VEX_R;
5770 }
5771 /**Bits[6:3] - 'vvvv' the source or dest register specifier */
5772 if (operand.IsNoRegister()) {
5773 vex_prefix |= 0x78;
5774 } else if (operand.IsXmmRegister()) {
5775 XmmRegister vvvv = operand.AsXmmRegister();
5776 int inverted_reg = 15 - static_cast<int>(vvvv.AsFloatRegister());
5777 uint8_t reg = static_cast<uint8_t>(inverted_reg);
5778 vex_prefix |= ((reg & 0x0F) << 3);
5779 } else if (operand.IsCpuRegister()) {
5780 CpuRegister vvvv = operand.AsCpuRegister();
5781 int inverted_reg = 15 - static_cast<int>(vvvv.AsRegister());
5782 uint8_t reg = static_cast<uint8_t>(inverted_reg);
5783 vex_prefix |= ((reg & 0x0F) << 3);
5784 }
5785 /** Bit[2] - "L" If VEX.L = 1 indicates 256-bit vector operation,
5786 VEX.L = 0 indicates 128 bit vector operation */
5787 vex_prefix |= SET_VEX_L;
5788 // Bits[1:0] - "pp"
5789 vex_prefix |= SET_VEX_PP;
5790 return vex_prefix;
5791 }
5792
EmitVexPrefixByteTwo(bool W,X86_64ManagedRegister operand,int SET_VEX_L,int SET_VEX_PP)5793 uint8_t X86_64Assembler::EmitVexPrefixByteTwo(bool W,
5794 X86_64ManagedRegister operand,
5795 int SET_VEX_L,
5796 int SET_VEX_PP) {
5797 // Vex Byte 2,
5798 uint8_t vex_prefix = VEX_INIT;
5799
5800 /** Bit[7] This bits needs to be set to '1' with default value.
5801 When using C4H form of VEX prefix, REX.W value is ignored */
5802 if (W) {
5803 vex_prefix |= SET_VEX_W;
5804 }
5805 // Bits[6:3] - 'vvvv' the source or dest register specifier
5806 if (operand.IsXmmRegister()) {
5807 XmmRegister vvvv = operand.AsXmmRegister();
5808 int inverted_reg = 15 - static_cast<int>(vvvv.AsFloatRegister());
5809 uint8_t reg = static_cast<uint8_t>(inverted_reg);
5810 vex_prefix |= ((reg & 0x0F) << 3);
5811 } else if (operand.IsCpuRegister()) {
5812 CpuRegister vvvv = operand.AsCpuRegister();
5813 int inverted_reg = 15 - static_cast<int>(vvvv.AsRegister());
5814 uint8_t reg = static_cast<uint8_t>(inverted_reg);
5815 vex_prefix |= ((reg & 0x0F) << 3);
5816 }
5817 /** Bit[2] - "L" If VEX.L = 1 indicates 256-bit vector operation,
5818 VEX.L = 0 indicates 128 bit vector operation */
5819 vex_prefix |= SET_VEX_L;
5820 // Bits[1:0] - "pp"
5821 vex_prefix |= SET_VEX_PP;
5822 return vex_prefix;
5823 }
5824
EmitVexPrefixByteTwo(bool W,int SET_VEX_L,int SET_VEX_PP)5825 uint8_t X86_64Assembler::EmitVexPrefixByteTwo(bool W,
5826 int SET_VEX_L,
5827 int SET_VEX_PP) {
5828 // Vex Byte 2,
5829 uint8_t vex_prefix = VEX_INIT;
5830
5831 /** Bit[7] This bits needs to be set to '1' with default value.
5832 When using C4H form of VEX prefix, REX.W value is ignored */
5833 if (W) {
5834 vex_prefix |= SET_VEX_W;
5835 }
5836 /** Bits[6:3] - 'vvvv' the source or dest register specifier */
5837 vex_prefix |= (0x0F << 3);
5838 /** Bit[2] - "L" If VEX.L = 1 indicates 256-bit vector operation,
5839 VEX.L = 0 indicates 128 bit vector operation */
5840 vex_prefix |= SET_VEX_L;
5841
5842 // Bits[1:0] - "pp"
5843 if (SET_VEX_PP != SET_VEX_PP_NONE) {
5844 vex_prefix |= SET_VEX_PP;
5845 }
5846 return vex_prefix;
5847 }
5848
5849 } // namespace x86_64
5850 } // namespace art
5851