• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "assembler_x86_64.h"
18 
19 #include "base/casts.h"
20 #include "base/memory_region.h"
21 #include "entrypoints/quick/quick_entrypoints.h"
22 #include "thread.h"
23 
24 namespace art HIDDEN {
25 namespace x86_64 {
26 
operator <<(std::ostream & os,const CpuRegister & reg)27 std::ostream& operator<<(std::ostream& os, const CpuRegister& reg) {
28   return os << reg.AsRegister();
29 }
30 
operator <<(std::ostream & os,const XmmRegister & reg)31 std::ostream& operator<<(std::ostream& os, const XmmRegister& reg) {
32   return os << reg.AsFloatRegister();
33 }
34 
operator <<(std::ostream & os,const X87Register & reg)35 std::ostream& operator<<(std::ostream& os, const X87Register& reg) {
36   return os << "ST" << static_cast<int>(reg);
37 }
38 
// Pretty-prints an Address in AT&T-like syntax, decoding its ModRM/SIB state.
// mod selects the displacement size (0: none, 1: disp8, 2: disp32); rm == RSP
// signals a SIB byte with base/index/scale.
std::ostream& operator<<(std::ostream& os, const Address& addr) {
  switch (addr.mod()) {
    case 0:
      // No SIB byte, or SIB with index == RSP (meaning "no index"): plain (%reg).
      if (addr.rm() != RSP || addr.cpu_index().AsRegister() == RSP) {
        return os << "(%" << addr.cpu_rm() << ")";
      } else if (addr.base() == RBP) {
        // SIB with base == RBP and mod == 0 means "no base": disp32(,index,scale).
        return os << static_cast<int>(addr.disp32()) << "(,%" << addr.cpu_index()
                  << "," << (1 << addr.scale()) << ")";
      }
      // SIB with base and index: (base,index,scale).
      return os << "(%" << addr.cpu_base() << ",%"
                << addr.cpu_index() << "," << (1 << addr.scale()) << ")";
    case 1:
      // 8-bit displacement.
      if (addr.rm() != RSP || addr.cpu_index().AsRegister() == RSP) {
        return os << static_cast<int>(addr.disp8()) << "(%" << addr.cpu_rm() << ")";
      }
      return os << static_cast<int>(addr.disp8()) << "(%" << addr.cpu_base() << ",%"
                << addr.cpu_index() << "," << (1 << addr.scale()) << ")";
    case 2:
      // 32-bit displacement.
      if (addr.rm() != RSP || addr.cpu_index().AsRegister() == RSP) {
        return os << static_cast<int>(addr.disp32()) << "(%" << addr.cpu_rm() << ")";
      }
      return os << static_cast<int>(addr.disp32()) << "(%" << addr.cpu_base() << ",%"
                << addr.cpu_index() << "," << (1 << addr.scale()) << ")";
    default:
      // mod == 3 is a register operand, not a memory address.
      return os << "<address?>";
  }
}
66 
CpuHasAVXorAVX2FeatureFlag()67 bool X86_64Assembler::CpuHasAVXorAVX2FeatureFlag() {
68   if (has_AVX_ || has_AVX2_) {
69     return true;
70   }
71   return false;
72 }
73 
74 
// call reg: indirect near call through a 64-bit register (FF /2).
void X86_64Assembler::call(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);  // REX only if reg needs the extension bit (R8-R15).
  EmitUint8(0xFF);
  EmitRegisterOperand(2, reg.LowBits());  // /2 opcode extension selects CALL.
}
81 
82 
// call mem: indirect near call through a memory operand (FF /2).
void X86_64Assembler::call(const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0xFF);
  EmitOperand(2, address);  // /2 opcode extension selects CALL.
}
89 
90 
// call label: direct near call (E8 rel32), displacement relative to the
// end of the instruction; EmitLabel handles bound/unbound fixups.
void X86_64Assembler::call(Label* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xE8);
  static const int kSize = 5;  // 1 opcode byte + 4 displacement bytes.
  // Offset by one because we already have emitted the opcode.
  EmitLabel(label, kSize - 1);
}
98 
// pushq reg: 50+rd. Operand size defaults to 64 bits; no REX.W needed.
void X86_64Assembler::pushq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);  // REX.B for R8-R15.
  EmitUint8(0x50 + reg.LowBits());
}
104 
105 
// pushq mem: FF /6.
void X86_64Assembler::pushq(const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0xFF);
  EmitOperand(6, address);  // /6 opcode extension selects PUSH.
}
112 
113 
// pushq imm: 6A imm8 for small values, 68 imm32 otherwise (sign-extended
// to 64 bits by the CPU). 64-bit immediates are not encodable.
void X86_64Assembler::pushq(const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // pushq only supports 32b immediate.
  if (imm.is_int8()) {
    EmitUint8(0x6A);
    EmitUint8(imm.value() & 0xFF);
  } else {
    EmitUint8(0x68);
    EmitImmediate(imm);
  }
}
125 
126 
// popq reg: 58+rd. Operand size defaults to 64 bits; no REX.W needed.
void X86_64Assembler::popq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);  // REX.B for R8-R15.
  EmitUint8(0x58 + reg.LowBits());
}
132 
133 
// popq mem: 8F /0.
void X86_64Assembler::popq(const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0x8F);
  EmitOperand(0, address);  // /0 opcode extension selects POP.
}
140 
141 
// movq reg, imm: prefers the shorter REX.W C7 /0 imm32 form (sign-extended)
// when the value fits; otherwise emits movabs (REX.W B8+rd imm64).
void X86_64Assembler::movq(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (imm.is_int32()) {
    // 32 bit. Note: sign-extends.
    EmitRex64(dst);
    EmitUint8(0xC7);
    EmitRegisterOperand(0, dst.LowBits());
    EmitInt32(static_cast<int32_t>(imm.value()));
  } else {
    EmitRex64(dst);
    EmitUint8(0xB8 + dst.LowBits());  // Full 64-bit immediate form.
    EmitInt64(imm.value());
  }
}
156 
157 
// movl reg, imm: B8+rd imm32 (upper 32 bits of dst are zeroed by the CPU).
void X86_64Assembler::movl(CpuRegister dst, const Immediate& imm) {
  CHECK(imm.is_int32());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitUint8(0xB8 + dst.LowBits());
  EmitImmediate(imm);
}
165 
166 
// movq mem, imm: REX.W C7 /0 imm32, sign-extended to 64 bits by the CPU.
void X86_64Assembler::movq(const Address& dst, const Immediate& imm) {
  CHECK(imm.is_int32());  // Only a sign-extended 32-bit immediate is encodable.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst);
  EmitUint8(0xC7);
  EmitOperand(0, dst);
  EmitImmediate(imm);
}
175 
176 
// movq reg, reg via the MR form (REX.W 89 /r): src in reg field, dst in rm.
void X86_64Assembler::movq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // 0x89 is movq r/m64 <- r64, with op1 in r/m and op2 in reg: so reverse EmitRex64
  EmitRex64(src, dst);
  EmitUint8(0x89);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}
184 
185 
// movl reg, reg via the RM form (8B /r): dst in reg field, src in rm.
void X86_64Assembler::movl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x8B);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
192 
193 
// movq reg, mem: REX.W 8B /r (64-bit load).
void X86_64Assembler::movq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x8B);
  EmitOperand(dst.LowBits(), src);
}
200 
201 
// movl reg, mem: 8B /r (32-bit load; upper half of dst zeroed by the CPU).
void X86_64Assembler::movl(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x8B);
  EmitOperand(dst.LowBits(), src);
}
208 
209 
// movq mem, reg: REX.W 89 /r (64-bit store).
void X86_64Assembler::movq(const Address& dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(src, dst);
  EmitUint8(0x89);
  EmitOperand(src.LowBits(), dst);
}
216 
217 
// movl mem, reg: 89 /r (32-bit store).
void X86_64Assembler::movl(const Address& dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x89);
  EmitOperand(src.LowBits(), dst);
}
224 
// movl mem, imm: C7 /0 imm32.
void X86_64Assembler::movl(const Address& dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitUint8(0xC7);
  EmitOperand(0, dst);
  EmitImmediate(imm);
}
232 
// movnti mem, reg (32-bit): 0F C3 /r — non-temporal store hint.
void X86_64Assembler::movntl(const Address& dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0xC3);
  EmitOperand(src.LowBits(), dst);
}
240 
// movnti mem, reg (64-bit): REX.W 0F C3 /r — non-temporal store hint.
void X86_64Assembler::movntq(const Address& dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0xC3);
  EmitOperand(src.LowBits(), dst);
}
248 
// cmovcc reg, reg — convenience overload defaulting to 64-bit operand size.
void X86_64Assembler::cmov(Condition c, CpuRegister dst, CpuRegister src) {
  cmov(c, dst, src, true);  // is64bit = true.
}
252 
// cmovcc reg, reg: 0F 40+cc /r; REX.W selects 64-bit operand size.
void X86_64Assembler::cmov(Condition c, CpuRegister dst, CpuRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex(false, is64bit, dst.NeedsRex(), false, src.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x40 + c);  // Condition code is folded into the opcode byte.
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
260 
261 
// cmovcc reg, mem: 0F 40+cc /r with memory source.
void X86_64Assembler::cmov(Condition c, CpuRegister dst, const Address& src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (is64bit) {
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x40 + c);  // Condition code is folded into the opcode byte.
  EmitOperand(dst.LowBits(), src);
}
273 
274 
// movzx reg, reg8: 0F B6 /r — zero-extend a byte register.
void X86_64Assembler::movzxb(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Forces a REX prefix where needed so src is a proper byte register
  // (avoids the legacy AH/CH/DH/BH encodings).
  EmitOptionalByteRegNormalizingRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB6);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
282 
283 
// movzx reg, mem8: 0F B6 /r — zero-extend a byte load.
void X86_64Assembler::movzxb(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Byte register is only in the source register form, so we don't use
  // EmitOptionalByteRegNormalizingRex32(dst, src);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB6);
  EmitOperand(dst.LowBits(), src);
}
293 
294 
// movsx reg, reg8: 0F BE /r — sign-extend a byte register.
void X86_64Assembler::movsxb(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Normalizing REX ensures src encodes as SPL/BPL/... rather than AH/CH/...
  EmitOptionalByteRegNormalizingRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBE);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
302 
303 
// movsx reg, mem8: 0F BE /r — sign-extend a byte load.
void X86_64Assembler::movsxb(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Byte register is only in the source register form, so we don't use
  // EmitOptionalByteRegNormalizingRex32(dst, src);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBE);
  EmitOperand(dst.LowBits(), src);
}
313 
314 
// Byte loads must specify an extension; this overload is deliberately fatal.
void X86_64Assembler::movb(CpuRegister /*dst*/, const Address& /*src*/) {
  LOG(FATAL) << "Use movzxb or movsxb instead.";
}
318 
319 
// movb mem, reg8: 88 /r (byte store).
void X86_64Assembler::movb(const Address& dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Normalizing REX so src encodes as a low byte register (not AH/CH/DH/BH).
  EmitOptionalByteRegNormalizingRex32(src, dst);
  EmitUint8(0x88);
  EmitOperand(src.LowBits(), dst);
}
326 
327 
// movb mem, imm8: C6 /0 imm8.
void X86_64Assembler::movb(const Address& dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitUint8(0xC6);
  EmitOperand(Register::RAX, dst);  // RAX == 0, i.e. the /0 opcode extension.
  CHECK(imm.is_int8());
  EmitUint8(imm.value() & 0xFF);
}
336 
337 
// movzx reg, reg16: 0F B7 /r — zero-extend a 16-bit register.
void X86_64Assembler::movzxw(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB7);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
345 
346 
// movzx reg, mem16: 0F B7 /r — zero-extend a 16-bit load.
void X86_64Assembler::movzxw(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB7);
  EmitOperand(dst.LowBits(), src);
}
354 
355 
// movsx reg, reg16: 0F BF /r — sign-extend a 16-bit register.
void X86_64Assembler::movsxw(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBF);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
363 
364 
// movsx reg, mem16: 0F BF /r — sign-extend a 16-bit load.
void X86_64Assembler::movsxw(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBF);
  EmitOperand(dst.LowBits(), src);
}
372 
373 
// 16-bit loads must specify an extension; this overload is deliberately fatal.
void X86_64Assembler::movw(CpuRegister /*dst*/, const Address& /*src*/) {
  LOG(FATAL) << "Use movzxw or movsxw instead.";
}
377 
378 
// movw mem, reg16: 66 89 /r — operand-size override selects 16 bits.
void X86_64Assembler::movw(const Address& dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();  // 0x66 prefix.
  EmitOptionalRex32(src, dst);
  EmitUint8(0x89);
  EmitOperand(src.LowBits(), dst);
}
386 
387 
// movw mem, imm16: 66 C7 /0 imm16 — immediate emitted little-endian by hand.
void X86_64Assembler::movw(const Address& dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();  // 0x66 prefix.
  EmitOptionalRex32(dst);
  EmitUint8(0xC7);
  EmitOperand(Register::RAX, dst);  // RAX == 0, i.e. the /0 opcode extension.
  CHECK(imm.is_uint16() || imm.is_int16());
  EmitUint8(imm.value() & 0xFF);  // Low byte first (little-endian).
  EmitUint8(imm.value() >> 8);
}
398 
399 
// leaq reg, mem: REX.W 8D /r — 64-bit effective address computation.
void X86_64Assembler::leaq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x8D);
  EmitOperand(dst.LowBits(), src);
}
406 
407 
// leal reg, mem: 8D /r — 32-bit effective address computation.
void X86_64Assembler::leal(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x8D);
  EmitOperand(dst.LowBits(), src);
}
414 
415 
// movaps xmm, xmm: 0F 28 /r; dispatches to the VEX encoding when AVX is on.
void X86_64Assembler::movaps(XmmRegister dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovaps(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x28);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
427 
428 
429 /**VEX.128.0F.WIG 28 /r VMOVAPS xmm1, xmm2 */
// vmovaps xmm, xmm (VEX.128.0F.WIG 28/29 /r). Uses the 2-byte VEX prefix when
// possible; the 2-byte form only carries the R extension bit, so the code picks
// the load (0x28) or store (0x29) opcode form such that the register needing
// REX sits in the ModRM reg field covered by VEX.R.
void X86_64Assembler::vmovaps(XmmRegister dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  uint8_t byte_zero, byte_one, byte_two;
  bool is_twobyte_form = true;
  bool load = dst.NeedsRex();  // If dst needs REX, keep it in the reg field (load form).
  bool store = !load;

  // Both operands need extension bits -> must fall back to the 3-byte prefix.
  if (src.NeedsRex() && dst.NeedsRex()) {
    is_twobyte_form = false;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Instruction VEX Prefix
  byte_zero = EmitVexPrefixByteZero(is_twobyte_form);
  X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();  // vvvv unused here.
  if (is_twobyte_form) {
    // VEX.R must cover whichever operand ends up in the reg field.
    bool rex_bit = (load) ? dst.NeedsRex() : src.NeedsRex();
    byte_one = EmitVexPrefixByteOne(rex_bit,
                                    vvvv_reg,
                                    SET_VEX_L_128,
                                    SET_VEX_PP_NONE);
  } else {
    byte_one = EmitVexPrefixByteOne(dst.NeedsRex(),
                                    /*X=*/ false,
                                    src.NeedsRex(),
                                    SET_VEX_M_0F);
    byte_two = EmitVexPrefixByteTwo(/*W=*/ false,
                                    SET_VEX_L_128,
                                    SET_VEX_PP_NONE);
  }
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  if (!is_twobyte_form) {
    EmitUint8(byte_two);
  }
  // Instruction Opcode: 0x29 is the MR (store) form, 0x28 the RM (load) form.
  if (is_twobyte_form && store) {
    EmitUint8(0x29);
  } else {
    EmitUint8(0x28);
  }
  // Instruction Operands (reg/rm roles are swapped in the store form).
  if (is_twobyte_form && store) {
    EmitXmmRegisterOperand(src.LowBits(), dst);
  } else {
    EmitXmmRegisterOperand(dst.LowBits(), src);
  }
}
477 
// movaps xmm, m128: 0F 28 /r (aligned load); VEX path when AVX is on.
void X86_64Assembler::movaps(XmmRegister dst, const Address& src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovaps(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x28);
  EmitOperand(dst.LowBits(), src);
}
489 
490 /**VEX.128.0F.WIG 28 /r VMOVAPS xmm1, m128 */
// vmovaps xmm, m128 (VEX.128.0F.WIG 28 /r). The 2-byte VEX prefix can only
// encode VEX.R, so it is used only when the address needs neither the X nor
// the B extension bit.
void X86_64Assembler::vmovaps(XmmRegister dst, const Address& src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = false;
  // Instruction VEX Prefix
  uint8_t rex = src.rex();
  bool Rex_x = rex & GET_REX_X;  // Index register extension.
  bool Rex_b = rex & GET_REX_B;  // Base register extension.
  if (!Rex_b && !Rex_x) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();  // vvvv unused.
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x28);
  // Instruction Operands
  EmitOperand(dst.LowBits(), src);
}
529 
// movups xmm, m128: 0F 10 /r (unaligned load); VEX path when AVX is on.
void X86_64Assembler::movups(XmmRegister dst, const Address& src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovups(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x10);
  EmitOperand(dst.LowBits(), src);
}
541 
542 /** VEX.128.0F.WIG 10 /r VMOVUPS xmm1, m128 */
// vmovups xmm, m128 (VEX.128.0F.WIG 10 /r). 2-byte VEX prefix only when the
// address needs neither the X nor the B extension bit.
void X86_64Assembler::vmovups(XmmRegister dst, const Address& src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = false;
  // Instruction VEX Prefix
  uint8_t rex = src.rex();
  bool Rex_x = rex & GET_REX_X;  // Index register extension.
  bool Rex_b = rex & GET_REX_B;  // Base register extension.
  if (!Rex_x && !Rex_b) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();  // vvvv unused.
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x10);
  // Instruction Operands
  EmitOperand(dst.LowBits(), src);
}
581 
582 
// movaps m128, xmm: 0F 29 /r (aligned store); VEX path when AVX is on.
void X86_64Assembler::movaps(const Address& dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovaps(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x29);
  EmitOperand(src.LowBits(), dst);
}
594 
595 /** VEX.128.0F.WIG 29 /r VMOVAPS m128, xmm1 */
// vmovaps m128, xmm (VEX.128.0F.WIG 29 /r). 2-byte VEX prefix only when the
// destination address needs neither the X nor the B extension bit.
void X86_64Assembler::vmovaps(const Address& dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = false;

  // Instruction VEX Prefix
  uint8_t rex = dst.rex();
  bool Rex_x = rex & GET_REX_X;  // Index register extension.
  bool Rex_b = rex & GET_REX_B;  // Base register extension.
  if (!Rex_b && !Rex_x) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();  // vvvv unused.
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x29);
  // Instruction Operands
  EmitOperand(src.LowBits(), dst);
}
635 
// movups m128, xmm: 0F 11 /r (unaligned store); VEX path when AVX is on.
void X86_64Assembler::movups(const Address& dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovups(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitOperand(src.LowBits(), dst);
}
647 
648 /** VEX.128.0F.WIG 11 /r VMOVUPS m128, xmm1 */
// vmovups m128, xmm (VEX.128.0F.WIG 11 /r). 2-byte VEX prefix only when the
// destination address needs neither the X nor the B extension bit.
void X86_64Assembler::vmovups(const Address& dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = false;

  // Instruction VEX Prefix
  uint8_t rex = dst.rex();
  bool Rex_x = rex & GET_REX_X;  // Index register extension.
  bool Rex_b = rex & GET_REX_B;  // Base register extension.
  if (!Rex_b && !Rex_x) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();  // vvvv unused.
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x11);
  // Instruction Operands
  EmitOperand(src.LowBits(), dst);
}
688 
689 
// movss xmm, m32: F3 0F 10 /r — scalar single-precision load.
void X86_64Assembler::movss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // Mandatory prefix selecting the scalar-single form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x10);
  EmitOperand(dst.LowBits(), src);
}
698 
699 
// movss m32, xmm: F3 0F 11 /r — scalar single-precision store.
void X86_64Assembler::movss(const Address& dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // Mandatory prefix selecting the scalar-single form.
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitOperand(src.LowBits(), dst);
}
708 
709 
// movss xmm, xmm: F3 0F 11 /r, deliberately using the MR (store) form.
void X86_64Assembler::movss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(src, dst);  // Movss is MR encoding instead of the usual RM.
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitXmmRegisterOperand(src.LowBits(), dst);
}
718 
719 
// movsxd reg64, reg32: REX.W 63 /r — sign-extend 32 -> 64 bits.
void X86_64Assembler::movsxd(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x63);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
726 
727 
// movsxd reg64, m32: REX.W 63 /r — sign-extend a 32-bit load to 64 bits.
void X86_64Assembler::movsxd(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x63);
  EmitOperand(dst.LowBits(), src);
}
734 
735 
// movd/movq xmm, reg — convenience overload defaulting to 64-bit (movq).
void X86_64Assembler::movd(XmmRegister dst, CpuRegister src) {
  movd(dst, src, true);  // is64bit = true.
}
739 
// movd/movq reg, xmm — convenience overload defaulting to 64-bit (movq).
void X86_64Assembler::movd(CpuRegister dst, XmmRegister src) {
  movd(dst, src, true);  // is64bit = true.
}
743 
// movd/movq xmm, reg: 66 (REX.W) 0F 6E /r — GPR to XMM; REX.W selects 64 bits.
void X86_64Assembler::movd(XmmRegister dst, CpuRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix.
  EmitOptionalRex(false, is64bit, dst.NeedsRex(), false, src.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x6E);
  EmitOperand(dst.LowBits(), Operand(src));
}
752 
// movd/movq reg, xmm: 66 (REX.W) 0F 7E /r — XMM to GPR; REX.W selects 64 bits.
void X86_64Assembler::movd(CpuRegister dst, XmmRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix.
  EmitOptionalRex(false, is64bit, src.NeedsRex(), false, dst.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x7E);
  EmitOperand(src.LowBits(), Operand(dst));
}
761 
// addss xmm, xmm: F3 0F 58 /r — scalar single-precision add.
void X86_64Assembler::addss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
770 
// addss xmm, m32: F3 0F 58 /r — scalar single-precision add from memory.
void X86_64Assembler::addss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitOperand(dst.LowBits(), src);
}
779 
780 
// subss xmm, xmm: F3 0F 5C /r — scalar single-precision subtract.
void X86_64Assembler::subss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
789 
790 
// subss xmm, m32: F3 0F 5C /r — scalar single-precision subtract from memory.
void X86_64Assembler::subss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitOperand(dst.LowBits(), src);
}
799 
800 
// mulss xmm, xmm: F3 0F 59 /r — scalar single-precision multiply.
void X86_64Assembler::mulss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
809 
810 
// mulss xmm, m32: F3 0F 59 /r — scalar single-precision multiply from memory.
void X86_64Assembler::mulss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitOperand(dst.LowBits(), src);
}
819 
820 
// divss xmm, xmm: F3 0F 5E /r — scalar single-precision divide.
void X86_64Assembler::divss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
829 
830 
// divss xmm, m32: F3 0F 5E /r — scalar single-precision divide from memory.
void X86_64Assembler::divss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitOperand(dst.LowBits(), src);
}
839 
840 
// addps xmm, xmm: 0F 58 /r — packed single-precision add.
void X86_64Assembler::addps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
848 
849 
// subps xmm, xmm: 0F 5C /r — packed single-precision subtract.
void X86_64Assembler::subps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
857 
// vaddps dst, src1, src2 (VEX.128.0F.WIG 58 /r). Prefers the 2-byte VEX
// prefix: since addition is commutative, the operands are swapped when only
// the left source needs an extension bit, so the 2-byte form still applies.
void X86_64Assembler::vaddps(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  if (!add_right.NeedsRex()) {
    is_twobyte_form = true;
  } else if (!add_left.NeedsRex()) {
    return vaddps(dst, add_right, add_left);  // Commutative: swap to enable 2-byte form.
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   add_right.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x58);
  EmitXmmRegisterOperand(dst.LowBits(), add_right);  // add_left is carried in VEX.vvvv.
}
888 
void X86_64Assembler::vsubps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  // VSUBPS xmm1, xmm2, xmm3 (VEX.128.0F.WIG 5C /r): packed single subtract.
  // Unlike vaddps, sources cannot be swapped (subtraction is not commutative),
  // so the 2-byte VEX form is only used when src2 needs no REX.B.
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t byte_zero = 0x00, byte_one = 0x00, byte_two = 0x00;
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  byte_zero = EmitVexPrefixByteZero(is_twobyte_form);
  // First source goes in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg = X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  if (is_twobyte_form) {
    byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), /*X=*/ false, src2.NeedsRex(), SET_VEX_M_0F);
    byte_two = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  if (!is_twobyte_form) {
    EmitUint8(byte_two);
  }
  EmitUint8(0x5C);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
913 
914 
void X86_64Assembler::mulps(XmmRegister dst, XmmRegister src) {
  // MULPS xmm1, xmm2 (0F 59 /r): packed single-precision multiply, dst *= src.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
922 
void X86_64Assembler::vmulps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  // VMULPS xmm1, xmm2, xmm3 (VEX.128.0F.WIG 59 /r): packed single multiply.
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  if (!src2.NeedsRex()) {
    // rm operand needs no REX.B: short 2-byte VEX prefix suffices.
    is_twobyte_form = true;
  } else if (!src1.NeedsRex()) {
    // Multiplication is commutative; swap sources to enable the 2-byte form.
    return vmulps(dst, src2, src1);
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  // First source goes in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x59);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
953 
void X86_64Assembler::divps(XmmRegister dst, XmmRegister src) {
  // DIVPS xmm1, xmm2 (0F 5E /r): packed single-precision divide, dst /= src.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
961 
void X86_64Assembler::vdivps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  // VDIVPS xmm1, xmm2, xmm3 (VEX.128.0F.WIG 5E /r): packed single divide.
  // Division is not commutative, so no source-swapping optimization here.
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  // First source goes in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x5E);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
990 
void X86_64Assembler::vfmadd213ss(XmmRegister acc, XmmRegister left, XmmRegister right) {
  // VFMADD213SS xmm1, xmm2, xmm3 (VEX.128.66.0F38.W0 A9 /r):
  // acc = acc * left + right, scalar single precision.
  // The 0F38 opcode map always requires the 3-byte VEX prefix.
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  ByteZero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
  // Second multiplicand goes in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(left.AsFloatRegister());
  ByteOne = EmitVexPrefixByteOne(acc.NeedsRex(),
                                 /*X=*/ false,
                                 right.NeedsRex(),
                                 SET_VEX_M_0F_38);
  // W=0 selects the single-precision form.
  ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  EmitUint8(ByteTwo);
  EmitUint8(0xA9);  // Opcode.
  EmitXmmRegisterOperand(acc.LowBits(), right);
}
1009 
void X86_64Assembler::vfmadd213sd(XmmRegister acc, XmmRegister left, XmmRegister right) {
  // VFMADD213SD xmm1, xmm2, xmm3 (VEX.128.66.0F38.W1 A9 /r):
  // acc = acc * left + right, scalar double precision.
  // The 0F38 opcode map always requires the 3-byte VEX prefix.
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  ByteZero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
  // Second multiplicand goes in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(left.AsFloatRegister());
  ByteOne = EmitVexPrefixByteOne(acc.NeedsRex(),
                                 /*X=*/ false,
                                 right.NeedsRex(),
                                 SET_VEX_M_0F_38);
  // W=1 selects the double-precision form (only difference from vfmadd213ss).
  ByteTwo = EmitVexPrefixByteTwo(/*W=*/ true, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  EmitUint8(ByteTwo);
  EmitUint8(0xA9);  // Opcode.
  EmitXmmRegisterOperand(acc.LowBits(), right);
}
void X86_64Assembler::flds(const Address& src) {
  // FLD m32fp (D9 /0): push the 32-bit float at `src` onto the x87 stack.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(0, src);
}
1033 
1034 
void X86_64Assembler::fsts(const Address& dst) {
  // FST m32fp (D9 /2): store ST(0) to `dst` as a 32-bit float, without popping.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(2, dst);
}
1040 
1041 
void X86_64Assembler::fstps(const Address& dst) {
  // FSTP m32fp (D9 /3): store ST(0) to `dst` as a 32-bit float and pop.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(3, dst);
}
1047 
1048 
void X86_64Assembler::movapd(XmmRegister dst, XmmRegister src) {
  // MOVAPD xmm1, xmm2 (66 0F 28 /r): aligned packed-double register move.
  // Prefers the VEX encoding when AVX is available.
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovapd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x28);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1061 
1062 /** VEX.128.66.0F.WIG 28 /r VMOVAPD xmm1, xmm2 */
void X86_64Assembler::vmovapd(XmmRegister dst, XmmRegister src) {
  // VMOVAPD xmm1, xmm2 (VEX.128.66.0F.WIG 28 /r load form, 29 /r store form).
  // Uses the 2-byte VEX prefix where possible; when only `src` needs the REX
  // extension, the operands are flipped into the store (MR) form 0x29 so the
  // extension bit can be carried in VEX.R and the short prefix still used.
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = true;

  // Only when both registers need REX bits is the 3-byte prefix required.
  if (src.NeedsRex() && dst.NeedsRex()) {
    is_twobyte_form = false;
  }
  // Instruction VEX Prefix
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  bool load = dst.NeedsRex();
  if (is_twobyte_form) {
    // No second source: vvvv is unused (encodes "no register").
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    bool rex_bit = load ? dst.NeedsRex() : src.NeedsRex();
    ByteOne = EmitVexPrefixByteOne(rex_bit,
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  if (is_twobyte_form && !load) {
    EmitUint8(0x29);  // Store (MR) form: src is the reg operand.
  } else {
    EmitUint8(0x28);  // Load (RM) form: dst is the reg operand.
  }
  // Instruction Operands
  if (is_twobyte_form && !load) {
    EmitXmmRegisterOperand(src.LowBits(), dst);
  } else {
    EmitXmmRegisterOperand(dst.LowBits(), src);
  }
}
1109 
void X86_64Assembler::movapd(XmmRegister dst, const Address& src) {
  // MOVAPD xmm, m128 (66 0F 28 /r): aligned packed-double load.
  // Prefers the VEX encoding when AVX is available.
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovapd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x28);
  EmitOperand(dst.LowBits(), src);
}
1122 
1123 /** VEX.128.66.0F.WIG 28 /r VMOVAPD xmm1, m128 */
void X86_64Assembler::vmovapd(XmmRegister dst, const Address& src) {
  // VMOVAPD xmm1, m128 (VEX.128.66.0F.WIG 28 /r): aligned packed-double load.
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = false;

  // Instruction VEX Prefix
  // The memory operand's index/base REX bits decide whether the short 2-byte
  // VEX prefix (which cannot carry X/B) is usable.
  uint8_t rex = src.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  if (!Rex_b && !Rex_x) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    // No second source: vvvv is unused (encodes "no register").
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x28);
  // Instruction Operands
  EmitOperand(dst.LowBits(), src);
}
1163 
void X86_64Assembler::movupd(XmmRegister dst, const Address& src) {
  // MOVUPD xmm, m128 (66 0F 10 /r): unaligned packed-double load.
  // Prefers the VEX encoding when AVX is available.
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovupd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x10);
  EmitOperand(dst.LowBits(), src);
}
1176 
1177 /** VEX.128.66.0F.WIG 10 /r VMOVUPD xmm1, m128 */
vmovupd(XmmRegister dst,const Address & src)1178 void X86_64Assembler::vmovupd(XmmRegister dst, const Address& src) {
1179   DCHECK(CpuHasAVXorAVX2FeatureFlag());
1180   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1181   bool is_twobyte_form = false;
1182   uint8_t ByteZero, ByteOne, ByteTwo;
1183 
1184   // Instruction VEX Prefix
1185   uint8_t rex = src.rex();
1186   bool Rex_x = rex & GET_REX_X;
1187   bool Rex_b = rex & GET_REX_B;
1188   if (!Rex_b && !Rex_x) {
1189     is_twobyte_form = true;
1190   }
1191   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1192   if (is_twobyte_form) {
1193     X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
1194     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1195                                    vvvv_reg,
1196                                    SET_VEX_L_128,
1197                                    SET_VEX_PP_66);
1198   } else {
1199     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1200                                    Rex_x,
1201                                    Rex_b,
1202                                    SET_VEX_M_0F);
1203     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
1204                                    SET_VEX_L_128,
1205                                    SET_VEX_PP_66);
1206   }
1207   EmitUint8(ByteZero);
1208   EmitUint8(ByteOne);
1209   if (!is_twobyte_form)
1210   EmitUint8(ByteTwo);
1211   // Instruction Opcode
1212   EmitUint8(0x10);
1213   // Instruction Operands
1214   EmitOperand(dst.LowBits(), src);
1215 }
1216 
void X86_64Assembler::movapd(const Address& dst, XmmRegister src) {
  // MOVAPD m128, xmm (66 0F 29 /r): aligned packed-double store.
  // Prefers the VEX encoding when AVX is available.
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovapd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x29);
  EmitOperand(src.LowBits(), dst);
}
1229 
1230 /** VEX.128.66.0F.WIG 29 /r VMOVAPD m128, xmm1 */
void X86_64Assembler::vmovapd(const Address& dst, XmmRegister src) {
  // VMOVAPD m128, xmm1 (VEX.128.66.0F.WIG 29 /r): aligned packed-double store.
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero, ByteOne, ByteTwo;
  // Instruction VEX Prefix
  // The memory operand's index/base REX bits decide whether the short 2-byte
  // VEX prefix (which cannot carry X/B) is usable.
  uint8_t rex = dst.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  if (!Rex_x && !Rex_b) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    // No second source: vvvv is unused (encodes "no register").
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x29);
  // Instruction Operands
  EmitOperand(src.LowBits(), dst);
}
1269 
void X86_64Assembler::movupd(const Address& dst, XmmRegister src) {
  // MOVUPD m128, xmm (66 0F 11 /r): unaligned packed-double store.
  // Prefers the VEX encoding when AVX is available.
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovupd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitOperand(src.LowBits(), dst);
}
1282 
1283 /** VEX.128.66.0F.WIG 11 /r VMOVUPD m128, xmm1 */
void X86_64Assembler::vmovupd(const Address& dst, XmmRegister src) {
  // VMOVUPD m128, xmm1 (VEX.128.66.0F.WIG 11 /r): unaligned packed-double store.
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero, ByteOne, ByteTwo;

  // Instruction VEX Prefix
  // The memory operand's index/base REX bits decide whether the short 2-byte
  // VEX prefix (which cannot carry X/B) is usable.
  uint8_t rex = dst.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  if (!Rex_x && !Rex_b) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    // No second source: vvvv is unused (encodes "no register").
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x11);
  // Instruction Operands
  EmitOperand(src.LowBits(), dst);
}
1323 
1324 
void X86_64Assembler::movsd(XmmRegister dst, const Address& src) {
  // MOVSD xmm, m64 (F2 0F 10 /r): load a scalar double from memory.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);  // Scalar-double-precision mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x10);
  EmitOperand(dst.LowBits(), src);
}
1333 
1334 
void X86_64Assembler::movsd(const Address& dst, XmmRegister src) {
  // MOVSD m64, xmm (F2 0F 11 /r): store a scalar double to memory.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);  // Scalar-double-precision mandatory prefix.
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitOperand(src.LowBits(), dst);
}
1343 
1344 
void X86_64Assembler::movsd(XmmRegister dst, XmmRegister src) {
  // MOVSD xmm1, xmm2 (F2 0F 11 /r): scalar-double register-to-register move.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(src, dst);  // Movsd is MR encoding instead of the usual RM.
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitXmmRegisterOperand(src.LowBits(), dst);
}
1353 
1354 
void X86_64Assembler::addsd(XmmRegister dst, XmmRegister src) {
  // ADDSD xmm1, xmm2 (F2 0F 58 /r): scalar double add, dst += src.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1363 
1364 
void X86_64Assembler::addsd(XmmRegister dst, const Address& src) {
  // ADDSD xmm, m64 (F2 0F 58 /r): scalar double add from memory.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitOperand(dst.LowBits(), src);
}
1373 
1374 
void X86_64Assembler::subsd(XmmRegister dst, XmmRegister src) {
  // SUBSD xmm1, xmm2 (F2 0F 5C /r): scalar double subtract, dst -= src.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1383 
1384 
void X86_64Assembler::subsd(XmmRegister dst, const Address& src) {
  // SUBSD xmm, m64 (F2 0F 5C /r): scalar double subtract from memory.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitOperand(dst.LowBits(), src);
}
1393 
1394 
void X86_64Assembler::mulsd(XmmRegister dst, XmmRegister src) {
  // MULSD xmm1, xmm2 (F2 0F 59 /r): scalar double multiply, dst *= src.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1403 
1404 
void X86_64Assembler::mulsd(XmmRegister dst, const Address& src) {
  // MULSD xmm, m64 (F2 0F 59 /r): scalar double multiply from memory.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitOperand(dst.LowBits(), src);
}
1413 
1414 
void X86_64Assembler::divsd(XmmRegister dst, XmmRegister src) {
  // DIVSD xmm1, xmm2 (F2 0F 5E /r): scalar double divide, dst /= src.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1423 
1424 
void X86_64Assembler::divsd(XmmRegister dst, const Address& src) {
  // DIVSD xmm, m64 (F2 0F 5E /r): scalar double divide by memory operand.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitOperand(dst.LowBits(), src);
}
1433 
1434 
void X86_64Assembler::addpd(XmmRegister dst, XmmRegister src) {
  // ADDPD xmm1, xmm2 (66 0F 58 /r): packed double add, dst += src.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1443 
1444 
vaddpd(XmmRegister dst,XmmRegister add_left,XmmRegister add_right)1445 void X86_64Assembler::vaddpd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
1446   bool is_twobyte_form = false;
1447   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
1448   if (!add_right.NeedsRex()) {
1449     is_twobyte_form = true;
1450   } else if (!add_left.NeedsRex()) {
1451     return vaddpd(dst, add_right, add_left);
1452   }
1453   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1454   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1455   X86_64ManagedRegister vvvv_reg =
1456       X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
1457   if (is_twobyte_form) {
1458     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1459   } else {
1460     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1461                                    /*X=*/ false,
1462                                    add_right.NeedsRex(),
1463                                    SET_VEX_M_0F);
1464     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1465   }
1466   EmitUint8(ByteZero);
1467   EmitUint8(ByteOne);
1468   if (!is_twobyte_form) {
1469     EmitUint8(ByteTwo);
1470   }
1471   EmitUint8(0x58);
1472   EmitXmmRegisterOperand(dst.LowBits(), add_right);
1473 }
1474 
1475 
void X86_64Assembler::subpd(XmmRegister dst, XmmRegister src) {
  // SUBPD xmm1, xmm2 (66 0F 5C /r): packed double subtract, dst -= src.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1484 
1485 
vsubpd(XmmRegister dst,XmmRegister src1,XmmRegister src2)1486 void X86_64Assembler::vsubpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
1487   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1488   bool is_twobyte_form = false;
1489   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
1490   if (!src2.NeedsRex()) {
1491     is_twobyte_form = true;
1492   }
1493   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1494   X86_64ManagedRegister vvvv_reg =
1495       X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
1496   if (is_twobyte_form) {
1497     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1498   } else {
1499     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1500                                    /*X=*/ false,
1501                                    src2.NeedsRex(),
1502                                    SET_VEX_M_0F);
1503     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1504   }
1505   EmitUint8(ByteZero);
1506   EmitUint8(ByteOne);
1507   if (!is_twobyte_form) {
1508     EmitUint8(ByteTwo);
1509   }
1510   EmitUint8(0x5C);
1511   EmitXmmRegisterOperand(dst.LowBits(), src2);
1512 }
1513 
1514 
void X86_64Assembler::mulpd(XmmRegister dst, XmmRegister src) {
  // MULPD xmm1, xmm2 (66 0F 59 /r): packed double multiply, dst *= src.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1523 
void X86_64Assembler::vmulpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  // VMULPD xmm1, xmm2, xmm3 (VEX.128.66.0F.WIG 59 /r): packed double multiply.
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  if (!src2.NeedsRex()) {
    // rm operand needs no REX.B: short 2-byte VEX prefix suffices.
    is_twobyte_form = true;
  } else if (!src1.NeedsRex()) {
    // Multiplication is commutative; swap sources to enable the 2-byte form.
    return vmulpd(dst, src2, src1);
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  // First source goes in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x59);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
1554 
void X86_64Assembler::divpd(XmmRegister dst, XmmRegister src) {
  // DIVPD xmm1, xmm2 (66 0F 5E /r): packed double divide, dst /= src.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1563 
1564 
void X86_64Assembler::vdivpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  // VDIVPD xmm1, xmm2, xmm3 (VEX.128.66.0F.WIG 5E /r): packed double divide.
  // Division is not commutative, so no source-swapping optimization here.
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  // First source goes in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x5E);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
1593 
1594 
void X86_64Assembler::movdqa(XmmRegister dst, XmmRegister src) {
  // MOVDQA xmm1, xmm2 (66 0F 6F /r): aligned double-quadword register move.
  // Prefers the VEX encoding when AVX is available.
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqa(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6F);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1607 
1608 /** VEX.128.66.0F.WIG 6F /r VMOVDQA xmm1, xmm2 */
void X86_64Assembler::vmovdqa(XmmRegister dst, XmmRegister src) {
  // VMOVDQA xmm1, xmm2 (VEX.128.66.0F.WIG 6F /r load form, 7F /r store form).
  // Uses the 2-byte VEX prefix where possible; when only `src` needs the REX
  // extension, the operands are flipped into the store (MR) form 0x7F so the
  // extension bit can be carried in VEX.R and the short prefix still used.
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = true;

  // Instruction VEX Prefix
  // Only when both registers need REX bits is the 3-byte prefix required.
  if (src.NeedsRex() && dst.NeedsRex()) {
    is_twobyte_form = false;
  }
  bool load = dst.NeedsRex();
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    // No second source: vvvv is unused (encodes "no register").
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    bool rex_bit = load ? dst.NeedsRex() : src.NeedsRex();
    ByteOne = EmitVexPrefixByteOne(rex_bit,
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  if (is_twobyte_form && !load) {
    EmitUint8(0x7F);  // Store (MR) form: src is the reg operand.
  } else {
    EmitUint8(0x6F);  // Load (RM) form: dst is the reg operand.
  }
  // Instruction Operands
  if (is_twobyte_form && !load) {
    EmitXmmRegisterOperand(src.LowBits(), dst);
  } else {
    EmitXmmRegisterOperand(dst.LowBits(), src);
  }
}
1655 
void X86_64Assembler::movdqa(XmmRegister dst, const Address& src) {
  // MOVDQA xmm, m128 (66 0F 6F /r): aligned double-quadword load.
  // Prefers the VEX encoding when AVX is available.
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqa(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6F);
  EmitOperand(dst.LowBits(), src);
}
1668 
1669 /** VEX.128.66.0F.WIG 6F /r VMOVDQA xmm1, m128 */
void X86_64Assembler::vmovdqa(XmmRegister dst, const Address& src) {
  // VMOVDQA xmm1, m128 (VEX.128.66.0F.WIG 6F /r): aligned double-quadword load.
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t  ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = false;

  // Instruction VEX Prefix
  // The memory operand's index/base REX bits decide whether the short 2-byte
  // VEX prefix (which cannot carry X/B) is usable.
  uint8_t rex = src.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  if (!Rex_x && !Rex_b) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    // No second source: vvvv is unused (encodes "no register").
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x6F);
  // Instruction Operands
  EmitOperand(dst.LowBits(), src);
}
1709 
// Loads 16 bytes from memory into `dst` (MOVDQU, F3 0F 6F /r — the
// unaligned form per the Intel SDM). Dispatches to the AVX encoding when
// the CPU supports it.
void X86_64Assembler::movdqu(XmmRegister dst, const Address& src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqu(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // Mandatory prefix selecting MOVDQU.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6F);
  EmitOperand(dst.LowBits(), src);
}
1722 
/** VEX.128.F3.0F.WIG 6F /r VMOVDQU xmm1, m128
Load Unaligned */
void X86_64Assembler::vmovdqu(XmmRegister dst, const Address& src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = false;

  // Instruction VEX Prefix
  uint8_t rex = src.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  // Two-byte VEX cannot carry X/B, so it only applies when the memory
  // operand needs neither extension bit.
  if (!Rex_x && !Rex_b) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    // No non-destructive source for a load: vvvv is left unused.
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_F3);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_F3);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    // ByteTwo is only initialized (and emitted) for the three-byte form.
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x6F);
  // Instruction Operands
  EmitOperand(dst.LowBits(), src);
}
1764 
// Stores the 16 bytes of `src` to memory (MOVDQA store form, 66 0F 7F /r).
// Dispatches to the AVX encoding when the CPU supports it.
void X86_64Assembler::movdqa(const Address& dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqa(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting MOVDQA.
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x7F);  // Store direction (xmm -> m128).
  EmitOperand(src.LowBits(), dst);
}
1777 
/** VEX.128.66.0F.WIG 7F /r VMOVDQA m128, xmm1 */
void X86_64Assembler::vmovdqa(const Address& dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero, ByteOne, ByteTwo;
  // Instruction VEX Prefix
  uint8_t rex = dst.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  // Two-byte VEX cannot carry X/B, so it only applies when the memory
  // operand needs neither extension bit.
  if (!Rex_x && !Rex_b) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    // No non-destructive source for a store: vvvv is left unused.
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    // ByteTwo is only initialized (and emitted) for the three-byte form.
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x7F);
  // Instruction Operands
  EmitOperand(src.LowBits(), dst);
}
1817 
// Stores the 16 bytes of `src` to memory (MOVDQU store form, F3 0F 7F /r).
// Dispatches to the AVX encoding when the CPU supports it.
void X86_64Assembler::movdqu(const Address& dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqu(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // Mandatory prefix selecting MOVDQU.
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x7F);  // Store direction (xmm -> m128).
  EmitOperand(src.LowBits(), dst);
}
1830 
/** VEX.128.F3.0F.WIG 7F /r VMOVDQU m128, xmm1 */
void X86_64Assembler::vmovdqu(const Address& dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = false;

  // Instruction VEX Prefix
  uint8_t rex = dst.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  // Two-byte VEX cannot carry X/B, so it only applies when the memory
  // operand needs neither extension bit.
  if (!Rex_b && !Rex_x) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    // No non-destructive source for a store: vvvv is left unused.
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_F3);
  } else {
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_F3);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    // ByteTwo is only initialized (and emitted) for the three-byte form.
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x7F);
  // Instruction Operands
  EmitOperand(src.LowBits(), dst);
}
1871 
// SSE2 PADDB xmm1, xmm2 (66 0F FC /r): add packed 8-bit integers.
void X86_64Assembler::paddb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xFC);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1880 
1881 
vpaddb(XmmRegister dst,XmmRegister add_left,XmmRegister add_right)1882 void X86_64Assembler::vpaddb(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
1883   DCHECK(CpuHasAVXorAVX2FeatureFlag());
1884   uint8_t ByteOne = 0x00, ByteZero = 0x00, ByteTwo = 0x00;
1885   bool is_twobyte_form = false;
1886   if (!add_right.NeedsRex()) {
1887     is_twobyte_form = true;
1888   } else if (!add_left.NeedsRex()) {
1889     return vpaddb(dst, add_right, add_left);
1890   }
1891   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1892   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1893   X86_64ManagedRegister vvvv_reg =
1894       X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
1895   if (is_twobyte_form) {
1896     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1897   } else {
1898     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1899                                    /*X=*/ false,
1900                                    add_right.NeedsRex(),
1901                                    SET_VEX_M_0F);
1902     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1903   }
1904   EmitUint8(ByteZero);
1905   EmitUint8(ByteOne);
1906   if (!is_twobyte_form) {
1907     EmitUint8(ByteTwo);
1908   }
1909   EmitUint8(0xFC);
1910   EmitXmmRegisterOperand(dst.LowBits(), add_right);
1911 }
1912 
1913 
// SSE2 PSUBB xmm1, xmm2 (66 0F F8 /r): subtract packed 8-bit integers.
void X86_64Assembler::psubb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xF8);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1922 
1923 
vpsubb(XmmRegister dst,XmmRegister add_left,XmmRegister add_right)1924 void X86_64Assembler::vpsubb(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
1925   DCHECK(CpuHasAVXorAVX2FeatureFlag());
1926   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1927   bool is_twobyte_form = false;
1928   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
1929   if (!add_right.NeedsRex()) {
1930     is_twobyte_form = true;
1931   }
1932   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1933   X86_64ManagedRegister vvvv_reg =
1934       X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
1935   if (is_twobyte_form) {
1936     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1937   } else {
1938     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1939                                    /*X=*/ false,
1940                                    add_right.NeedsRex(),
1941                                    SET_VEX_M_0F);
1942     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1943   }
1944   EmitUint8(ByteZero);
1945   EmitUint8(ByteOne);
1946   if (!is_twobyte_form) {
1947     EmitUint8(ByteTwo);
1948   }
1949   EmitUint8(0xF8);
1950   EmitXmmRegisterOperand(dst.LowBits(), add_right);
1951 }
1952 
1953 
// SSE2 PADDW xmm1, xmm2 (66 0F FD /r): add packed 16-bit integers.
void X86_64Assembler::paddw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xFD);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1962 
vpaddw(XmmRegister dst,XmmRegister add_left,XmmRegister add_right)1963 void X86_64Assembler::vpaddw(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
1964   DCHECK(CpuHasAVXorAVX2FeatureFlag());
1965   bool is_twobyte_form = false;
1966   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
1967   if (!add_right.NeedsRex()) {
1968     is_twobyte_form = true;
1969   } else if (!add_left.NeedsRex()) {
1970     return vpaddw(dst, add_right, add_left);
1971   }
1972   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1973   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1974   X86_64ManagedRegister vvvv_reg =
1975       X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
1976   if (is_twobyte_form) {
1977     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1978   } else {
1979     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1980                                    /*X=*/ false,
1981                                    add_right.NeedsRex(),
1982                                    SET_VEX_M_0F);
1983     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1984   }
1985   EmitUint8(ByteZero);
1986   EmitUint8(ByteOne);
1987   if (!is_twobyte_form) {
1988     EmitUint8(ByteTwo);
1989   }
1990   EmitUint8(0xFD);
1991   EmitXmmRegisterOperand(dst.LowBits(), add_right);
1992 }
1993 
1994 
// SSE2 PSUBW xmm1, xmm2 (66 0F F9 /r): subtract packed 16-bit integers.
void X86_64Assembler::psubw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xF9);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2003 
vpsubw(XmmRegister dst,XmmRegister add_left,XmmRegister add_right)2004 void X86_64Assembler::vpsubw(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
2005   DCHECK(CpuHasAVXorAVX2FeatureFlag());
2006   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2007   bool is_twobyte_form = false;
2008   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2009   if (!add_right.NeedsRex()) {
2010     is_twobyte_form = true;
2011   }
2012   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2013   X86_64ManagedRegister vvvv_reg =
2014       X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
2015   if (is_twobyte_form) {
2016     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2017   } else {
2018     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2019                                    /*X=*/ false,
2020                                    add_right.NeedsRex(),
2021                                    SET_VEX_M_0F);
2022     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2023   }
2024   EmitUint8(ByteZero);
2025   EmitUint8(ByteOne);
2026   if (!is_twobyte_form) {
2027     EmitUint8(ByteTwo);
2028   }
2029   EmitUint8(0xF9);
2030   EmitXmmRegisterOperand(dst.LowBits(), add_right);
2031 }
2032 
2033 
// SSE2 PMULLW xmm1, xmm2 (66 0F D5 /r): multiply packed 16-bit integers,
// keeping the low 16 bits of each product.
void X86_64Assembler::pmullw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xD5);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2042 
vpmullw(XmmRegister dst,XmmRegister src1,XmmRegister src2)2043 void X86_64Assembler::vpmullw(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
2044   DCHECK(CpuHasAVXorAVX2FeatureFlag());
2045   bool is_twobyte_form = false;
2046   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2047   if (!src2.NeedsRex()) {
2048     is_twobyte_form = true;
2049   } else if (!src1.NeedsRex()) {
2050     return vpmullw(dst, src2, src1);
2051   }
2052   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2053   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2054   X86_64ManagedRegister vvvv_reg =
2055       X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
2056   if (is_twobyte_form) {
2057     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2058   } else {
2059     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2060                                    /*X=*/ false,
2061                                    src2.NeedsRex(),
2062                                    SET_VEX_M_0F);
2063     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2064   }
2065   EmitUint8(ByteZero);
2066   EmitUint8(ByteOne);
2067   if (!is_twobyte_form) {
2068     EmitUint8(ByteTwo);
2069   }
2070   EmitUint8(0xD5);
2071   EmitXmmRegisterOperand(dst.LowBits(), src2);
2072 }
2073 
// SSE2 PADDD xmm1, xmm2 (66 0F FE /r): add packed 32-bit integers.
void X86_64Assembler::paddd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xFE);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2082 
vpaddd(XmmRegister dst,XmmRegister add_left,XmmRegister add_right)2083 void X86_64Assembler::vpaddd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
2084   DCHECK(CpuHasAVXorAVX2FeatureFlag());
2085   bool is_twobyte_form = false;
2086   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2087   if (!add_right.NeedsRex()) {
2088     is_twobyte_form = true;
2089   } else if (!add_left.NeedsRex()) {
2090     return vpaddd(dst, add_right, add_left);
2091   }
2092   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2093   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2094   X86_64ManagedRegister vvvv_reg =
2095       X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
2096   if (is_twobyte_form) {
2097     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2098   } else {
2099     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2100                                    /*X=*/ false,
2101                                    add_right.NeedsRex(),
2102                                    SET_VEX_M_0F);
2103     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2104   }
2105   EmitUint8(ByteZero);
2106   EmitUint8(ByteOne);
2107   if (!is_twobyte_form) {
2108     EmitUint8(ByteTwo);
2109   }
2110   EmitUint8(0xFE);
2111   EmitXmmRegisterOperand(dst.LowBits(), add_right);
2112 }
2113 
// SSE2 PSUBD xmm1, xmm2 (66 0F FA /r): subtract packed 32-bit integers.
void X86_64Assembler::psubd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xFA);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2122 
2123 
// SSE4.1 PMULLD xmm1, xmm2 (66 0F 38 40 /r): multiply packed 32-bit
// integers, keeping the low 32 bits of each product.
void X86_64Assembler::pmulld(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);  // Three-byte opcode escape.
  EmitUint8(0x40);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2133 
vpmulld(XmmRegister dst,XmmRegister src1,XmmRegister src2)2134 void X86_64Assembler::vpmulld(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
2135   DCHECK(CpuHasAVXorAVX2FeatureFlag());
2136   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2137   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2138   ByteZero = EmitVexPrefixByteZero(/*is_twobyte_form*/ false);
2139   X86_64ManagedRegister vvvv_reg =
2140       X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
2141   ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2142                                    /*X=*/ false,
2143                                    src2.NeedsRex(),
2144                                    SET_VEX_M_0F_38);
2145   ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2146   EmitUint8(ByteZero);
2147   EmitUint8(ByteOne);
2148   EmitUint8(ByteTwo);
2149   EmitUint8(0x40);
2150   EmitXmmRegisterOperand(dst.LowBits(), src2);
2151 }
2152 
// SSE2 PADDQ xmm1, xmm2 (66 0F D4 /r): add packed 64-bit integers.
void X86_64Assembler::paddq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xD4);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2161 
2162 
vpaddq(XmmRegister dst,XmmRegister add_left,XmmRegister add_right)2163 void X86_64Assembler::vpaddq(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
2164   DCHECK(CpuHasAVXorAVX2FeatureFlag());
2165   bool is_twobyte_form = false;
2166   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2167   if (!add_right.NeedsRex()) {
2168     is_twobyte_form = true;
2169   } else if (!add_left.NeedsRex()) {
2170     return vpaddq(dst, add_right, add_left);
2171   }
2172   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2173   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2174   X86_64ManagedRegister vvvv_reg =
2175       X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
2176   if (is_twobyte_form) {
2177     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2178   } else {
2179     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2180                                    /*X=*/ false,
2181                                    add_right.NeedsRex(),
2182                                    SET_VEX_M_0F);
2183     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2184   }
2185   EmitUint8(ByteZero);
2186   EmitUint8(ByteOne);
2187   if (!is_twobyte_form) {
2188     EmitUint8(ByteTwo);
2189   }
2190   EmitUint8(0xD4);
2191   EmitXmmRegisterOperand(dst.LowBits(), add_right);
2192 }
2193 
2194 
// SSE2 PSUBQ xmm1, xmm2 (66 0F FB /r): subtract packed 64-bit integers.
void X86_64Assembler::psubq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xFB);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2203 
vpsubq(XmmRegister dst,XmmRegister add_left,XmmRegister add_right)2204 void X86_64Assembler::vpsubq(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
2205   DCHECK(CpuHasAVXorAVX2FeatureFlag());
2206   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2207   bool is_twobyte_form = false;
2208   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2209   if (!add_right.NeedsRex()) {
2210     is_twobyte_form = true;
2211   }
2212   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2213   X86_64ManagedRegister vvvv_reg =
2214       X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
2215   if (is_twobyte_form) {
2216     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2217   } else {
2218     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2219                                    /*X=*/ false,
2220                                    add_right.NeedsRex(),
2221                                    SET_VEX_M_0F);
2222     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2223   }
2224   EmitUint8(ByteZero);
2225   EmitUint8(ByteOne);
2226   if (!is_twobyte_form) {
2227     EmitUint8(ByteTwo);
2228   }
2229   EmitUint8(0xFB);
2230   EmitXmmRegisterOperand(dst.LowBits(), add_right);
2231 }
2232 
2233 
// SSE2 PADDUSB xmm1, xmm2 (66 0F DC /r): add packed unsigned 8-bit
// integers with unsigned saturation.
void X86_64Assembler::paddusb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDC);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2242 
2243 
// SSE2 PADDSB xmm1, xmm2 (66 0F EC /r): add packed signed 8-bit integers
// with signed saturation.
void X86_64Assembler::paddsb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEC);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2252 
2253 
// SSE2 PADDUSW xmm1, xmm2 (66 0F DD /r): add packed unsigned 16-bit
// integers with unsigned saturation.
void X86_64Assembler::paddusw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDD);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2262 
2263 
// SSE2 PADDSW xmm1, xmm2 (66 0F ED /r): add packed signed 16-bit integers
// with signed saturation.
void X86_64Assembler::paddsw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xED);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2272 
2273 
// SSE2 PSUBUSB xmm1, xmm2 (66 0F D8 /r): subtract packed unsigned 8-bit
// integers with unsigned saturation.
void X86_64Assembler::psubusb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xD8);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2282 
2283 
// SSE2 PSUBSB xmm1, xmm2 (66 0F E8 /r): subtract packed signed 8-bit
// integers with signed saturation.
void X86_64Assembler::psubsb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE8);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2292 
2293 
vpsubd(XmmRegister dst,XmmRegister add_left,XmmRegister add_right)2294 void X86_64Assembler::vpsubd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
2295   DCHECK(CpuHasAVXorAVX2FeatureFlag());
2296   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2297   bool is_twobyte_form = false;
2298   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2299   if (!add_right.NeedsRex()) {
2300     is_twobyte_form = true;
2301   }
2302   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2303   X86_64ManagedRegister vvvv_reg =
2304       X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
2305   if (is_twobyte_form) {
2306     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2307   } else {
2308     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2309                                    /*X=*/ false,
2310                                    add_right.NeedsRex(),
2311                                    SET_VEX_M_0F);
2312     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2313   }
2314   EmitUint8(ByteZero);
2315   EmitUint8(ByteOne);
2316   if (!is_twobyte_form) {
2317     EmitUint8(ByteTwo);
2318   }
2319   EmitUint8(0xFA);
2320   EmitXmmRegisterOperand(dst.LowBits(), add_right);
2321 }
2322 
2323 
// SSE2 PSUBUSW xmm1, xmm2 (66 0F D9 /r): subtract packed unsigned 16-bit
// integers with unsigned saturation.
void X86_64Assembler::psubusw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xD9);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2332 
2333 
// SSE2 PSUBSW xmm1, xmm2 (66 0F E9 /r): subtract packed signed 16-bit
// integers with signed saturation.
void X86_64Assembler::psubsw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE9);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2342 
2343 
// Convenience overload: 32-bit signed int -> float conversion.
void X86_64Assembler::cvtsi2ss(XmmRegister dst, CpuRegister src) {
  cvtsi2ss(dst, src, false);  // is64bit = false.
}
2347 
2348 
// CVTSI2SS xmm, r32/r64 (F3 [REX.W] 0F 2A /r): convert a signed integer in
// `src` (64-bit when `is64bit`) to single-precision float in `dst`.
void X86_64Assembler::cvtsi2ss(XmmRegister dst, CpuRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2A);
  EmitOperand(dst.LowBits(), Operand(src));
}
2362 
2363 
// Memory-operand form of CVTSI2SS: converts the integer at `src` (64-bit
// when `is64bit`) to single-precision float in `dst`.
void X86_64Assembler::cvtsi2ss(XmmRegister dst, const Address& src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2A);
  EmitOperand(dst.LowBits(), src);
}
2377 
2378 
// Convenience overload: 32-bit signed int -> double conversion.
void X86_64Assembler::cvtsi2sd(XmmRegister dst, CpuRegister src) {
  cvtsi2sd(dst, src, false);  // is64bit = false.
}
2382 
2383 
// CVTSI2SD xmm, r32/r64 (F2 [REX.W] 0F 2A /r): convert a signed integer in
// `src` (64-bit when `is64bit`) to double-precision float in `dst`.
void X86_64Assembler::cvtsi2sd(XmmRegister dst, CpuRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2A);
  EmitOperand(dst.LowBits(), Operand(src));
}
2397 
2398 
// Memory-operand form of CVTSI2SD: converts the integer at `src` (64-bit
// when `is64bit`) to double-precision float in `dst`.
void X86_64Assembler::cvtsi2sd(XmmRegister dst, const Address& src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2A);
  EmitOperand(dst.LowBits(), src);
}
2412 
2413 
// CVTSS2SI r32, xmm (F3 0F 2D /r): convert scalar single-precision float to
// a 32-bit signed integer, rounding per MXCSR.
void X86_64Assembler::cvtss2si(CpuRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x2D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2422 
2423 
// CVTSS2SD xmm1, xmm2 (F3 0F 5A /r): widen scalar single-precision float to
// double precision.
void X86_64Assembler::cvtss2sd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5A);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2432 
2433 
// Memory-operand form of CVTSS2SD (F3 0F 5A /r).
void X86_64Assembler::cvtss2sd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5A);
  EmitOperand(dst.LowBits(), src);
}
2442 
2443 
// CVTSD2SI r32, xmm (F2 0F 2D /r): convert scalar double-precision float to
// a 32-bit signed integer, rounding per MXCSR.
void X86_64Assembler::cvtsd2si(CpuRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x2D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2452 
2453 
// Convenience overload: float -> 32-bit int truncating conversion.
void X86_64Assembler::cvttss2si(CpuRegister dst, XmmRegister src) {
  cvttss2si(dst, src, false);  // is64bit = false.
}
2457 
2458 
// CVTTSS2SI r32/r64, xmm (F3 [REX.W] 0F 2C /r): convert scalar
// single-precision float to a signed integer with truncation.
void X86_64Assembler::cvttss2si(CpuRegister dst, XmmRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2472 
2473 
// Convenience overload: double -> 32-bit int truncating conversion.
void X86_64Assembler::cvttsd2si(CpuRegister dst, XmmRegister src) {
  cvttsd2si(dst, src, false);  // is64bit = false.
}
2477 
2478 
// CVTTSD2SI r32/r64, xmm (F2 [REX.W] 0F 2C /r): convert scalar
// double-precision float to a signed integer with truncation.
void X86_64Assembler::cvttsd2si(CpuRegister dst, XmmRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2492 
2493 
// CVTSD2SS xmm1, xmm2 (F2 0F 5A /r): narrow scalar double-precision float
// to single precision.
void X86_64Assembler::cvtsd2ss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5A);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2502 
2503 
// Memory-operand form of CVTSD2SS (F2 0F 5A /r).
void X86_64Assembler::cvtsd2ss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5A);
  EmitOperand(dst.LowBits(), src);
}
2512 
2513 
// CVTDQ2PS: convert packed 32-bit integers to packed singles.
// Encoding: [REX] 0F 5B /r (no mandatory prefix).
void X86_64Assembler::cvtdq2ps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5B);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2521 
2522 
// CVTDQ2PD: convert packed 32-bit integers to packed doubles.
// Encoding: F3 [REX] 0F E6 /r.
void X86_64Assembler::cvtdq2pd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE6);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2531 
2532 
// COMISS xmm, xmm: ordered scalar-single compare. Encoding: [REX] 0F 2F /r.
void X86_64Assembler::comiss(XmmRegister a, XmmRegister b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2F);
  EmitXmmRegisterOperand(a.LowBits(), b);
}
2540 
2541 
// COMISS xmm, m32: memory-source form. Encoding: [REX] 0F 2F /r.
void X86_64Assembler::comiss(XmmRegister a, const Address& b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2F);
  EmitOperand(a.LowBits(), b);
}
2549 
2550 
// COMISD xmm, xmm: ordered scalar-double compare. Encoding: 66 [REX] 0F 2F /r.
void X86_64Assembler::comisd(XmmRegister a, XmmRegister b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2F);
  EmitXmmRegisterOperand(a.LowBits(), b);
}
2559 
2560 
// COMISD xmm, m64: memory-source form. Encoding: 66 [REX] 0F 2F /r.
void X86_64Assembler::comisd(XmmRegister a, const Address& b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2F);
  EmitOperand(a.LowBits(), b);
}
2569 
2570 
// UCOMISS xmm, xmm: unordered scalar-single compare. Encoding: [REX] 0F 2E /r.
void X86_64Assembler::ucomiss(XmmRegister a, XmmRegister b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2E);
  EmitXmmRegisterOperand(a.LowBits(), b);
}
2578 
2579 
// UCOMISS xmm, m32: memory-source form. Encoding: [REX] 0F 2E /r.
void X86_64Assembler::ucomiss(XmmRegister a, const Address& b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2E);
  EmitOperand(a.LowBits(), b);
}
2587 
2588 
// UCOMISD xmm, xmm: unordered scalar-double compare. Encoding: 66 [REX] 0F 2E /r.
void X86_64Assembler::ucomisd(XmmRegister a, XmmRegister b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2E);
  EmitXmmRegisterOperand(a.LowBits(), b);
}
2597 
2598 
// UCOMISD xmm, m64: memory-source form. Encoding: 66 [REX] 0F 2E /r.
void X86_64Assembler::ucomisd(XmmRegister a, const Address& b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2E);
  EmitOperand(a.LowBits(), b);
}
2607 
2608 
// ROUNDSD xmm, xmm, imm8 (SSE4.1). Encoding: 66 [REX] 0F 3A 0B /r ib.
// The low byte of `imm` is the rounding-control immediate.
void X86_64Assembler::roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x3A);
  EmitUint8(0x0B);
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());
}
2619 
2620 
// ROUNDSS xmm, xmm, imm8 (SSE4.1). Encoding: 66 [REX] 0F 3A 0A /r ib.
// The low byte of `imm` is the rounding-control immediate.
void X86_64Assembler::roundss(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x3A);
  EmitUint8(0x0A);
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());
}
2631 
2632 
// SQRTSD xmm, xmm: scalar-double square root. Encoding: F2 [REX] 0F 51 /r.
void X86_64Assembler::sqrtsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x51);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2641 
2642 
// SQRTSS xmm, xmm: scalar-single square root. Encoding: F3 [REX] 0F 51 /r.
void X86_64Assembler::sqrtss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x51);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2651 
2652 
// XORPD xmm, m128: packed-double XOR, memory source. Encoding: 66 [REX] 0F 57 /r.
void X86_64Assembler::xorpd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x57);
  EmitOperand(dst.LowBits(), src);
}
2661 
2662 
// XORPD xmm, xmm: packed-double XOR. Encoding: 66 [REX] 0F 57 /r.
void X86_64Assembler::xorpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x57);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2671 
2672 
// XORPS xmm, m128: packed-single XOR, memory source. Encoding: [REX] 0F 57 /r.
void X86_64Assembler::xorps(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x57);
  EmitOperand(dst.LowBits(), src);
}
2680 
2681 
// XORPS xmm, xmm: packed-single XOR. Encoding: [REX] 0F 57 /r.
void X86_64Assembler::xorps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x57);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2689 
// PXOR xmm, xmm: bitwise XOR of 128-bit values. Encoding: 66 [REX] 0F EF /r.
void X86_64Assembler::pxor(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEF);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2698 
2699 /* VEX.128.66.0F.WIG EF /r VPXOR xmm1, xmm2, xmm3/m128 */
vpxor(XmmRegister dst,XmmRegister src1,XmmRegister src2)2700 void X86_64Assembler::vpxor(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
2701   DCHECK(CpuHasAVXorAVX2FeatureFlag());
2702   bool is_twobyte_form = false;
2703   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2704   if (!src2.NeedsRex()) {
2705     is_twobyte_form = true;
2706   } else if (!src1.NeedsRex()) {
2707     return vpxor(dst, src2, src1);
2708   }
2709   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2710   X86_64ManagedRegister vvvv_reg =
2711       X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
2712   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2713   if (is_twobyte_form) {
2714     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2715   } else {
2716     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2717                                    /*X=*/ false,
2718                                    src2.NeedsRex(),
2719                                    SET_VEX_M_0F);
2720     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2721   }
2722   EmitUint8(ByteZero);
2723   EmitUint8(ByteOne);
2724   if (!is_twobyte_form) {
2725     EmitUint8(ByteTwo);
2726   }
2727   EmitUint8(0xEF);
2728   EmitXmmRegisterOperand(dst.LowBits(), src2);
2729 }
2730 
2731 /* VEX.128.0F.WIG 57 /r VXORPS xmm1,xmm2, xmm3/m128 */
vxorps(XmmRegister dst,XmmRegister src1,XmmRegister src2)2732 void X86_64Assembler::vxorps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
2733   DCHECK(CpuHasAVXorAVX2FeatureFlag());
2734   bool is_twobyte_form = false;
2735   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2736   if (!src2.NeedsRex()) {
2737     is_twobyte_form = true;
2738   } else if (!src1.NeedsRex()) {
2739     return vxorps(dst, src2, src1);
2740   }
2741   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2742   X86_64ManagedRegister vvvv_reg =
2743       X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
2744   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2745   if (is_twobyte_form) {
2746     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
2747   } else {
2748     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2749                                    /*X=*/ false,
2750                                    src2.NeedsRex(),
2751                                    SET_VEX_M_0F);
2752     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
2753   }
2754   EmitUint8(ByteZero);
2755   EmitUint8(ByteOne);
2756   if (!is_twobyte_form) {
2757     EmitUint8(ByteTwo);
2758   }
2759   EmitUint8(0x57);
2760   EmitXmmRegisterOperand(dst.LowBits(), src2);
2761 }
2762 
2763 /* VEX.128.66.0F.WIG 57 /r VXORPD xmm1,xmm2, xmm3/m128 */
vxorpd(XmmRegister dst,XmmRegister src1,XmmRegister src2)2764 void X86_64Assembler::vxorpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
2765   DCHECK(CpuHasAVXorAVX2FeatureFlag());
2766   bool is_twobyte_form = false;
2767   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2768   if (!src2.NeedsRex()) {
2769     is_twobyte_form = true;
2770   } else if (!src1.NeedsRex()) {
2771     return vxorpd(dst, src2, src1);
2772   }
2773   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2774   X86_64ManagedRegister vvvv_reg =
2775       X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
2776   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2777   if (is_twobyte_form) {
2778     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2779   } else {
2780     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2781                                    /*X=*/ false,
2782                                    src2.NeedsRex(),
2783                                    SET_VEX_M_0F);
2784     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2785   }
2786   EmitUint8(ByteZero);
2787   EmitUint8(ByteOne);
2788   if (!is_twobyte_form) {
2789     EmitUint8(ByteTwo);
2790   }
2791   EmitUint8(0x57);
2792   EmitXmmRegisterOperand(dst.LowBits(), src2);
2793 }
2794 
// ANDPD xmm, m128: packed-double AND, memory source. Encoding: 66 [REX] 0F 54 /r.
void X86_64Assembler::andpd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x54);
  EmitOperand(dst.LowBits(), src);
}
2803 
// ANDPD xmm, xmm: packed-double AND. Encoding: 66 [REX] 0F 54 /r.
void X86_64Assembler::andpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x54);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2812 
// ANDPS xmm, xmm: packed-single AND. Encoding: [REX] 0F 54 /r.
void X86_64Assembler::andps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x54);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2820 
// PAND xmm, xmm: bitwise AND of 128-bit values. Encoding: 66 [REX] 0F DB /r.
void X86_64Assembler::pand(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDB);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2829 
2830 /* VEX.128.66.0F.WIG DB /r VPAND xmm1, xmm2, xmm3/m128 */
vpand(XmmRegister dst,XmmRegister src1,XmmRegister src2)2831 void X86_64Assembler::vpand(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
2832   DCHECK(CpuHasAVXorAVX2FeatureFlag());
2833   bool is_twobyte_form = false;
2834   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2835   if (!src2.NeedsRex()) {
2836     is_twobyte_form = true;
2837   } else if (!src1.NeedsRex()) {
2838     return vpand(dst, src2, src1);
2839   }
2840   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2841   X86_64ManagedRegister vvvv_reg =
2842       X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
2843   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2844   if (is_twobyte_form) {
2845     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2846   } else {
2847     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2848                                    /*X=*/ false,
2849                                    src2.NeedsRex(),
2850                                    SET_VEX_M_0F);
2851     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2852   }
2853   EmitUint8(ByteZero);
2854   EmitUint8(ByteOne);
2855   if (!is_twobyte_form) {
2856     EmitUint8(ByteTwo);
2857   }
2858   EmitUint8(0xDB);
2859   EmitXmmRegisterOperand(dst.LowBits(), src2);
2860 }
2861 
2862 /* VEX.128.0F 54 /r VANDPS xmm1,xmm2, xmm3/m128 */
vandps(XmmRegister dst,XmmRegister src1,XmmRegister src2)2863 void X86_64Assembler::vandps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
2864   DCHECK(CpuHasAVXorAVX2FeatureFlag());
2865   bool is_twobyte_form = false;
2866   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2867   if (!src2.NeedsRex()) {
2868     is_twobyte_form = true;
2869   } else if (!src1.NeedsRex()) {
2870     return vandps(dst, src2, src1);
2871   }
2872   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2873   X86_64ManagedRegister vvvv_reg =
2874       X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
2875   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2876   if (is_twobyte_form) {
2877     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
2878   } else {
2879     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2880                                    /*X=*/ false,
2881                                    src2.NeedsRex(),
2882                                    SET_VEX_M_0F);
2883     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
2884   }
2885   EmitUint8(ByteZero);
2886   EmitUint8(ByteOne);
2887   if (!is_twobyte_form) {
2888     EmitUint8(ByteTwo);
2889   }
2890   EmitUint8(0x54);
2891   EmitXmmRegisterOperand(dst.LowBits(), src2);
2892 }
2893 
2894 /* VEX.128.66.0F 54 /r VANDPD xmm1, xmm2, xmm3/m128 */
vandpd(XmmRegister dst,XmmRegister src1,XmmRegister src2)2895 void X86_64Assembler::vandpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
2896   DCHECK(CpuHasAVXorAVX2FeatureFlag());
2897   bool is_twobyte_form = false;
2898   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2899   if (!src2.NeedsRex()) {
2900     is_twobyte_form = true;
2901   } else if (!src1.NeedsRex()) {
2902     return vandpd(dst, src2, src1);
2903   }
2904   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2905   X86_64ManagedRegister vvvv_reg =
2906       X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
2907   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2908   if (is_twobyte_form) {
2909     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2910   } else {
2911     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2912                                    /*X=*/ false,
2913                                    src2.NeedsRex(),
2914                                    SET_VEX_M_0F);
2915     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2916   }
2917   EmitUint8(ByteZero);
2918   EmitUint8(ByteOne);
2919   if (!is_twobyte_form) {
2920     EmitUint8(ByteTwo);
2921   }
2922   EmitUint8(0x54);
2923   EmitXmmRegisterOperand(dst.LowBits(), src2);
2924 }
2925 
andn(CpuRegister dst,CpuRegister src1,CpuRegister src2)2926 void X86_64Assembler::andn(CpuRegister dst, CpuRegister src1, CpuRegister src2) {
2927   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2928   uint8_t byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
2929   uint8_t byte_one = EmitVexPrefixByteOne(dst.NeedsRex(),
2930                                           /*X=*/ false,
2931                                           src2.NeedsRex(),
2932                                           SET_VEX_M_0F_38);
2933   uint8_t byte_two = EmitVexPrefixByteTwo(/*W=*/ true,
2934                                           X86_64ManagedRegister::FromCpuRegister(src1.AsRegister()),
2935                                           SET_VEX_L_128,
2936                                           SET_VEX_PP_NONE);
2937   EmitUint8(byte_zero);
2938   EmitUint8(byte_one);
2939   EmitUint8(byte_two);
2940   // Opcode field
2941   EmitUint8(0xF2);
2942   EmitRegisterOperand(dst.LowBits(), src2.LowBits());
2943 }
2944 
// ANDNPD xmm, xmm: packed-double AND-NOT. Encoding: 66 [REX] 0F 55 /r.
void X86_64Assembler::andnpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x55);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2953 
// ANDNPS xmm, xmm: packed-single AND-NOT. Encoding: [REX] 0F 55 /r.
void X86_64Assembler::andnps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x55);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2961 
// PANDN xmm, xmm: bitwise AND-NOT of 128-bit values. Encoding: 66 [REX] 0F DF /r.
void X86_64Assembler::pandn(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDF);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2970 
2971 /* VEX.128.66.0F.WIG DF /r VPANDN xmm1, xmm2, xmm3/m128 */
vpandn(XmmRegister dst,XmmRegister src1,XmmRegister src2)2972 void X86_64Assembler::vpandn(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
2973   DCHECK(CpuHasAVXorAVX2FeatureFlag());
2974   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2975   bool is_twobyte_form = false;
2976   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2977   if (!src2.NeedsRex()) {
2978     is_twobyte_form = true;
2979   }
2980   X86_64ManagedRegister vvvv_reg =
2981       X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
2982   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2983   if (is_twobyte_form) {
2984     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2985   } else {
2986     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2987                                    /*X=*/ false,
2988                                    src2.NeedsRex(),
2989                                    SET_VEX_M_0F);
2990     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2991   }
2992   EmitUint8(ByteZero);
2993   EmitUint8(ByteOne);
2994   if (!is_twobyte_form) {
2995     EmitUint8(ByteTwo);
2996   }
2997   EmitUint8(0xDF);
2998   EmitXmmRegisterOperand(dst.LowBits(), src2);
2999 }
3000 
3001 /* VEX.128.0F 55 /r VANDNPS xmm1, xmm2, xmm3/m128 */
vandnps(XmmRegister dst,XmmRegister src1,XmmRegister src2)3002 void X86_64Assembler::vandnps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
3003   DCHECK(CpuHasAVXorAVX2FeatureFlag());
3004   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3005   bool is_twobyte_form = false;
3006   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
3007   if (!src2.NeedsRex()) {
3008     is_twobyte_form = true;
3009   }
3010   X86_64ManagedRegister vvvv_reg =
3011       X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
3012   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
3013   if (is_twobyte_form) {
3014     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
3015   } else {
3016     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
3017                                    /*X=*/ false,
3018                                    src2.NeedsRex(),
3019                                    SET_VEX_M_0F);
3020     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
3021   }
3022   EmitUint8(ByteZero);
3023   EmitUint8(ByteOne);
3024   if (!is_twobyte_form) {
3025     EmitUint8(ByteTwo);
3026   }
3027   EmitUint8(0x55);
3028   EmitXmmRegisterOperand(dst.LowBits(), src2);
3029 }
3030 
3031 /* VEX.128.66.0F 55 /r VANDNPD xmm1, xmm2, xmm3/m128 */
vandnpd(XmmRegister dst,XmmRegister src1,XmmRegister src2)3032 void X86_64Assembler::vandnpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
3033   DCHECK(CpuHasAVXorAVX2FeatureFlag());
3034   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3035   bool is_twobyte_form = false;
3036   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
3037   if (!src2.NeedsRex()) {
3038     is_twobyte_form = true;
3039   }
3040   X86_64ManagedRegister vvvv_reg =
3041       X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
3042   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
3043   if (is_twobyte_form) {
3044     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
3045   } else {
3046     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
3047                                    /*X=*/ false,
3048                                    src2.NeedsRex(),
3049                                    SET_VEX_M_0F);
3050     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
3051   }
3052   EmitUint8(ByteZero);
3053   EmitUint8(ByteOne);
3054   if (!is_twobyte_form) {
3055     EmitUint8(ByteTwo);
3056   }
3057   EmitUint8(0x55);
3058   EmitXmmRegisterOperand(dst.LowBits(), src2);
3059 }
3060 
// ORPD xmm, xmm: packed-double OR. Encoding: 66 [REX] 0F 56 /r.
void X86_64Assembler::orpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x56);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3069 
// ORPS xmm, xmm: packed-single OR. Encoding: [REX] 0F 56 /r.
void X86_64Assembler::orps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x56);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3077 
// POR xmm, xmm: bitwise OR of 128-bit values. Encoding: 66 [REX] 0F EB /r.
void X86_64Assembler::por(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEB);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3086 
3087 /* VEX.128.66.0F.WIG EB /r VPOR xmm1, xmm2, xmm3/m128 */
vpor(XmmRegister dst,XmmRegister src1,XmmRegister src2)3088 void X86_64Assembler::vpor(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
3089   DCHECK(CpuHasAVXorAVX2FeatureFlag());
3090   bool is_twobyte_form = false;
3091   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
3092   if (!src2.NeedsRex()) {
3093     is_twobyte_form = true;
3094   } else if (!src1.NeedsRex()) {
3095     return vpor(dst, src2, src1);
3096   }
3097   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3098   X86_64ManagedRegister vvvv_reg =
3099       X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
3100   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
3101   if (is_twobyte_form) {
3102     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
3103   } else {
3104     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
3105                                    /*X=*/ false,
3106                                    src2.NeedsRex(),
3107                                    SET_VEX_M_0F);
3108     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
3109   }
3110   EmitUint8(ByteZero);
3111   EmitUint8(ByteOne);
3112   if (!is_twobyte_form) {
3113     EmitUint8(ByteTwo);
3114   }
3115   EmitUint8(0xEB);
3116   EmitXmmRegisterOperand(dst.LowBits(), src2);
3117 }
3118 
3119 /* VEX.128.0F 56 /r VORPS xmm1,xmm2, xmm3/m128 */
vorps(XmmRegister dst,XmmRegister src1,XmmRegister src2)3120 void X86_64Assembler::vorps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
3121   DCHECK(CpuHasAVXorAVX2FeatureFlag());
3122   bool is_twobyte_form = false;
3123   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
3124   if (!src2.NeedsRex()) {
3125     is_twobyte_form = true;
3126   } else if (!src1.NeedsRex()) {
3127     return vorps(dst, src2, src1);
3128   }
3129   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3130   X86_64ManagedRegister vvvv_reg =
3131       X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
3132   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
3133   if (is_twobyte_form) {
3134     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
3135   } else {
3136     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
3137                                    /*X=*/ false,
3138                                    src2.NeedsRex(),
3139                                    SET_VEX_M_0F);
3140     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
3141   }
3142   EmitUint8(ByteZero);
3143   EmitUint8(ByteOne);
3144   if (!is_twobyte_form) {
3145     EmitUint8(ByteTwo);
3146   }
3147   EmitUint8(0x56);
3148   EmitXmmRegisterOperand(dst.LowBits(), src2);
3149 }
3150 
3151 /* VEX.128.66.0F 56 /r VORPD xmm1,xmm2, xmm3/m128 */
vorpd(XmmRegister dst,XmmRegister src1,XmmRegister src2)3152 void X86_64Assembler::vorpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
3153   DCHECK(CpuHasAVXorAVX2FeatureFlag());
3154   bool is_twobyte_form = false;
3155   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
3156   if (!src2.NeedsRex()) {
3157     is_twobyte_form = true;
3158   } else if (!src1.NeedsRex()) {
3159     return vorpd(dst, src2, src1);
3160   }
3161   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3162   X86_64ManagedRegister vvvv_reg =
3163       X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
3164   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
3165   if (is_twobyte_form) {
3166     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
3167   } else {
3168     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
3169                                    /*X=*/ false,
3170                                    src2.NeedsRex(),
3171                                    SET_VEX_M_0F);
3172     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
3173   }
3174   EmitUint8(ByteZero);
3175   EmitUint8(ByteOne);
3176   if (!is_twobyte_form) {
3177     EmitUint8(ByteTwo);
3178   }
3179   EmitUint8(0x56);
3180   EmitXmmRegisterOperand(dst.LowBits(), src2);
3181 }
3182 
// PAVGB xmm, xmm: packed byte average. Encoding: 66 [REX] 0F E0 /r.
void X86_64Assembler::pavgb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE0);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3191 
// PAVGW xmm, xmm: packed word average. Encoding: 66 [REX] 0F E3 /r.
void X86_64Assembler::pavgw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE3);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3200 
// PSADBW xmm, xmm: sum of absolute byte differences. Encoding: 66 [REX] 0F F6 /r.
void X86_64Assembler::psadbw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xF6);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3209 
// PMADDWD xmm, xmm: multiply-add packed words. Encoding: 66 [REX] 0F F5 /r.
void X86_64Assembler::pmaddwd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xF5);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3218 
vpmaddwd(XmmRegister dst,XmmRegister src1,XmmRegister src2)3219 void X86_64Assembler::vpmaddwd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
3220   DCHECK(CpuHasAVXorAVX2FeatureFlag());
3221   bool is_twobyte_form = false;
3222   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
3223   if (!src2.NeedsRex()) {
3224     is_twobyte_form = true;
3225   } else if (!src1.NeedsRex()) {
3226     return vpmaddwd(dst, src2, src1);
3227   }
3228   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3229   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
3230   X86_64ManagedRegister vvvv_reg =
3231       X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
3232   if (is_twobyte_form) {
3233     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
3234   } else {
3235     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
3236                                    /*X=*/ false,
3237                                    src2.NeedsRex(),
3238                                    SET_VEX_M_0F);
3239     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
3240   }
3241   EmitUint8(ByteZero);
3242   EmitUint8(ByteOne);
3243   if (!is_twobyte_form) {
3244     EmitUint8(ByteTwo);
3245   }
3246   EmitUint8(0xF5);
3247   EmitXmmRegisterOperand(dst.LowBits(), src2);
3248 }
3249 
// PHADDW xmm, xmm (SSSE3): horizontal add of packed words.
// Encoding: 66 [REX] 0F 38 01 /r.
void X86_64Assembler::phaddw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x01);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3259 
// PHADDD xmm, xmm (SSSE3): horizontal add of packed doublewords.
// Encoding: 66 [REX] 0F 38 02 /r.
void X86_64Assembler::phaddd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x02);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3269 
// HADDPS xmm, xmm (SSE3): horizontal add of packed singles.
// Encoding: F2 [REX] 0F 7C /r.
void X86_64Assembler::haddps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x7C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3278 
// HADDPD xmm, xmm (SSE3): horizontal add of packed doubles.
// Encoding: 66 [REX] 0F 7C /r.
void X86_64Assembler::haddpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x7C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3287 
// PHSUBW xmm, xmm (SSSE3): horizontal subtract of packed words.
// Encoding: 66 [REX] 0F 38 05 /r.
void X86_64Assembler::phsubw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x05);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3297 
// phsubd: packed horizontal subtract of 32-bit dwords (SSSE3).
// Encoding: 66 [REX] 0F 38 06 /r.
void X86_64Assembler::phsubd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x06);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3307 
// hsubps: horizontal subtract of packed single-precision floats (SSE3).
// Encoding: F2 [REX] 0F 7D /r.
void X86_64Assembler::hsubps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x7D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3316 
// hsubpd: horizontal subtract of packed double-precision floats (SSE3).
// Encoding: 66 [REX] 0F 7D /r.
void X86_64Assembler::hsubpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x7D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3325 
// pminsb: packed minimum of signed bytes (SSE4.1).
// Encoding: 66 [REX] 0F 38 38 /r.
void X86_64Assembler::pminsb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x38);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3335 
// pmaxsb: packed maximum of signed bytes (SSE4.1).
// Encoding: 66 [REX] 0F 38 3C /r.
void X86_64Assembler::pmaxsb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3345 
// pminsw: packed minimum of signed words (SSE2).
// Encoding: 66 [REX] 0F EA /r.
void X86_64Assembler::pminsw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEA);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3354 
// pmaxsw: packed maximum of signed words (SSE2).
// Encoding: 66 [REX] 0F EE /r.
void X86_64Assembler::pmaxsw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEE);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3363 
// pminsd: packed minimum of signed dwords (SSE4.1).
// Encoding: 66 [REX] 0F 38 39 /r.
void X86_64Assembler::pminsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x39);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3373 
// pmaxsd: packed maximum of signed dwords (SSE4.1).
// Encoding: 66 [REX] 0F 38 3D /r.
void X86_64Assembler::pmaxsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3383 
// pminub: packed minimum of unsigned bytes (SSE2).
// Encoding: 66 [REX] 0F DA /r.
void X86_64Assembler::pminub(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDA);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3392 
// pmaxub: packed maximum of unsigned bytes (SSE2).
// Encoding: 66 [REX] 0F DE /r.
void X86_64Assembler::pmaxub(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDE);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3401 
// pminuw: packed minimum of unsigned words (SSE4.1).
// Encoding: 66 [REX] 0F 38 3A /r.
void X86_64Assembler::pminuw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3A);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3411 
// pmaxuw: packed maximum of unsigned words (SSE4.1).
// Encoding: 66 [REX] 0F 38 3E /r.
void X86_64Assembler::pmaxuw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3421 
// pminud: packed minimum of unsigned dwords (SSE4.1).
// Encoding: 66 [REX] 0F 38 3B /r.
void X86_64Assembler::pminud(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3B);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3431 
// pmaxud: packed maximum of unsigned dwords (SSE4.1).
// Encoding: 66 [REX] 0F 38 3F /r.
void X86_64Assembler::pmaxud(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3F);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3441 
// minps: packed minimum of single-precision floats (SSE).
// Encoding: [REX] 0F 5D /r (no mandatory prefix).
void X86_64Assembler::minps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3449 
// maxps: packed maximum of single-precision floats (SSE).
// Encoding: [REX] 0F 5F /r (no mandatory prefix).
void X86_64Assembler::maxps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5F);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3457 
// minpd: packed minimum of double-precision floats (SSE2).
// Encoding: 66 [REX] 0F 5D /r.
void X86_64Assembler::minpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3466 
// maxpd: packed maximum of double-precision floats (SSE2).
// Encoding: 66 [REX] 0F 5F /r.
void X86_64Assembler::maxpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5F);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3475 
// pcmpeqb: packed compare-for-equality of bytes (SSE2).
// Encoding: 66 [REX] 0F 74 /r.
void X86_64Assembler::pcmpeqb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x74);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3484 
// pcmpeqw: packed compare-for-equality of words (SSE2).
// Encoding: 66 [REX] 0F 75 /r.
void X86_64Assembler::pcmpeqw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x75);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3493 
// pcmpeqd: packed compare-for-equality of dwords (SSE2).
// Encoding: 66 [REX] 0F 76 /r.
void X86_64Assembler::pcmpeqd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x76);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3502 
// pcmpeqq: packed compare-for-equality of qwords (SSE4.1).
// Encoding: 66 [REX] 0F 38 29 /r.
void X86_64Assembler::pcmpeqq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x29);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3512 
// pcmpgtb: packed signed greater-than compare of bytes (SSE2).
// Encoding: 66 [REX] 0F 64 /r.
void X86_64Assembler::pcmpgtb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x64);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3521 
// pcmpgtw: packed signed greater-than compare of words (SSE2).
// Encoding: 66 [REX] 0F 65 /r.
void X86_64Assembler::pcmpgtw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x65);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3530 
// pcmpgtd: packed signed greater-than compare of dwords (SSE2).
// Encoding: 66 [REX] 0F 66 /r.
void X86_64Assembler::pcmpgtd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x66);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3539 
// pcmpgtq: packed signed greater-than compare of qwords (SSE4.2).
// Encoding: 66 [REX] 0F 38 37 /r.
void X86_64Assembler::pcmpgtq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x37);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3549 
// shufpd: shuffle packed doubles, selector in `imm` (SSE2).
// Encoding: 66 [REX] 0F C6 /r ib. Only the low byte of `imm` is emitted.
void X86_64Assembler::shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xC6);
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());
}
3559 
3560 
// shufps: shuffle packed singles, selector in `imm` (SSE).
// Encoding: [REX] 0F C6 /r ib. Only the low byte of `imm` is emitted.
void X86_64Assembler::shufps(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xC6);
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());
}
3569 
3570 
// pshufd: shuffle packed dwords by immediate selector (SSE2).
// Encoding: 66 [REX] 0F 70 /r ib. Only the low byte of `imm` is emitted.
void X86_64Assembler::pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x70);
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());
}
3580 
3581 
// punpcklbw: interleave low bytes of dst and src (SSE2).
// Encoding: 66 [REX] 0F 60 /r.
void X86_64Assembler::punpcklbw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x60);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3590 
3591 
// punpcklwd: interleave low words of dst and src (SSE2).
// Encoding: 66 [REX] 0F 61 /r.
void X86_64Assembler::punpcklwd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x61);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3600 
3601 
// punpckldq: interleave low dwords of dst and src (SSE2).
// Encoding: 66 [REX] 0F 62 /r.
void X86_64Assembler::punpckldq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x62);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3610 
3611 
// punpcklqdq: interleave low qwords of dst and src (SSE2).
// Encoding: 66 [REX] 0F 6C /r.
void X86_64Assembler::punpcklqdq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3620 
3621 
// punpckhbw: interleave high bytes of dst and src (SSE2).
// Encoding: 66 [REX] 0F 68 /r.
void X86_64Assembler::punpckhbw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x68);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3630 
3631 
// punpckhwd: interleave high words of dst and src (SSE2).
// Encoding: 66 [REX] 0F 69 /r.
void X86_64Assembler::punpckhwd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x69);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3640 
3641 
// punpckhdq: interleave high dwords of dst and src (SSE2).
// Encoding: 66 [REX] 0F 6A /r.
void X86_64Assembler::punpckhdq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6A);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3650 
3651 
// punpckhqdq: interleave high qwords of dst and src (SSE2).
// Encoding: 66 [REX] 0F 6D /r.
void X86_64Assembler::punpckhqdq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3660 
3661 
// psllw: shift packed words left by immediate (SSE2).
// Encoding: 66 [REX] 0F 71 /6 ib — opcode extension 6 goes in the ModRM reg field.
void X86_64Assembler::psllw(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  // Only the B bit can be needed: the register is encoded in ModRM r/m.
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x71);
  EmitXmmRegisterOperand(6, reg);
  EmitUint8(shift_count.value());
}
3672 
3673 
// pslld: shift packed dwords left by immediate (SSE2).
// Encoding: 66 [REX] 0F 72 /6 ib.
void X86_64Assembler::pslld(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  // Only the B bit can be needed: the register is encoded in ModRM r/m.
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x72);
  EmitXmmRegisterOperand(6, reg);
  EmitUint8(shift_count.value());
}
3684 
3685 
// psllq: shift packed qwords left by immediate (SSE2).
// Encoding: 66 [REX] 0F 73 /6 ib.
void X86_64Assembler::psllq(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  // Only the B bit can be needed: the register is encoded in ModRM r/m.
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x73);
  EmitXmmRegisterOperand(6, reg);
  EmitUint8(shift_count.value());
}
3696 
3697 
// psraw: arithmetic right shift of packed words by immediate (SSE2).
// Encoding: 66 [REX] 0F 71 /4 ib.
void X86_64Assembler::psraw(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  // Only the B bit can be needed: the register is encoded in ModRM r/m.
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x71);
  EmitXmmRegisterOperand(4, reg);
  EmitUint8(shift_count.value());
}
3708 
3709 
// psrad: arithmetic right shift of packed dwords by immediate (SSE2).
// Encoding: 66 [REX] 0F 72 /4 ib.
void X86_64Assembler::psrad(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  // Only the B bit can be needed: the register is encoded in ModRM r/m.
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x72);
  EmitXmmRegisterOperand(4, reg);
  EmitUint8(shift_count.value());
}
3720 
3721 
// psrlw: logical right shift of packed words by immediate (SSE2).
// Encoding: 66 [REX] 0F 71 /2 ib.
void X86_64Assembler::psrlw(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  // Only the B bit can be needed: the register is encoded in ModRM r/m.
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x71);
  EmitXmmRegisterOperand(2, reg);
  EmitUint8(shift_count.value());
}
3732 
3733 
// psrld: logical right shift of packed dwords by immediate (SSE2).
// Encoding: 66 [REX] 0F 72 /2 ib.
void X86_64Assembler::psrld(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  // Only the B bit can be needed: the register is encoded in ModRM r/m.
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x72);
  EmitXmmRegisterOperand(2, reg);
  EmitUint8(shift_count.value());
}
3744 
3745 
// psrlq: logical right shift of packed qwords by immediate (SSE2).
// Encoding: 66 [REX] 0F 73 /2 ib.
void X86_64Assembler::psrlq(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  // Only the B bit can be needed: the register is encoded in ModRM r/m.
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x73);
  EmitXmmRegisterOperand(2, reg);
  EmitUint8(shift_count.value());
}
3756 
3757 
// psrldq: byte-wise logical right shift of the whole 128-bit register (SSE2).
// Encoding: 66 [REX] 0F 73 /3 ib — shift_count is in bytes, not bits.
void X86_64Assembler::psrldq(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  // Only the B bit can be needed: the register is encoded in ModRM r/m.
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x73);
  EmitXmmRegisterOperand(3, reg);
  EmitUint8(shift_count.value());
}
3768 
3769 
// fldl: load a 64-bit float from memory onto the x87 stack.
// Encoding: DD /0.
void X86_64Assembler::fldl(const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDD);
  EmitOperand(0, src);
}
3775 
3776 
// fstl: store ST(0) as a 64-bit float to memory (no pop).
// Encoding: DD /2.
void X86_64Assembler::fstl(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDD);
  EmitOperand(2, dst);
}
3782 
3783 
// fstpl: store ST(0) as a 64-bit float to memory and pop the x87 stack.
// Encoding: DD /3.
void X86_64Assembler::fstpl(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDD);
  EmitOperand(3, dst);
}
3789 
3790 
// fstsw: store the x87 status word into AX, after waiting for pending
// FP exceptions. Encoding: 9B (fwait) DF E0 (fnstsw ax).
void X86_64Assembler::fstsw() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x9B);
  EmitUint8(0xDF);
  EmitUint8(0xE0);
}
3797 
3798 
// fnstcw: store the x87 control word to memory without checking exceptions.
// Encoding: D9 /7.
void X86_64Assembler::fnstcw(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(7, dst);
}
3804 
3805 
// fldcw: load the x87 control word from memory.
// Encoding: D9 /5.
void X86_64Assembler::fldcw(const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(5, src);
}
3811 
3812 
// fistpl: store ST(0) to memory as a 64-bit integer and pop.
// Encoding: DF /7 (FISTP m64int).
void X86_64Assembler::fistpl(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDF);
  EmitOperand(7, dst);
}
3818 
3819 
// fistps: store ST(0) to memory as a 32-bit integer and pop.
// Encoding: DB /3 (FISTP m32int).
void X86_64Assembler::fistps(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDB);
  EmitOperand(3, dst);
}
3825 
3826 
// fildl: load a 64-bit integer from memory onto the x87 stack.
// Encoding: DF /5 (FILD m64int).
void X86_64Assembler::fildl(const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDF);
  EmitOperand(5, src);
}
3832 
3833 
// filds: load a 32-bit integer from memory onto the x87 stack.
// Encoding: DB /0 (FILD m32int).
void X86_64Assembler::filds(const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDB);
  EmitOperand(0, src);
}
3839 
3840 
// fincstp: increment the x87 stack-top pointer.
// Encoding: D9 F7.
void X86_64Assembler::fincstp() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xF7);
}
3846 
3847 
// ffree: mark x87 register ST(index) as empty.
// Encoding: DD C0+i.
// NOTE(review): CHECK_LT(..., 7) rejects index 7, although DD C7 (ffree st(7))
// is a valid encoding — presumably intentional for this assembler's callers; verify.
void X86_64Assembler::ffree(const Immediate& index) {
  CHECK_LT(index.value(), 7);
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDD);
  EmitUint8(0xC0 + index.value());
}
3854 
3855 
// fsin: replace ST(0) with its sine. Encoding: D9 FE.
void X86_64Assembler::fsin() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xFE);
}
3861 
3862 
// fcos: replace ST(0) with its cosine. Encoding: D9 FF.
void X86_64Assembler::fcos() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xFF);
}
3868 
3869 
// fptan: partial tangent of ST(0) (pushes 1.0 afterwards). Encoding: D9 F2.
void X86_64Assembler::fptan() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xF2);
}
3875 
// fucompp: unordered compare ST(0) with ST(1), then pop twice. Encoding: DA E9.
void X86_64Assembler::fucompp() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDA);
  EmitUint8(0xE9);
}
3881 
3882 
// fprem: partial remainder of ST(0) / ST(1). Encoding: D9 F8.
void X86_64Assembler::fprem() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xF8);
}
3888 
3889 
// Helper for the xchg family: if either operand is RAX, emit the one-byte
// short form XCHG rAX, r (opcode 90+r) and return true; otherwise emit
// nothing and return false so the caller falls back to the 87 /r form.
// `prefix_fn` emits the appropriate REX prefix variant (optional 32-bit or
// mandatory 64-bit) for the non-RAX register.
bool X86_64Assembler::try_xchg_rax(CpuRegister dst,
                                   CpuRegister src,
                                   void (X86_64Assembler::*prefix_fn)(CpuRegister)) {
  Register src_reg = src.AsRegister();
  Register dst_reg = dst.AsRegister();
  if (src_reg != RAX && dst_reg != RAX) {
    return false;
  }
  // Normalize so that dst_reg holds the non-RAX operand (xchg is symmetric).
  if (dst_reg == RAX) {
    std::swap(src_reg, dst_reg);
  }
  if (dst_reg != RAX) {
    // Prefix is needed only if one of the registers is not RAX, otherwise it's a pure NOP.
    (this->*prefix_fn)(CpuRegister(dst_reg));
  }
  EmitUint8(0x90 + CpuRegister(dst_reg).LowBits());
  return true;
}
3908 
3909 
// xchgb: exchange two byte registers. Encoding: [REX] 86 /r.
// The REX helper normalizes both operands so SPL/BPL/SIL/DIL are
// addressable instead of AH/CH/DH/BH.
void X86_64Assembler::xchgb(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // There is no short version for AL.
  EmitOptionalByteRegNormalizingRex32(dst, src, /*normalize_both=*/ true);
  EmitUint8(0x86);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
3917 
3918 
// xchgb: exchange a byte register with a memory operand. Encoding: [REX] 86 /r.
void X86_64Assembler::xchgb(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalByteRegNormalizingRex32(reg, address);
  EmitUint8(0x86);
  EmitOperand(reg.LowBits(), address);
}
3925 
3926 
// xchgw: exchange two 16-bit registers.
// Encoding: 66 then either the short form 90+r (when one operand is AX)
// or [REX] 87 /r. The 66 prefix must precede any REX prefix.
void X86_64Assembler::xchgw(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();
  if (try_xchg_rax(dst, src, &X86_64Assembler::EmitOptionalRex32)) {
    // A short version for AX.
    return;
  }
  // General case.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x87);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
3939 
3940 
// xchgw: exchange a 16-bit register with a memory operand.
// Encoding: 66 [REX] 87 /r.
void X86_64Assembler::xchgw(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();
  EmitOptionalRex32(reg, address);
  EmitUint8(0x87);
  EmitOperand(reg.LowBits(), address);
}
3948 
3949 
// xchgl: exchange two 32-bit registers.
// Uses the one-byte form 90+r when one operand is EAX, else [REX] 87 /r.
void X86_64Assembler::xchgl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (try_xchg_rax(dst, src, &X86_64Assembler::EmitOptionalRex32)) {
    // A short version for EAX.
    return;
  }
  // General case.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x87);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
3961 
3962 
// xchgl: exchange a 32-bit register with a memory operand.
// Encoding: [REX] 87 /r. Note: xchg with memory is implicitly locked.
void X86_64Assembler::xchgl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x87);
  EmitOperand(reg.LowBits(), address);
}
3969 
3970 
// xchgq: exchange two 64-bit registers.
// Uses REX.W + 90+r when one operand is RAX, else REX.W 87 /r.
void X86_64Assembler::xchgq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (try_xchg_rax(dst, src, &X86_64Assembler::EmitRex64)) {
    // A short version for RAX.
    return;
  }
  // General case.
  EmitRex64(dst, src);
  EmitUint8(0x87);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
3982 
3983 
// xchgq: exchange a 64-bit register with a memory operand.
// Encoding: REX.W 87 /r. Note: xchg with memory is implicitly locked.
void X86_64Assembler::xchgq(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x87);
  EmitOperand(reg.LowBits(), address);
}
3990 
3991 
// xaddb: exchange-and-add byte registers (dst += src; src = old dst).
// Encoding: [REX] 0F C0 /r — src goes in the ModRM reg field, dst in r/m.
void X86_64Assembler::xaddb(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalByteRegNormalizingRex32(src, dst, /*normalize_both=*/ true);
  EmitUint8(0x0F);
  EmitUint8(0xC0);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}
3999 
4000 
// xaddb: exchange-and-add a byte register into memory.
// Encoding: [REX] 0F C0 /r.
void X86_64Assembler::xaddb(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalByteRegNormalizingRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xC0);
  EmitOperand(reg.LowBits(), address);
}
4008 
4009 
// xaddw: exchange-and-add 16-bit registers.
// Encoding: 66 [REX] 0F C1 /r — src in ModRM reg, dst in r/m.
void X86_64Assembler::xaddw(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0xC1);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}
4018 
4019 
// xaddw: exchange-and-add a 16-bit register into memory.
// Encoding: 66 [REX] 0F C1 /r.
void X86_64Assembler::xaddw(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xC1);
  EmitOperand(reg.LowBits(), address);
}
4028 
4029 
// xaddl: exchange-and-add 32-bit registers.
// Encoding: [REX] 0F C1 /r — src in ModRM reg, dst in r/m.
void X86_64Assembler::xaddl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0xC1);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}
4037 
4038 
// xaddl: exchange-and-add a 32-bit register into memory.
// Encoding: [REX] 0F C1 /r.
void X86_64Assembler::xaddl(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xC1);
  EmitOperand(reg.LowBits(), address);
}
4046 
4047 
// xaddq: exchange-and-add 64-bit registers.
// Encoding: REX.W 0F C1 /r — src in ModRM reg, dst in r/m.
void X86_64Assembler::xaddq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0xC1);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}
4055 
4056 
// xaddq: exchange-and-add a 64-bit register into memory.
// Encoding: REX.W 0F C1 /r.
void X86_64Assembler::xaddq(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xC1);
  EmitOperand(reg.LowBits(), address);
}
4064 
4065 
// cmpb: compare a byte in memory against an 8-bit immediate.
// Encoding: [REX] 80 /7 ib. Only the low byte of `imm` is emitted.
// NOTE(review): the guard is is_int32(), not is_int8() — values outside
// int8 range are silently truncated; confirm callers never rely on that.
void X86_64Assembler::cmpb(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());
  EmitOptionalRex32(address);
  EmitUint8(0x80);
  EmitOperand(7, address);
  EmitUint8(imm.value() & 0xFF);
}
4074 
4075 
// cmpw: compare a 16-bit word in memory against an immediate.
// 66 prefix + EmitComplex with opcode extension /7 (selects CMP).
void X86_64Assembler::cmpw(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());
  EmitOperandSizeOverride();
  EmitOptionalRex32(address);
  EmitComplex(7, address, imm, /* is_16_op= */ true);
}
4083 
4084 
// cmpl: compare a 32-bit register against an immediate.
// EmitComplex with extension /7 picks the short 83 /7 ib or 81 /7 id form.
void X86_64Assembler::cmpl(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());
  EmitOptionalRex32(reg);
  EmitComplex(7, Operand(reg), imm);
}
4091 
4092 
// cmpl: compare two 32-bit registers. Encoding: [REX] 3B /r (reg0 - reg1).
void X86_64Assembler::cmpl(CpuRegister reg0, CpuRegister reg1) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg0, reg1);
  EmitUint8(0x3B);
  EmitOperand(reg0.LowBits(), Operand(reg1));
}
4099 
4100 
// cmpl: compare a 32-bit register against memory. Encoding: [REX] 3B /r.
void X86_64Assembler::cmpl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x3B);
  EmitOperand(reg.LowBits(), address);
}
4107 
4108 
// cmpl: compare memory against a 32-bit register. Encoding: [REX] 39 /r.
void X86_64Assembler::cmpl(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x39);
  EmitOperand(reg.LowBits(), address);
}
4115 
4116 
// cmpl: compare a 32-bit value in memory against an immediate
// (EmitComplex /7 selects CMP; short 83 form used for int8 immediates).
void X86_64Assembler::cmpl(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());
  EmitOptionalRex32(address);
  EmitComplex(7, address, imm);
}
4123 
4124 
// cmpq: compare two 64-bit registers. Encoding: REX.W 3B /r.
void X86_64Assembler::cmpq(CpuRegister reg0, CpuRegister reg1) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg0, reg1);
  EmitUint8(0x3B);
  EmitOperand(reg0.LowBits(), Operand(reg1));
}
4131 
4132 
// cmpq: compare a 64-bit register against a sign-extended 32-bit immediate
// (the ISA has no 64-bit immediate form for CMP).
void X86_64Assembler::cmpq(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // cmpq only supports 32b immediate.
  EmitRex64(reg);
  EmitComplex(7, Operand(reg), imm);
}
4139 
4140 
// cmpq: compare a 64-bit register against memory. Encoding: REX.W 3B /r.
void X86_64Assembler::cmpq(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x3B);
  EmitOperand(reg.LowBits(), address);
}
4147 
4148 
// cmpq: compare a 64-bit value in memory against a sign-extended 32-bit
// immediate (EmitComplex /7 selects CMP).
void X86_64Assembler::cmpq(const Address& address, const Immediate& imm) {
  CHECK(imm.is_int32());  // cmpq only supports 32b immediate.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(address);
  EmitComplex(7, address, imm);
}
4155 
4156 
// addl: dst += src for 32-bit registers. Encoding: [REX] 03 /r.
void X86_64Assembler::addl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x03);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
4163 
4164 
// addl: reg += 32-bit value at address. Encoding: [REX] 03 /r.
void X86_64Assembler::addl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x03);
  EmitOperand(reg.LowBits(), address);
}
4171 
4172 
// testl: AND two 32-bit registers, setting flags only. Encoding: [REX] 85 /r.
void X86_64Assembler::testl(CpuRegister reg1, CpuRegister reg2) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg1, reg2);
  EmitUint8(0x85);
  EmitRegisterOperand(reg1.LowBits(), reg2.LowBits());
}
4179 
4180 
testl(CpuRegister reg,const Address & address)4181 void X86_64Assembler::testl(CpuRegister reg, const Address& address) {
4182   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4183   EmitOptionalRex32(reg, address);
4184   EmitUint8(0x85);
4185   EmitOperand(reg.LowBits(), address);
4186 }
4187 
4188 
testl(CpuRegister reg,const Immediate & immediate)4189 void X86_64Assembler::testl(CpuRegister reg, const Immediate& immediate) {
4190   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4191   // For registers that have a byte variant (RAX, RBX, RCX, and RDX)
4192   // we only test the byte CpuRegister to keep the encoding short.
4193   if (immediate.is_uint8() && reg.AsRegister() < 4) {
4194     // Use zero-extended 8-bit immediate.
4195     if (reg.AsRegister() == RAX) {
4196       EmitUint8(0xA8);
4197     } else {
4198       EmitUint8(0xF6);
4199       EmitUint8(0xC0 + reg.AsRegister());
4200     }
4201     EmitUint8(immediate.value() & 0xFF);
4202   } else if (reg.AsRegister() == RAX) {
4203     // Use short form if the destination is RAX.
4204     EmitUint8(0xA9);
4205     EmitImmediate(immediate);
4206   } else {
4207     EmitOptionalRex32(reg);
4208     EmitUint8(0xF7);
4209     EmitOperand(0, Operand(reg));
4210     EmitImmediate(immediate);
4211   }
4212 }
4213 
4214 
// testq: 64-bit TEST (non-destructive AND, sets flags only).
void X86_64Assembler::testq(CpuRegister reg1, CpuRegister reg2) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg1, reg2);
  EmitUint8(0x85);  // TEST r/m64, r64.
  EmitRegisterOperand(reg1.LowBits(), reg2.LowBits());
}


void X86_64Assembler::testq(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x85);  // TEST r/m64, r64.
  EmitOperand(reg.LowBits(), address);
}
4229 
4230 
testb(const Address & dst,const Immediate & imm)4231 void X86_64Assembler::testb(const Address& dst, const Immediate& imm) {
4232   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4233   EmitOptionalRex32(dst);
4234   EmitUint8(0xF6);
4235   EmitOperand(Register::RAX, dst);
4236   CHECK(imm.is_int8());
4237   EmitUint8(imm.value() & 0xFF);
4238 }
4239 
4240 
// testl: TEST r/m32, imm32 (0xF7 /0) against a memory operand.
void X86_64Assembler::testl(const Address& dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitUint8(0xF7);
  EmitOperand(0, dst);  // /0 opcode extension.
  EmitImmediate(imm);
}
4248 
4249 
// AND family. 0x23 /r is AND r, r/m; immediate forms go through EmitComplex
// with the /4 opcode extension.
void X86_64Assembler::andl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x23);
  EmitOperand(dst.LowBits(), Operand(src));
}


void X86_64Assembler::andl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x23);
  EmitOperand(reg.LowBits(), address);
}


void X86_64Assembler::andl(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitComplex(4, Operand(dst), imm);  // /4 = AND opcode extension.
}


void X86_64Assembler::andq(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // andq only supports 32b immediate.
  EmitRex64(reg);
  EmitComplex(4, Operand(reg), imm);
}


void X86_64Assembler::andq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x23);
  EmitOperand(dst.LowBits(), Operand(src));
}


void X86_64Assembler::andq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x23);
  EmitOperand(dst.LowBits(), src);
}


// 16-bit form: 0x66 operand-size prefix, then the same /4 immediate encoding.
void X86_64Assembler::andw(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_uint16() || imm.is_int16()) << imm.value();
  EmitUint8(0x66);
  EmitOptionalRex32(address);
  EmitComplex(4, address, imm, /* is_16_op= */ true);
}
4304 
4305 
// OR family. 0x0B /r is OR r, r/m; immediate forms use the /1 opcode
// extension via EmitComplex.
void X86_64Assembler::orl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0B);
  EmitOperand(dst.LowBits(), Operand(src));
}


void X86_64Assembler::orl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0B);
  EmitOperand(reg.LowBits(), address);
}


void X86_64Assembler::orl(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitComplex(1, Operand(dst), imm);  // /1 = OR opcode extension.
}


void X86_64Assembler::orq(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // orq only supports 32b immediate.
  EmitRex64(dst);
  EmitComplex(1, Operand(dst), imm);
}


void X86_64Assembler::orq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0B);
  EmitOperand(dst.LowBits(), Operand(src));
}


void X86_64Assembler::orq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0B);
  EmitOperand(dst.LowBits(), src);
}
4351 
4352 
// XOR family. 0x33 /r is XOR r, r/m; immediate forms use the /6 opcode
// extension via EmitComplex.
void X86_64Assembler::xorl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x33);
  EmitOperand(dst.LowBits(), Operand(src));
}


void X86_64Assembler::xorl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x33);
  EmitOperand(reg.LowBits(), address);
}


void X86_64Assembler::xorl(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitComplex(6, Operand(dst), imm);  // /6 = XOR opcode extension.
}


void X86_64Assembler::xorq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x33);
  EmitOperand(dst.LowBits(), Operand(src));
}


void X86_64Assembler::xorq(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // xorq only supports 32b immediate.
  EmitRex64(dst);
  EmitComplex(6, Operand(dst), imm);
}

void X86_64Assembler::xorq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x33);
  EmitOperand(dst.LowBits(), src);
}
4397 
4398 
#if 0
// NOTE(review): dead code, permanently disabled with '#if 0'. The live
// encoders in this file use the EmitRex64/EmitOptionalRex32 helper family
// instead. This block would not compile as written: 'mem->rex()' below
// applies '->' to a reference. Kept byte-identical apart from this note.
void X86_64Assembler::rex(bool force, bool w, Register* r, Register* x, Register* b) {
  // REX.WRXB
  // W - 64-bit operand
  // R - MODRM.reg
  // X - SIB.index
  // B - MODRM.rm/SIB.base
  uint8_t rex = force ? 0x40 : 0;
  if (w) {
    rex |= 0x48;  // REX.W000
  }
  if (r != nullptr && *r >= Register::R8 && *r < Register::kNumberOfCpuRegisters) {
    rex |= 0x44;  // REX.0R00
    *r = static_cast<Register>(*r - 8);
  }
  if (x != nullptr && *x >= Register::R8 && *x < Register::kNumberOfCpuRegisters) {
    rex |= 0x42;  // REX.00X0
    *x = static_cast<Register>(*x - 8);
  }
  if (b != nullptr && *b >= Register::R8 && *b < Register::kNumberOfCpuRegisters) {
    rex |= 0x41;  // REX.000B
    *b = static_cast<Register>(*b - 8);
  }
  if (rex != 0) {
    EmitUint8(rex);
  }
}

void X86_64Assembler::rex_reg_mem(bool force, bool w, Register* dst, const Address& mem) {
  // REX.WRXB
  // W - 64-bit operand
  // R - MODRM.reg
  // X - SIB.index
  // B - MODRM.rm/SIB.base
  uint8_t rex = mem->rex();
  if (force) {
    rex |= 0x40;  // REX.0000
  }
  if (w) {
    rex |= 0x48;  // REX.W000
  }
  if (dst != nullptr && *dst >= Register::R8 && *dst < Register::kNumberOfCpuRegisters) {
    rex |= 0x44;  // REX.0R00
    *dst = static_cast<Register>(*dst - 8);
  }
  if (rex != 0) {
    EmitUint8(rex);
  }
}

void rex_mem_reg(bool force, bool w, Address* mem, Register* src);
#endif
4451 
// ADD immediate forms; /0 is the ADD opcode extension used by EmitComplex.
void X86_64Assembler::addl(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitComplex(0, Operand(reg), imm);
}


// 16-bit form: 0x66 operand-size prefix before the same /0 encoding.
void X86_64Assembler::addw(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_uint16() || imm.is_int16()) << imm.value();
  EmitUint8(0x66);
  EmitOptionalRex32(reg);
  EmitComplex(0, Operand(reg), imm, /* is_16_op= */ true);
}


void X86_64Assembler::addq(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // addq only supports 32b immediate.
  EmitRex64(reg);
  EmitComplex(0, Operand(reg), imm);
}
4474 
4475 
// ADD register/memory forms. 0x03 /r loads (reg <- reg + r/m); 0x01 /r
// stores (r/m <- r/m + reg), so the REX/ModRM operand roles swap.
void X86_64Assembler::addq(CpuRegister dst, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, address);
  EmitUint8(0x03);
  EmitOperand(dst.LowBits(), address);
}


void X86_64Assembler::addq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // 0x01 is addq r/m64 <- r/m64 + r64, with op1 in r/m and op2 in reg: so reverse EmitRex64
  EmitRex64(src, dst);
  EmitUint8(0x01);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}


void X86_64Assembler::addl(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x01);  // ADD r/m32, r32.
  EmitOperand(reg.LowBits(), address);
}


void X86_64Assembler::addl(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitComplex(0, address, imm);  // /0 = ADD opcode extension.
}


void X86_64Assembler::addw(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_uint16() || imm.is_int16()) << imm.value();
  EmitUint8(0x66);  // Operand-size override for the 16-bit form.
  EmitOptionalRex32(address);
  EmitComplex(0, address, imm, /* is_16_op= */ true);
}


void X86_64Assembler::addw(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();  // Same 0x66 prefix, via the named helper.
  EmitOptionalRex32(reg, address);
  EmitUint8(0x01);
  EmitOperand(reg.LowBits(), address);
}
4524 
4525 
// SUB family. 0x2B /r is SUB r, r/m; immediate forms use the /5 opcode
// extension via EmitComplex.
void X86_64Assembler::subl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x2B);
  EmitOperand(dst.LowBits(), Operand(src));
}


void X86_64Assembler::subl(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitComplex(5, Operand(reg), imm);  // /5 = SUB opcode extension.
}


void X86_64Assembler::subq(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // subq only supports 32b immediate.
  EmitRex64(reg);
  EmitComplex(5, Operand(reg), imm);
}


void X86_64Assembler::subq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x2B);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
4555 
4556 
subq(CpuRegister reg,const Address & address)4557 void X86_64Assembler::subq(CpuRegister reg, const Address& address) {
4558   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4559   EmitRex64(reg, address);
4560   EmitUint8(0x2B);
4561   EmitOperand(reg.LowBits() & 7, address);
4562 }
4563 
4564 
// subl: SUB r32, r/m32 (0x2B /r) with a memory source.
void X86_64Assembler::subl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x2B);
  EmitOperand(reg.LowBits(), address);
}
4571 
4572 
// Sign-extension and divide helpers. 0x99 is CDQ; with REX.W it becomes CQO.
// 0xF7 /7 (0xF8|reg) is IDIV, 0xF7 /6 (0xF0|reg) is DIV.
void X86_64Assembler::cdq() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x99);
}


void X86_64Assembler::cqo() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64();  // REX.W turns CDQ into CQO.
  EmitUint8(0x99);
}


void X86_64Assembler::idivl(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitUint8(0xF8 | reg.LowBits());  // ModRM for /7 with a register operand.
}


void X86_64Assembler::idivq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg);
  EmitUint8(0xF7);
  EmitUint8(0xF8 | reg.LowBits());
}


void X86_64Assembler::divl(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitUint8(0xF0 | reg.LowBits());  // ModRM for /6 with a register operand.
}


void X86_64Assembler::divq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg);
  EmitUint8(0xF7);
  EmitUint8(0xF0 | reg.LowBits());
}
4616 
4617 
// imull: 32-bit signed multiply. 0x0F 0xAF is the two-operand form; the
// immediate form picks 0x6B (imm8, sign-extended) or 0x69 (imm32).
void X86_64Assembler::imull(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xAF);
  EmitOperand(dst.LowBits(), Operand(src));
}

void X86_64Assembler::imull(CpuRegister dst, CpuRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // imull only supports 32b immediate.

  EmitOptionalRex32(dst, src);

  // See whether imm can be represented as a sign-extended 8bit value.
  int32_t v32 = static_cast<int32_t>(imm.value());
  if (IsInt<8>(v32)) {
    // Sign-extension works.
    EmitUint8(0x6B);
    EmitOperand(dst.LowBits(), Operand(src));
    EmitUint8(static_cast<uint8_t>(v32 & 0xFF));
  } else {
    // Not representable, use full immediate.
    EmitUint8(0x69);
    EmitOperand(dst.LowBits(), Operand(src));
    EmitImmediate(imm);
  }
}


// Two-operand immediate form: dst = dst * imm.
void X86_64Assembler::imull(CpuRegister reg, const Immediate& imm) {
  imull(reg, reg, imm);
}


void X86_64Assembler::imull(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xAF);
  EmitOperand(reg.LowBits(), address);
}
4660 
4661 
// imulq: 64-bit signed multiply; same encodings as imull plus REX.W.
void X86_64Assembler::imulq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xAF);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


// Two-operand immediate form: reg = reg * imm.
void X86_64Assembler::imulq(CpuRegister reg, const Immediate& imm) {
  imulq(reg, reg, imm);
}

void X86_64Assembler::imulq(CpuRegister dst, CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // imulq only supports 32b immediate.

  EmitRex64(dst, reg);

  // See whether imm can be represented as a sign-extended 8bit value.
  int64_t v64 = imm.value();
  if (IsInt<8>(v64)) {
    // Sign-extension works.
    EmitUint8(0x6B);
    EmitOperand(dst.LowBits(), Operand(reg));
    EmitUint8(static_cast<uint8_t>(v64 & 0xFF));
  } else {
    // Not representable, use full immediate.
    EmitUint8(0x69);
    EmitOperand(dst.LowBits(), Operand(reg));
    EmitImmediate(imm);
  }
}

void X86_64Assembler::imulq(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xAF);
  EmitOperand(reg.LowBits(), address);
}
4703 
4704 
// One-operand multiply forms (result in RDX:RAX): 0xF7 /5 is IMUL, /4 is MUL.
void X86_64Assembler::imull(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitOperand(5, Operand(reg));
}


void X86_64Assembler::imulq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg);
  EmitUint8(0xF7);
  EmitOperand(5, Operand(reg));
}


void X86_64Assembler::imull(const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0xF7);
  EmitOperand(5, address);
}


void X86_64Assembler::mull(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitOperand(4, Operand(reg));
}


void X86_64Assembler::mull(const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0xF7);
  EmitOperand(4, address);
}
4743 
4744 
// Shift/rotate wrappers over EmitGenericShift(wide, reg_field, operand, count).
// The first argument selects the 64-bit (REX.W) form; the second is the ModRM
// reg-field opcode extension: /4 SHL, /5 SHR, /7 SAR, /0 ROL, /1 ROR.
void X86_64Assembler::shll(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 4, reg, imm);
}


void X86_64Assembler::shlq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 4, reg, imm);
}


void X86_64Assembler::shll(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 4, operand, shifter);
}


void X86_64Assembler::shlq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 4, operand, shifter);
}


void X86_64Assembler::shrl(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 5, reg, imm);
}


void X86_64Assembler::shrq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 5, reg, imm);
}


void X86_64Assembler::shrl(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 5, operand, shifter);
}


void X86_64Assembler::shrq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 5, operand, shifter);
}


void X86_64Assembler::sarl(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 7, reg, imm);
}


void X86_64Assembler::sarl(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 7, operand, shifter);
}


void X86_64Assembler::sarq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 7, reg, imm);
}


void X86_64Assembler::sarq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 7, operand, shifter);
}


void X86_64Assembler::roll(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 0, reg, imm);
}


void X86_64Assembler::roll(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 0, operand, shifter);
}


void X86_64Assembler::rorl(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 1, reg, imm);
}


void X86_64Assembler::rorl(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 1, operand, shifter);
}


void X86_64Assembler::rolq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 0, reg, imm);
}


void X86_64Assembler::rolq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 0, operand, shifter);
}


void X86_64Assembler::rorq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 1, reg, imm);
}


void X86_64Assembler::rorq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 1, operand, shifter);
}
4843 
4844 
// NEG (0xF7 /3) and NOT (0xF7 /2). Note notl encodes the register ModRM byte
// directly (0xD0 | low = mod 11, reg field 2) while notq routes through
// EmitOperand(2, ...) — different styles, identical encodings.
void X86_64Assembler::negl(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitOperand(3, Operand(reg));
}


void X86_64Assembler::negq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg);
  EmitUint8(0xF7);
  EmitOperand(3, Operand(reg));
}


void X86_64Assembler::notl(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitUint8(0xD0 | reg.LowBits());
}


void X86_64Assembler::notq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg);
  EmitUint8(0xF7);
  EmitOperand(2, Operand(reg));
}
4875 
4876 
// Frame and miscellaneous single-byte instructions.
void X86_64Assembler::enter(const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xC8);  // ENTER imm16, imm8.
  CHECK(imm.is_uint16()) << imm.value();
  EmitUint8(imm.value() & 0xFF);         // imm16, little-endian.
  EmitUint8((imm.value() >> 8) & 0xFF);
  EmitUint8(0x00);                       // Nesting level always 0.
}


void X86_64Assembler::leave() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xC9);
}


void X86_64Assembler::ret() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xC3);
}


void X86_64Assembler::ret(const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xC2);  // RET imm16: pop imm16 bytes after returning.
  CHECK(imm.is_uint16());
  EmitUint8(imm.value() & 0xFF);
  EmitUint8((imm.value() >> 8) & 0xFF);
}



void X86_64Assembler::nop() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x90);
}


void X86_64Assembler::int3() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xCC);
}


void X86_64Assembler::hlt() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF4);
}
4925 
4926 
// Conditional jumps. Bound labels get a backward displacement (short rel8
// 0x70+cc when it fits, else long 0x0F 0x80+cc rel32); unbound labels are
// linked for later fixup. Displacements are relative to the end of the
// instruction, hence the kShortSize/kLongSize adjustments.
void X86_64Assembler::j(Condition condition, Label* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;
    static const int kLongSize = 6;
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);  // Bound labels are always behind us.
    if (IsInt<8>(offset - kShortSize)) {
      EmitUint8(0x70 + condition);
      EmitUint8((offset - kShortSize) & 0xFF);
    } else {
      EmitUint8(0x0F);
      EmitUint8(0x80 + condition);
      EmitInt32(offset - kLongSize);
    }
  } else {
    // Unbound: conservatively emit the long form and link for fixup.
    EmitUint8(0x0F);
    EmitUint8(0x80 + condition);
    EmitLabelLink(label);
  }
}


// NearLabel variant: always the 2-byte short form; the CHECK enforces that
// the target really is within rel8 range.
void X86_64Assembler::j(Condition condition, NearLabel* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);
    CHECK(IsInt<8>(offset - kShortSize));
    EmitUint8(0x70 + condition);
    EmitUint8((offset - kShortSize) & 0xFF);
  } else {
    EmitUint8(0x70 + condition);
    EmitLabelLink(label);
  }
}


// JRCXZ (0xE3): jump if RCX is zero; only a rel8 form exists.
void X86_64Assembler::jrcxz(NearLabel* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);
    CHECK(IsInt<8>(offset - kShortSize));
    EmitUint8(0xE3);
    EmitUint8((offset - kShortSize) & 0xFF);
  } else {
    EmitUint8(0xE3);
    EmitLabelLink(label);
  }
}
4980 
4981 
// Unconditional jumps: indirect (0xFF /4), short rel8 (0xEB), long rel32 (0xE9).
void X86_64Assembler::jmp(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xFF);
  EmitRegisterOperand(4, reg.LowBits());  // /4 = JMP opcode extension.
}

void X86_64Assembler::jmp(const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0xFF);
  EmitOperand(4, address);
}

// Displacements are relative to the end of the instruction, hence the
// kShortSize/kLongSize adjustments below.
void X86_64Assembler::jmp(Label* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;
    static const int kLongSize = 5;
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);  // Bound labels are always behind us.
    if (IsInt<8>(offset - kShortSize)) {
      EmitUint8(0xEB);
      EmitUint8((offset - kShortSize) & 0xFF);
    } else {
      EmitUint8(0xE9);
      EmitInt32(offset - kLongSize);
    }
  } else {
    // Unbound: conservatively emit the long form and link for fixup.
    EmitUint8(0xE9);
    EmitLabelLink(label);
  }
}


// NearLabel variant: always the short rel8 form; CHECK enforces the range.
void X86_64Assembler::jmp(NearLabel* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);
    CHECK(IsInt<8>(offset - kShortSize));
    EmitUint8(0xEB);
    EmitUint8((offset - kShortSize) & 0xFF);
  } else {
    EmitUint8(0xEB);
    EmitLabelLink(label);
  }
}
5031 
5032 
// REP MOVS string copies (0xF3 prefix + 0xA4/0xA5); the 0x66 prefix narrows
// MOVSD to the 16-bit MOVSW form.
void X86_64Assembler::rep_movsw() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitUint8(0xF3);
  EmitUint8(0xA5);
}

void X86_64Assembler::rep_movsb() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitUint8(0xA4);
}

void X86_64Assembler::rep_movsl() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitUint8(0xA5);
}

// Emits the LOCK prefix (0xF0) and returns this, so the prefixed instruction
// can be chained: lock()->cmpxchgl(...).
X86_64Assembler* X86_64Assembler::lock() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF0);
  return this;
}
5057 
5058 
// CMPXCHG family (0x0F 0xB0 byte form, 0x0F 0xB1 word/dword/qword forms).
// Callers needing atomicity emit the LOCK prefix first via lock().
void X86_64Assembler::cmpxchgb(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Byte form uses the normalizing helper so SPL/BPL/SIL/DIL get a REX prefix
  // instead of being encoded as AH/CH/DH/BH.
  EmitOptionalByteRegNormalizingRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xB0);
  EmitOperand(reg.LowBits(), address);
}


void X86_64Assembler::cmpxchgw(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();  // 0x66 prefix selects the 16-bit form.
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xB1);
  EmitOperand(reg.LowBits(), address);
}


void X86_64Assembler::cmpxchgl(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xB1);
  EmitOperand(reg.LowBits(), address);
}


void X86_64Assembler::cmpxchgq(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xB1);
  EmitOperand(reg.LowBits(), address);
}
5094 
5095 
mfence()5096 void X86_64Assembler::mfence() {
5097   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
5098   EmitUint8(0x0F);
5099   EmitUint8(0xAE);
5100   EmitUint8(0xF0);
5101 }
5102 
5103 
// Emits the GS segment-override prefix (0x65).  Returns `this` so the caller
// can chain the instruction that the prefix applies to.
X86_64Assembler* X86_64Assembler::gs() {
  // TODO: gs is a prefix and not an instruction
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x65);
  return this;
}
5110 
5111 
AddImmediate(CpuRegister reg,const Immediate & imm)5112 void X86_64Assembler::AddImmediate(CpuRegister reg, const Immediate& imm) {
5113   int value = imm.value();
5114   if (value != 0) {
5115     if (value > 0) {
5116       addl(reg, imm);
5117     } else {
5118       subl(reg, Immediate(value));
5119     }
5120   }
5121 }
5122 
5123 
// SETcc r/m8: 0F 9x with the condition code folded into the opcode and the
// destination byte register in ModRM.rm (mod=11, reg=0).
void X86_64Assembler::setcc(Condition condition, CpuRegister dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // RSP, RBP, RDI, RSI need rex prefix (else the pattern encodes ah/bh/ch/dh).
  if (dst.NeedsRex() || dst.AsRegister() > 3) {
    EmitOptionalRex(true, false, false, false, dst.NeedsRex());
  }
  EmitUint8(0x0F);
  EmitUint8(0x90 + condition);  // 0x90..0x9F select the condition.
  EmitUint8(0xC0 + dst.LowBits());  // ModRM: mod=11, reg=0, rm=dst.
}
5134 
// BLSI r64, r/m64 (BMI1): VEX.NDD-encoded group-17 opcode F3 with opcode
// extension /3.  The destination goes in VEX.vvvv (byte two), the source in
// ModRM.rm; src.NeedsRex() feeds VEX.B.
void X86_64Assembler::blsi(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
  uint8_t byte_one = EmitVexPrefixByteOne(/*R=*/ false,
                                          /*X=*/ false,
                                          src.NeedsRex(),
                                          SET_VEX_M_0F_38);
  uint8_t byte_two = EmitVexPrefixByteTwo(/*W=*/true,
                                          X86_64ManagedRegister::FromCpuRegister(dst.AsRegister()),
                                          SET_VEX_L_128,
                                          SET_VEX_PP_NONE);
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  EmitUint8(byte_two);
  EmitUint8(0xF3);
  EmitRegisterOperand(3, src.LowBits());  // /3 selects BLSI.
}

// BLSMSK r64, r/m64 (BMI1): same VEX group-17 encoding as blsi() above but
// with opcode extension /2.
void X86_64Assembler::blsmsk(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
  uint8_t byte_one = EmitVexPrefixByteOne(/*R=*/ false,
                                          /*X=*/ false,
                                          src.NeedsRex(),
                                          SET_VEX_M_0F_38);
  uint8_t byte_two = EmitVexPrefixByteTwo(/*W=*/ true,
                                          X86_64ManagedRegister::FromCpuRegister(dst.AsRegister()),
                                          SET_VEX_L_128,
                                          SET_VEX_PP_NONE);
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  EmitUint8(byte_two);
  EmitUint8(0xF3);
  EmitRegisterOperand(2, src.LowBits());  // /2 selects BLSMSK.
}

// BLSR r64, r/m64 (BMI1): same VEX group-17 encoding but with opcode
// extension /1.
void X86_64Assembler::blsr(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/false);
  uint8_t byte_one = EmitVexPrefixByteOne(/*R=*/ false,
                                          /*X=*/ false,
                                          src.NeedsRex(),
                                          SET_VEX_M_0F_38);
  uint8_t byte_two = EmitVexPrefixByteTwo(/*W=*/ true,
                                          X86_64ManagedRegister::FromCpuRegister(dst.AsRegister()),
                                          SET_VEX_L_128,
                                          SET_VEX_PP_NONE);
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  EmitUint8(byte_two);
  EmitUint8(0xF3);
  EmitRegisterOperand(1, src.LowBits());  // /1 selects BLSR.
}
5188 
// BSWAP r32 (0F C8+rd): register is folded into the opcode byte; only
// REX.B may be needed, for R8D-R15D.
void X86_64Assembler::bswapl(CpuRegister dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex(false, false, false, false, dst.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0xC8 + dst.LowBits());
}

// BSWAP r64: REX.W + 0F C8+rd.
void X86_64Assembler::bswapq(CpuRegister dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex(false, true, false, false, dst.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0xC8 + dst.LowBits());
}
5202 
// BSF (bit scan forward) r32, r/m32: 0F BC.
void X86_64Assembler::bsfl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBC);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}

// BSF r32, m32: 0F BC with a memory operand.
void X86_64Assembler::bsfl(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBC);
  EmitOperand(dst.LowBits(), src);
}

// BSF r64, r/m64: REX.W + 0F BC.
void X86_64Assembler::bsfq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBC);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}

// BSF r64, m64: REX.W + 0F BC with a memory operand.
void X86_64Assembler::bsfq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBC);
  EmitOperand(dst.LowBits(), src);
}
5234 
// BSR (bit scan reverse) r32, r/m32: 0F BD.
void X86_64Assembler::bsrl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBD);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}

// BSR r32, m32: 0F BD with a memory operand.
void X86_64Assembler::bsrl(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBD);
  EmitOperand(dst.LowBits(), src);
}

// BSR r64, r/m64: REX.W + 0F BD.
void X86_64Assembler::bsrq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBD);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}

// BSR r64, m64: REX.W + 0F BD with a memory operand.
void X86_64Assembler::bsrq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBD);
  EmitOperand(dst.LowBits(), src);
}
5266 
// POPCNT r32, r/m32: F3 0F B8.  The mandatory F3 prefix precedes any REX.
void X86_64Assembler::popcntl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB8);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}

// POPCNT r32, m32.
void X86_64Assembler::popcntl(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB8);
  EmitOperand(dst.LowBits(), src);
}

// POPCNT r64, r/m64: F3 REX.W 0F B8.
void X86_64Assembler::popcntq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB8);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}

// POPCNT r64, m64.
void X86_64Assembler::popcntq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB8);
  EmitOperand(dst.LowBits(), src);
}
5302 
rdtsc()5303 void X86_64Assembler::rdtsc() {
5304   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
5305   EmitUint8(0x0F);
5306   EmitUint8(0x31);
5307 }
5308 
// REPNE SCASB: F2 AE.
void X86_64Assembler::repne_scasb() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitUint8(0xAE);
}

// REPNE SCASW: 66 (operand-size) F2 AF.
void X86_64Assembler::repne_scasw() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitUint8(0xF2);
  EmitUint8(0xAF);
}

// REPE CMPSW: 66 (operand-size) F3 A7.
void X86_64Assembler::repe_cmpsw() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitUint8(0xF3);
  EmitUint8(0xA7);
}


// REPE CMPSD (32-bit string compare): F3 A7.
void X86_64Assembler::repe_cmpsl() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitUint8(0xA7);
}


// REPE CMPSQ: F3 REX.W A7.
void X86_64Assembler::repe_cmpsq() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitRex64();
  EmitUint8(0xA7);
}
5343 
ud2()5344 void X86_64Assembler::ud2() {
5345   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
5346   EmitUint8(0x0F);
5347   EmitUint8(0x0B);
5348 }
5349 
LoadDoubleConstant(XmmRegister dst,double value)5350 void X86_64Assembler::LoadDoubleConstant(XmmRegister dst, double value) {
5351   // TODO: Need to have a code constants table.
5352   int64_t constant = bit_cast<int64_t, double>(value);
5353   pushq(Immediate(High32Bits(constant)));
5354   pushq(Immediate(Low32Bits(constant)));
5355   movsd(dst, Address(CpuRegister(RSP), 0));
5356   addq(CpuRegister(RSP), Immediate(2 * sizeof(intptr_t)));
5357 }
5358 
5359 
Align(int alignment,int offset)5360 void X86_64Assembler::Align(int alignment, int offset) {
5361   CHECK(IsPowerOfTwo(alignment));
5362   // Emit nop instruction until the real position is aligned.
5363   while (((offset + buffer_.GetPosition()) & (alignment-1)) != 0) {
5364     nop();
5365   }
5366 }
5367 
5368 
Bind(Label * label)5369 void X86_64Assembler::Bind(Label* label) {
5370   int bound = buffer_.Size();
5371   CHECK(!label->IsBound());  // Labels can only be bound once.
5372   while (label->IsLinked()) {
5373     int position = label->LinkPosition();
5374     int next = buffer_.Load<int32_t>(position);
5375     buffer_.Store<int32_t>(position, bound - (position + 4));
5376     label->position_ = next;
5377   }
5378   label->BindTo(bound);
5379 }
5380 
5381 
Bind(NearLabel * label)5382 void X86_64Assembler::Bind(NearLabel* label) {
5383   int bound = buffer_.Size();
5384   CHECK(!label->IsBound());  // Labels can only be bound once.
5385   while (label->IsLinked()) {
5386     int position = label->LinkPosition();
5387     uint8_t delta = buffer_.Load<uint8_t>(position);
5388     int offset = bound - (position + 1);
5389     CHECK(IsInt<8>(offset));
5390     buffer_.Store<int8_t>(position, offset);
5391     label->position_ = delta != 0u ? label->position_ - delta : 0;
5392   }
5393   label->BindTo(bound);
5394 }
5395 
5396 
// Emits a pre-encoded operand (ModRM plus optional SIB/displacement bytes),
// merging `reg_or_opcode` (a register number or opcode extension, 0..7) into
// the reg field of the ModRM byte.
void X86_64Assembler::EmitOperand(uint8_t reg_or_opcode, const Operand& operand) {
  CHECK_GE(reg_or_opcode, 0);
  CHECK_LT(reg_or_opcode, 8);
  const int length = operand.length_;
  CHECK_GT(length, 0);
  // Emit the ModRM byte updated with the given reg value.
  CHECK_EQ(operand.encoding_[0] & 0x38, 0);  // The reg field must be vacant.
  EmitUint8(operand.encoding_[0] + (reg_or_opcode << 3));
  // Emit the rest of the encoded operand.
  for (int i = 1; i < length; i++) {
    EmitUint8(operand.encoding_[i]);
  }
  AssemblerFixup* fixup = operand.GetFixup();
  if (fixup != nullptr) {
    EmitFixup(fixup);  // Record relocation info for this operand, if any.
  }
}
5414 
5415 
EmitImmediate(const Immediate & imm,bool is_16_op)5416 void X86_64Assembler::EmitImmediate(const Immediate& imm, bool is_16_op) {
5417   if (is_16_op) {
5418     EmitUint8(imm.value() & 0xFF);
5419     EmitUint8(imm.value() >> 8);
5420   } else if (imm.is_int32()) {
5421     EmitInt32(static_cast<int32_t>(imm.value()));
5422   } else {
5423     EmitInt64(imm.value());
5424   }
5425 }
5426 
5427 
// Emits a group-1 ALU instruction (ADD/OR/ADC/SBB/AND/SUB/XOR/CMP selected
// by `reg_or_opcode`) with an immediate, choosing the shortest encoding:
// 0x83 (sign-extended imm8), the 0x05-style RAX short form, or 0x81.
void X86_64Assembler::EmitComplex(uint8_t reg_or_opcode,
                                  const Operand& operand,
                                  const Immediate& immediate,
                                  bool is_16_op) {
  CHECK_GE(reg_or_opcode, 0);
  CHECK_LT(reg_or_opcode, 8);
  if (immediate.is_int8()) {
    // Use sign-extended 8-bit immediate.
    EmitUint8(0x83);
    EmitOperand(reg_or_opcode, operand);
    EmitUint8(immediate.value() & 0xFF);  // Only one byte, even for is_16_op.
  } else if (operand.IsRegister(CpuRegister(RAX))) {
    // Use short form if the destination is eax.
    EmitUint8(0x05 + (reg_or_opcode << 3));
    EmitImmediate(immediate, is_16_op);
  } else {
    EmitUint8(0x81);
    EmitOperand(reg_or_opcode, operand);
    EmitImmediate(immediate, is_16_op);
  }
}
5449 
5450 
EmitLabel(Label * label,int instruction_size)5451 void X86_64Assembler::EmitLabel(Label* label, int instruction_size) {
5452   if (label->IsBound()) {
5453     int offset = label->Position() - buffer_.Size();
5454     CHECK_LE(offset, 0);
5455     EmitInt32(offset - instruction_size);
5456   } else {
5457     EmitLabelLink(label);
5458   }
5459 }
5460 
5461 
// Links an unbound Label at the current position: the 4-byte displacement
// slot temporarily stores the previous head of the label's link chain, and
// the label now points at this slot.  Bind() later rewrites the chain with
// real offsets.
void X86_64Assembler::EmitLabelLink(Label* label) {
  CHECK(!label->IsBound());
  int position = buffer_.Size();
  EmitInt32(label->position_);  // Chain: store previous head in the slot.
  label->LinkTo(position);
}


// Links an unbound NearLabel.  The 1-byte slot cannot hold a position, so it
// stores the delta back to the previous link site (0 when this is the first
// link); Bind(NearLabel*) walks the chain using these deltas.
void X86_64Assembler::EmitLabelLink(NearLabel* label) {
  CHECK(!label->IsBound());
  int position = buffer_.Size();
  if (label->IsLinked()) {
    // Save the delta in the byte that we have to play with.
    uint32_t delta = position - label->LinkPosition();
    CHECK(IsUint<8>(delta));
    EmitUint8(delta & 0xFF);
  } else {
    EmitUint8(0);
  }
  label->LinkTo(position);
}
5483 
5484 
// Emits a shift/rotate of `reg` by an immediate.  `reg_or_opcode` selects
// the operation (group-2 opcode extension); 0xD1 encodes shift-by-1, 0xC1
// shift-by-imm8.  `wide` selects the 64-bit form via REX.W.
void X86_64Assembler::EmitGenericShift(bool wide,
                                       int reg_or_opcode,
                                       CpuRegister reg,
                                       const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int8());
  if (wide) {
    EmitRex64(reg);
  } else {
    EmitOptionalRex32(reg);
  }
  if (imm.value() == 1) {
    EmitUint8(0xD1);  // Shorter shift-by-one form, no immediate byte.
    EmitOperand(reg_or_opcode, Operand(reg));
  } else {
    EmitUint8(0xC1);
    EmitOperand(reg_or_opcode, Operand(reg));
    EmitUint8(imm.value() & 0xFF);
  }
}


// Emits a shift/rotate of `operand` by CL (0xD3); the shift count register
// must be RCX per the ISA.
void X86_64Assembler::EmitGenericShift(bool wide,
                                       int reg_or_opcode,
                                       CpuRegister operand,
                                       CpuRegister shifter) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK_EQ(shifter.AsRegister(), RCX);
  if (wide) {
    EmitRex64(operand);
  } else {
    EmitOptionalRex32(operand);
  }
  EmitUint8(0xD3);
  EmitOperand(reg_or_opcode, Operand(operand));
}
5521 
EmitOptionalRex(bool force,bool w,bool r,bool x,bool b)5522 void X86_64Assembler::EmitOptionalRex(bool force, bool w, bool r, bool x, bool b) {
5523   // REX.WRXB
5524   // W - 64-bit operand
5525   // R - MODRM.reg
5526   // X - SIB.index
5527   // B - MODRM.rm/SIB.base
5528   uint8_t rex = force ? 0x40 : 0;
5529   if (w) {
5530     rex |= 0x48;  // REX.W000
5531   }
5532   if (r) {
5533     rex |= 0x44;  // REX.0R00
5534   }
5535   if (x) {
5536     rex |= 0x42;  // REX.00X0
5537   }
5538   if (b) {
5539     rex |= 0x41;  // REX.000B
5540   }
5541   if (rex != 0) {
5542     EmitUint8(rex);
5543   }
5544 }
5545 
// The EmitOptionalRex32 register overloads map the operands' high-register
// bits onto REX.R (ModRM.reg) and REX.B (ModRM.rm); the prefix is emitted
// only when needed.
void X86_64Assembler::EmitOptionalRex32(CpuRegister reg) {
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
}

void X86_64Assembler::EmitOptionalRex32(CpuRegister dst, CpuRegister src) {
  EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
}

void X86_64Assembler::EmitOptionalRex32(XmmRegister dst, XmmRegister src) {
  EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
}

void X86_64Assembler::EmitOptionalRex32(CpuRegister dst, XmmRegister src) {
  EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
}

void X86_64Assembler::EmitOptionalRex32(XmmRegister dst, CpuRegister src) {
  EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
}
5565 
// Memory-operand variants: the Operand pre-computes its own REX.X/REX.B bits
// (for index/base registers); only REX.R for `dst` is merged in here.
void X86_64Assembler::EmitOptionalRex32(const Operand& operand) {
  uint8_t rex = operand.rex();
  if (rex != 0) {
    EmitUint8(rex);
  }
}

void X86_64Assembler::EmitOptionalRex32(CpuRegister dst, const Operand& operand) {
  uint8_t rex = operand.rex();
  if (dst.NeedsRex()) {
    rex |= 0x44;  // REX.0R00
  }
  if (rex != 0) {
    EmitUint8(rex);
  }
}

void X86_64Assembler::EmitOptionalRex32(XmmRegister dst, const Operand& operand) {
  uint8_t rex = operand.rex();
  if (dst.NeedsRex()) {
    rex |= 0x44;  // REX.0R00
  }
  if (rex != 0) {
    EmitUint8(rex);
  }
}
5592 
// The EmitRex64 overloads always emit a REX prefix with W set (0x48),
// selecting 64-bit operand size, plus R/B bits for high registers.
void X86_64Assembler::EmitRex64() {
  EmitOptionalRex(false, true, false, false, false);
}

void X86_64Assembler::EmitRex64(CpuRegister reg) {
  EmitOptionalRex(false, true, false, false, reg.NeedsRex());
}

void X86_64Assembler::EmitRex64(const Operand& operand) {
  uint8_t rex = operand.rex();  // Operand supplies REX.X/REX.B.
  rex |= 0x48;  // REX.W000
  EmitUint8(rex);
}

void X86_64Assembler::EmitRex64(CpuRegister dst, CpuRegister src) {
  EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
}

void X86_64Assembler::EmitRex64(XmmRegister dst, CpuRegister src) {
  EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
}

void X86_64Assembler::EmitRex64(CpuRegister dst, XmmRegister src) {
  EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
}

void X86_64Assembler::EmitRex64(CpuRegister dst, const Operand& operand) {
  uint8_t rex = 0x48 | operand.rex();  // REX.W000
  if (dst.NeedsRex()) {
    rex |= 0x44;  // REX.0R00
  }
  EmitUint8(rex);
}

void X86_64Assembler::EmitRex64(XmmRegister dst, const Operand& operand) {
  uint8_t rex = 0x48 | operand.rex();  // REX.W000
  if (dst.NeedsRex()) {
    rex |= 0x44;  // REX.0R00
  }
  EmitUint8(rex);
}
5634 
// REX emission for byte-register instructions: registers 4..7 (SPL/BPL/
// SIL/DIL) need a REX prefix even without extension bits, otherwise their
// encodings mean AH/CH/DH/BH.
void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst,
                                                          CpuRegister src,
                                                          bool normalize_both) {
  // SPL, BPL, SIL, DIL need the REX prefix.
  bool force = src.AsRegister() > 3;
  if (normalize_both) {
    // Some instructions take two byte registers, such as `xchg bpl, al`, so they need the REX
    // prefix if either `src` or `dst` needs it.
    force |= dst.AsRegister() > 3;
  } else {
    // Other instructions take one byte register and one full register, such as `movzxb rax, bpl`.
    // They need REX prefix only if `src` needs it, but not `dst`.
  }
  EmitOptionalRex(force, false, dst.NeedsRex(), false, src.NeedsRex());
}

// Memory-operand variant: only `dst` is a byte register; the Operand
// contributes its own REX.X/REX.B bits.
void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const Operand& operand) {
  uint8_t rex = operand.rex();
  // For dst, SPL, BPL, SIL, DIL need the rex prefix.
  bool force = dst.AsRegister() > 3;
  if (force) {
    rex |= 0x40;  // REX.0000
  }
  if (dst.NeedsRex()) {
    rex |= 0x44;  // REX.0R00
  }
  if (rex != 0) {
    EmitUint8(rex);
  }
}
5665 
AddConstantArea()5666 void X86_64Assembler::AddConstantArea() {
5667   ArrayRef<const int32_t> area = constant_area_.GetBuffer();
5668   for (size_t i = 0, e = area.size(); i < e; i++) {
5669     AssemblerBuffer::EnsureCapacity ensured(&buffer_);
5670     EmitInt32(area[i]);
5671   }
5672 }
5673 
AppendInt32(int32_t v)5674 size_t ConstantArea::AppendInt32(int32_t v) {
5675   size_t result = buffer_.size() * elem_size_;
5676   buffer_.push_back(v);
5677   return result;
5678 }
5679 
AddInt32(int32_t v)5680 size_t ConstantArea::AddInt32(int32_t v) {
5681   // Look for an existing match.
5682   for (size_t i = 0, e = buffer_.size(); i < e; i++) {
5683     if (v == buffer_[i]) {
5684       return i * elem_size_;
5685     }
5686   }
5687 
5688   // Didn't match anything.
5689   return AppendInt32(v);
5690 }
5691 
AddInt64(int64_t v)5692 size_t ConstantArea::AddInt64(int64_t v) {
5693   int32_t v_low = v;
5694   int32_t v_high = v >> 32;
5695   if (buffer_.size() > 1) {
5696     // Ensure we don't pass the end of the buffer.
5697     for (size_t i = 0, e = buffer_.size() - 1; i < e; i++) {
5698       if (v_low == buffer_[i] && v_high == buffer_[i + 1]) {
5699         return i * elem_size_;
5700       }
5701     }
5702   }
5703 
5704   // Didn't match anything.
5705   size_t result = buffer_.size() * elem_size_;
5706   buffer_.push_back(v_low);
5707   buffer_.push_back(v_high);
5708   return result;
5709 }
5710 
AddDouble(double v)5711 size_t ConstantArea::AddDouble(double v) {
5712   // Treat the value as a 64-bit integer value.
5713   return AddInt64(bit_cast<int64_t, double>(v));
5714 }
5715 
AddFloat(float v)5716 size_t ConstantArea::AddFloat(float v) {
5717   // Treat the value as a 32-bit integer value.
5718   return AddInt32(bit_cast<int32_t, float>(v));
5719 }
5720 
EmitVexPrefixByteZero(bool is_twobyte_form)5721 uint8_t X86_64Assembler::EmitVexPrefixByteZero(bool is_twobyte_form) {
5722   // Vex Byte 0,
5723   // Bits [7:0] must contain the value 11000101b (0xC5) for 2-byte Vex
5724   // Bits [7:0] must contain the value 11000100b (0xC4) for 3-byte Vex
5725   uint8_t vex_prefix = 0xC0;
5726   if (is_twobyte_form) {
5727     vex_prefix |= TWO_BYTE_VEX;  // 2-Byte Vex
5728   } else {
5729     vex_prefix |= THREE_BYTE_VEX;  // 3-Byte Vex
5730   }
5731   return vex_prefix;
5732 }
5733 
// Returns byte 1 of a 3-byte VEX prefix: inverted R/X/B extension bits plus
// the 5-bit opcode-map selector (SET_VEX_M).  Note the R/X/B inputs are
// inverted here (a clear input sets the bit), matching VEX's 1s-complement
// encoding of REX bits.
uint8_t X86_64Assembler::EmitVexPrefixByteOne(bool R, bool X, bool B, int SET_VEX_M) {
  // Vex Byte 1,
  uint8_t vex_prefix = VEX_INIT;
  /** Bit[7] This bit needs to be set to '1'
  otherwise the instruction is LES or LDS */
  if (!R) {
    // R .
    vex_prefix |= SET_VEX_R;
  }
  /** Bit[6] This bit needs to be set to '1'
  otherwise the instruction is LES or LDS */
  if (!X) {
    // X .
    vex_prefix |= SET_VEX_X;
  }
  /** Bit[5] This bit needs to be set to '1' */
  if (!B) {
    // B .
    vex_prefix |= SET_VEX_B;
  }
  /** Bits[4:0], Based on the instruction documentaion */
  vex_prefix |= SET_VEX_M;
  return vex_prefix;
}
5758 
// Returns byte 1 of a 2-byte VEX prefix: inverted R bit, the vvvv register
// specifier (stored 1s-complemented, 0b1111 = unused), vector length L, and
// the implied-prefix pp bits.
uint8_t X86_64Assembler::EmitVexPrefixByteOne(bool R,
                                              X86_64ManagedRegister operand,
                                              int SET_VEX_L,
                                              int SET_VEX_PP) {
  // Vex Byte 1,
  uint8_t vex_prefix = VEX_INIT;
  /** Bit[7] This bit needs to be set to '1'
  otherwise the instruction is LES or LDS */
  if (!R) {
    // R .
    vex_prefix |= SET_VEX_R;
  }
  /**Bits[6:3] - 'vvvv' the source or dest register specifier */
  if (operand.IsNoRegister()) {
    vex_prefix |= 0x78;  // vvvv = 0b1111: no register operand.
  } else if (operand.IsXmmRegister()) {
    XmmRegister vvvv = operand.AsXmmRegister();
    int inverted_reg = 15 - static_cast<int>(vvvv.AsFloatRegister());  // 1s complement.
    uint8_t reg = static_cast<uint8_t>(inverted_reg);
    vex_prefix |= ((reg & 0x0F) << 3);
  } else if (operand.IsCpuRegister()) {
    CpuRegister vvvv = operand.AsCpuRegister();
    int inverted_reg = 15 - static_cast<int>(vvvv.AsRegister());  // 1s complement.
    uint8_t reg = static_cast<uint8_t>(inverted_reg);
    vex_prefix |= ((reg & 0x0F) << 3);
  }
  /** Bit[2] - "L" If VEX.L = 1 indicates 256-bit vector operation,
  VEX.L = 0 indicates 128 bit vector operation */
  vex_prefix |= SET_VEX_L;
  // Bits[1:0] -  "pp"
  vex_prefix |= SET_VEX_PP;
  return vex_prefix;
}
5792 
// Returns byte 2 of a 3-byte VEX prefix: W bit, 1s-complemented vvvv
// register specifier, vector length L, and the implied-prefix pp bits.
uint8_t X86_64Assembler::EmitVexPrefixByteTwo(bool W,
                                              X86_64ManagedRegister operand,
                                              int SET_VEX_L,
                                              int SET_VEX_PP) {
  // Vex Byte 2,
  uint8_t vex_prefix = VEX_INIT;

  /** Bit[7] This bits needs to be set to '1' with default value.
  When using C4H form of VEX prefix, REX.W value is ignored */
  if (W) {
    vex_prefix |= SET_VEX_W;
  }
  // Bits[6:3] - 'vvvv' the source or dest register specifier
  if (operand.IsXmmRegister()) {
    XmmRegister vvvv = operand.AsXmmRegister();
    int inverted_reg = 15 - static_cast<int>(vvvv.AsFloatRegister());  // 1s complement.
    uint8_t reg = static_cast<uint8_t>(inverted_reg);
    vex_prefix |= ((reg & 0x0F) << 3);
  } else if (operand.IsCpuRegister()) {
    CpuRegister vvvv = operand.AsCpuRegister();
    int inverted_reg = 15 - static_cast<int>(vvvv.AsRegister());  // 1s complement.
    uint8_t reg = static_cast<uint8_t>(inverted_reg);
    vex_prefix |= ((reg & 0x0F) << 3);
  }
  /** Bit[2] - "L" If VEX.L = 1 indicates 256-bit vector operation,
  VEX.L = 0 indicates 128 bit vector operation */
  vex_prefix |= SET_VEX_L;
  // Bits[1:0] -  "pp"
  vex_prefix |= SET_VEX_PP;
  return vex_prefix;
}
5824 
// Returns byte 2 of a 3-byte VEX prefix when there is no vvvv operand:
// vvvv is fixed to 0b1111 (unused), plus W, L and pp bits.
uint8_t X86_64Assembler::EmitVexPrefixByteTwo(bool W,
                                              int SET_VEX_L,
                                              int SET_VEX_PP) {
  // Vex Byte 2,
  uint8_t vex_prefix = VEX_INIT;

  /** Bit[7] This bits needs to be set to '1' with default value.
  When using C4H form of VEX prefix, REX.W value is ignored */
  if (W) {
    vex_prefix |= SET_VEX_W;
  }
  /** Bits[6:3] - 'vvvv' the source or dest register specifier */
  vex_prefix |= (0x0F << 3);
  /** Bit[2] - "L" If VEX.L = 1 indicates 256-bit vector operation,
  VEX.L = 0 indicates 128 bit vector operation */
  vex_prefix |= SET_VEX_L;

  // Bits[1:0] -  "pp"
  // NOTE(review): the other overloads OR in SET_VEX_PP unconditionally; this
  // guard is presumably redundant if SET_VEX_PP_NONE is 0 -- confirm against
  // the enum definition before unifying.
  if (SET_VEX_PP != SET_VEX_PP_NONE) {
    vex_prefix |= SET_VEX_PP;
  }
  return vex_prefix;
}
5848 
5849 }  // namespace x86_64
5850 }  // namespace art
5851