1 /*
2  * Copyright (C) 2011 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ART_COMPILER_UTILS_X86_ASSEMBLER_X86_H_
18 #define ART_COMPILER_UTILS_X86_ASSEMBLER_X86_H_
19 
20 #include <vector>
21 
22 #include "arch/x86/instruction_set_features_x86.h"
23 #include "base/arena_containers.h"
24 #include "base/array_ref.h"
25 #include "base/bit_utils.h"
26 #include "base/globals.h"
27 #include "base/macros.h"
28 #include "base/pointer_size.h"
29 #include "constants_x86.h"
30 #include "heap_poisoning.h"
31 #include "managed_register_x86.h"
32 #include "offsets.h"
33 #include "utils/assembler.h"
34 
35 namespace art HIDDEN {
36 namespace x86 {
37 
38 class Immediate : public ValueObject {
39  public:
Immediate(int32_t value_in)40   explicit Immediate(int32_t value_in) : value_(value_in) {}
41 
value()42   int32_t value() const { return value_; }
43 
is_int8()44   bool is_int8() const { return IsInt<8>(value_); }
is_uint8()45   bool is_uint8() const { return IsUint<8>(value_); }
is_int16()46   bool is_int16() const { return IsInt<16>(value_); }
is_uint16()47   bool is_uint16() const { return IsUint<16>(value_); }
48 
49  private:
50   const int32_t value_;
51 };
52 
53 
54 class Operand : public ValueObject {
55  public:
mod()56   uint8_t mod() const {
57     return (encoding_at(0) >> 6) & 3;
58   }
59 
rm()60   Register rm() const {
61     return static_cast<Register>(encoding_at(0) & 7);
62   }
63 
scale()64   ScaleFactor scale() const {
65     return static_cast<ScaleFactor>((encoding_at(1) >> 6) & 3);
66   }
67 
index()68   Register index() const {
69     return static_cast<Register>((encoding_at(1) >> 3) & 7);
70   }
71 
base()72   Register base() const {
73     return static_cast<Register>(encoding_at(1) & 7);
74   }
75 
disp()76   int32_t disp() const {
77     return disp_;
78   }
79 
disp8()80   int8_t disp8() const {
81     CHECK_GE(length_, 2);
82     return static_cast<int8_t>(encoding_[length_ - 1]);
83   }
84 
disp32()85   int32_t disp32() const {
86     CHECK_GE(length_, 5);
87     int32_t value;
88     memcpy(&value, &encoding_[length_ - 4], sizeof(value));
89     return value;
90   }
91 
IsRegister(Register reg)92   bool IsRegister(Register reg) const {
93     return ((encoding_[0] & 0xF8) == 0xC0)  // Addressing mode is register only.
94         && ((encoding_[0] & 0x07) == reg);  // Register codes match.
95   }
96 
97   inline bool operator==(const Operand &op) const {
98     return length_ == op.length_ &&
99         memcmp(encoding_, op.encoding_, length_) == 0 &&
100         disp_ == op.disp_ &&
101         fixup_ == op.fixup_;
102   }
103 
104  protected:
105   // Operand can be sub classed (e.g: Address).
Operand()106   Operand() : length_(0), disp_(0), fixup_(nullptr) { }
107 
SetModRM(int mod_in,Register rm_in)108   void SetModRM(int mod_in, Register rm_in) {
109     CHECK_EQ(mod_in & ~3, 0);
110     encoding_[0] = (mod_in << 6) | rm_in;
111     length_ = 1;
112   }
113 
SetSIB(ScaleFactor scale_in,Register index_in,Register base_in)114   void SetSIB(ScaleFactor scale_in, Register index_in, Register base_in) {
115     CHECK_EQ(length_, 1);
116     CHECK_EQ(scale_in & ~3, 0);
117     encoding_[1] = (scale_in << 6) | (index_in << 3) | base_in;
118     length_ = 2;
119   }
120 
SetDisp8(int8_t disp)121   void SetDisp8(int8_t disp) {
122     CHECK(length_ == 1 || length_ == 2);
123     encoding_[length_++] = static_cast<uint8_t>(disp);
124     disp_ = disp;
125   }
126 
SetDisp32(int32_t disp)127   void SetDisp32(int32_t disp) {
128     CHECK(length_ == 1 || length_ == 2);
129     int disp_size = sizeof(disp);
130     memmove(&encoding_[length_], &disp, disp_size);
131     length_ += disp_size;
132     disp_ = disp;
133   }
134 
GetFixup()135   AssemblerFixup* GetFixup() const {
136     return fixup_;
137   }
138 
SetFixup(AssemblerFixup * fixup)139   void SetFixup(AssemblerFixup* fixup) {
140     fixup_ = fixup;
141   }
142 
143  private:
144   uint8_t length_;
145   uint8_t encoding_[6];
146   int32_t disp_;
147 
148   // A fixup can be associated with the operand, in order to be applied after the
149   // code has been generated. This is used for constant area fixups.
150   AssemblerFixup* fixup_;
151 
Operand(Register reg)152   explicit Operand(Register reg) : disp_(0), fixup_(nullptr) { SetModRM(3, reg); }
153 
154   // Get the operand encoding byte at the given index.
encoding_at(int index_in)155   uint8_t encoding_at(int index_in) const {
156     CHECK_GE(index_in, 0);
157     CHECK_LT(index_in, length_);
158     return encoding_[index_in];
159   }
160 
161   friend class X86Assembler;
162 };
163 
164 
165 class Address : public Operand {
166  public:
Address(Register base_in,int32_t disp)167   Address(Register base_in, int32_t disp) {
168     Init(base_in, disp);
169   }
170 
Address(Register base_in,int32_t disp,AssemblerFixup * fixup)171   Address(Register base_in, int32_t disp, AssemblerFixup *fixup) {
172     Init(base_in, disp);
173     SetFixup(fixup);
174   }
175 
Address(Register base_in,Offset disp)176   Address(Register base_in, Offset disp) {
177     Init(base_in, disp.Int32Value());
178   }
179 
Address(Register base_in,FrameOffset disp)180   Address(Register base_in, FrameOffset disp) {
181     CHECK_EQ(base_in, ESP);
182     Init(ESP, disp.Int32Value());
183   }
184 
Address(Register base_in,MemberOffset disp)185   Address(Register base_in, MemberOffset disp) {
186     Init(base_in, disp.Int32Value());
187   }
188 
Address(Register index_in,ScaleFactor scale_in,int32_t disp)189   Address(Register index_in, ScaleFactor scale_in, int32_t disp) {
190     CHECK_NE(index_in, ESP);  // Illegal addressing mode.
191     SetModRM(0, ESP);
192     SetSIB(scale_in, index_in, EBP);
193     SetDisp32(disp);
194   }
195 
Address(Register base_in,Register index_in,ScaleFactor scale_in,int32_t disp)196   Address(Register base_in, Register index_in, ScaleFactor scale_in, int32_t disp) {
197     Init(base_in, index_in, scale_in, disp);
198   }
199 
Address(Register base_in,Register index_in,ScaleFactor scale_in,int32_t disp,AssemblerFixup * fixup)200   Address(Register base_in,
201           Register index_in,
202           ScaleFactor scale_in,
203           int32_t disp, AssemblerFixup *fixup) {
204     Init(base_in, index_in, scale_in, disp);
205     SetFixup(fixup);
206   }
207 
208   // Break the address into pieces and reassemble it again with a new displacement.
209   // Note that it may require a new addressing mode if displacement size is changed.
displace(const Address & addr,int32_t disp)210   static Address displace(const Address &addr, int32_t disp) {
211     const int32_t new_disp = addr.disp() + disp;
212     const bool sib = addr.rm() == ESP;
213     const bool ebp = EBP == (sib ? addr.base() : addr.rm());
214     Address new_addr;
215     if (addr.mod() == 0 && ebp) {
216       // Special case: mod 00b and EBP in r/m or SIB base => 32-bit displacement.
217       new_addr.SetModRM(0, addr.rm());
218       if (sib) {
219         new_addr.SetSIB(addr.scale(), addr.index(), addr.base());
220       }
221       new_addr.SetDisp32(new_disp);
222     } else if (new_disp == 0 && !ebp) {
223       // Mod 00b (excluding a special case for EBP) => no displacement.
224       new_addr.SetModRM(0, addr.rm());
225       if (sib) {
226         new_addr.SetSIB(addr.scale(), addr.index(), addr.base());
227       }
228     } else if (new_disp >= -128 && new_disp <= 127) {
229       // Mod 01b => 8-bit displacement.
230       new_addr.SetModRM(1, addr.rm());
231       if (sib) {
232         new_addr.SetSIB(addr.scale(), addr.index(), addr.base());
233       }
234       new_addr.SetDisp8(new_disp);
235     } else {
236       // Mod 10b => 32-bit displacement.
237       new_addr.SetModRM(2, addr.rm());
238       if (sib) {
239         new_addr.SetSIB(addr.scale(), addr.index(), addr.base());
240       }
241       new_addr.SetDisp32(new_disp);
242     }
243     new_addr.SetFixup(addr.GetFixup());
244     return new_addr;
245   }
246 
GetBaseRegister()247   Register GetBaseRegister() {
248     if (rm() == ESP) {
249       return base();
250     } else {
251       return rm();
252     }
253   }
254 
Absolute(uintptr_t addr)255   static Address Absolute(uintptr_t addr) {
256     Address result;
257     result.SetModRM(0, EBP);
258     result.SetDisp32(addr);
259     return result;
260   }
261 
Absolute(ThreadOffset32 addr)262   static Address Absolute(ThreadOffset32 addr) {
263     return Absolute(addr.Int32Value());
264   }
265 
266   inline bool operator==(const Address& addr) const {
267     return static_cast<const Operand&>(*this) == static_cast<const Operand&>(addr);
268   }
269 
270  private:
Address()271   Address() {}
272 
Init(Register base_in,int32_t disp)273   void Init(Register base_in, int32_t disp) {
274     if (disp == 0 && base_in != EBP) {
275       SetModRM(0, base_in);
276       if (base_in == ESP) SetSIB(TIMES_1, ESP, base_in);
277     } else if (disp >= -128 && disp <= 127) {
278       SetModRM(1, base_in);
279       if (base_in == ESP) SetSIB(TIMES_1, ESP, base_in);
280       SetDisp8(disp);
281     } else {
282       SetModRM(2, base_in);
283       if (base_in == ESP) SetSIB(TIMES_1, ESP, base_in);
284       SetDisp32(disp);
285     }
286   }
287 
Init(Register base_in,Register index_in,ScaleFactor scale_in,int32_t disp)288   void Init(Register base_in, Register index_in, ScaleFactor scale_in, int32_t disp) {
289     CHECK_NE(index_in, ESP);  // Illegal addressing mode.
290     if (disp == 0 && base_in != EBP) {
291       SetModRM(0, ESP);
292       SetSIB(scale_in, index_in, base_in);
293     } else if (disp >= -128 && disp <= 127) {
294       SetModRM(1, ESP);
295       SetSIB(scale_in, index_in, base_in);
296       SetDisp8(disp);
297     } else {
298       SetModRM(2, ESP);
299       SetSIB(scale_in, index_in, base_in);
300       SetDisp32(disp);
301     }
302   }
303 };
304 
305 std::ostream& operator<<(std::ostream& os, const Address& addr);
306 
307 // This is equivalent to the Label class, used in a slightly different context. We
308 // inherit the functionality of the Label class, but prevent unintended
309 // derived-to-base conversions by making the base class private.
310 class NearLabel : private Label {
311  public:
NearLabel()312   NearLabel() : Label() {}
313 
314   // Expose the Label routines that we need.
315   using Label::Position;
316   using Label::LinkPosition;
317   using Label::IsBound;
318   using Label::IsUnused;
319   using Label::IsLinked;
320 
321  private:
322   using Label::BindTo;
323   using Label::LinkTo;
324 
325   friend class x86::X86Assembler;
326 
327   DISALLOW_COPY_AND_ASSIGN(NearLabel);
328 };
329 
330 /**
331  * Class to handle constant area values.
332  */
333 class ConstantArea {
334  public:
ConstantArea(ArenaAllocator * allocator)335   explicit ConstantArea(ArenaAllocator* allocator)
336       : buffer_(allocator->Adapter(kArenaAllocAssembler)) {}
337 
338   // Add a double to the constant area, returning the offset into
339   // the constant area where the literal resides.
340   size_t AddDouble(double v);
341 
342   // Add a float to the constant area, returning the offset into
343   // the constant area where the literal resides.
344   size_t AddFloat(float v);
345 
346   // Add an int32_t to the constant area, returning the offset into
347   // the constant area where the literal resides.
348   size_t AddInt32(int32_t v);
349 
350   // Add an int32_t to the end of the constant area, returning the offset into
351   // the constant area where the literal resides.
352   size_t AppendInt32(int32_t v);
353 
354   // Add an int64_t to the constant area, returning the offset into
355   // the constant area where the literal resides.
356   size_t AddInt64(int64_t v);
357 
IsEmpty()358   bool IsEmpty() const {
359     return buffer_.size() == 0;
360   }
361 
GetSize()362   size_t GetSize() const {
363     return buffer_.size() * elem_size_;
364   }
365 
GetBuffer()366   ArrayRef<const int32_t> GetBuffer() const {
367     return ArrayRef<const int32_t>(buffer_);
368   }
369 
370  private:
371   static constexpr size_t elem_size_ = sizeof(int32_t);
372   ArenaVector<int32_t> buffer_;
373 };
374 
375 class X86Assembler final : public Assembler {
376  public:
377   explicit X86Assembler(ArenaAllocator* allocator,
378                         const X86InstructionSetFeatures* instruction_set_features = nullptr)
Assembler(allocator)379                 : Assembler(allocator),
380                   constant_area_(allocator),
381                   has_AVX_(instruction_set_features != nullptr ? instruction_set_features->HasAVX() : false),
382                   has_AVX2_(instruction_set_features != nullptr ? instruction_set_features->HasAVX2() :false) {}
~X86Assembler()383   virtual ~X86Assembler() {}
384 
385   /*
386    * Emit Machine Instructions.
387    */
388   void call(Register reg);
389   void call(const Address& address);
390   void call(Label* label);
391   void call(const ExternalLabel& label);
392 
393   void pushl(Register reg);
394   void pushl(const Address& address);
395   void pushl(const Immediate& imm);
396 
397   void popl(Register reg);
398   void popl(const Address& address);
399 
400   void movl(Register dst, const Immediate& src);
401   void movl(Register dst, Register src);
402 
403   void movl(Register dst, const Address& src);
404   void movl(const Address& dst, Register src);
405   void movl(const Address& dst, const Immediate& imm);
406   void movl(const Address& dst, Label* lbl);
407 
408   void movntl(const Address& dst, Register src);
409 
410   void blsi(Register dst, Register src);  // no addr variant (for now)
411   void blsmsk(Register dst, Register src);  // no addr variant (for now)
412   void blsr(Register dst, Register src);  // no addr variant (for now)
413 
414   void bswapl(Register dst);
415 
416   void bsfl(Register dst, Register src);
417   void bsfl(Register dst, const Address& src);
418   void bsrl(Register dst, Register src);
419   void bsrl(Register dst, const Address& src);
420 
421   void popcntl(Register dst, Register src);
422   void popcntl(Register dst, const Address& src);
423 
424   void rorl(Register reg, const Immediate& imm);
425   void rorl(Register operand, Register shifter);
426   void roll(Register reg, const Immediate& imm);
427   void roll(Register operand, Register shifter);
428 
429   void movzxb(Register dst, ByteRegister src);
430   void movzxb(Register dst, const Address& src);
431   void movsxb(Register dst, ByteRegister src);
432   void movsxb(Register dst, const Address& src);
433   void movb(Register dst, const Address& src);
434   void movb(const Address& dst, ByteRegister src);
435   void movb(const Address& dst, const Immediate& imm);
436 
437   void movzxw(Register dst, Register src);
438   void movzxw(Register dst, const Address& src);
439   void movsxw(Register dst, Register src);
440   void movsxw(Register dst, const Address& src);
441   void movw(Register dst, const Address& src);
442   void movw(const Address& dst, Register src);
443   void movw(const Address& dst, const Immediate& imm);
444 
445   void leal(Register dst, const Address& src);
446 
447   void cmovl(Condition condition, Register dst, Register src);
448   void cmovl(Condition condition, Register dst, const Address& src);
449 
450   void setb(Condition condition, Register dst);
451 
452   void movaps(XmmRegister dst, XmmRegister src);     // move
453   void movaps(XmmRegister dst, const Address& src);  // load aligned
454   void movups(XmmRegister dst, const Address& src);  // load unaligned
455   void movaps(const Address& dst, XmmRegister src);  // store aligned
456   void movups(const Address& dst, XmmRegister src);  // store unaligned
457 
458   void vmovaps(XmmRegister dst, XmmRegister src);     // move
459   void vmovaps(XmmRegister dst, const Address& src);  // load aligned
460   void vmovups(XmmRegister dst, const Address& src);  // load unaligned
461   void vmovaps(const Address& dst, XmmRegister src);  // store aligned
462   void vmovups(const Address& dst, XmmRegister src);  // store unaligned
463 
464   void movss(XmmRegister dst, const Address& src);
465   void movss(const Address& dst, XmmRegister src);
466   void movss(XmmRegister dst, XmmRegister src);
467 
468   void movd(XmmRegister dst, Register src);
469   void movd(Register dst, XmmRegister src);
470 
471   void addss(XmmRegister dst, XmmRegister src);
472   void addss(XmmRegister dst, const Address& src);
473   void subss(XmmRegister dst, XmmRegister src);
474   void subss(XmmRegister dst, const Address& src);
475   void mulss(XmmRegister dst, XmmRegister src);
476   void mulss(XmmRegister dst, const Address& src);
477   void divss(XmmRegister dst, XmmRegister src);
478   void divss(XmmRegister dst, const Address& src);
479 
480   void addps(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
481   void subps(XmmRegister dst, XmmRegister src);
482   void mulps(XmmRegister dst, XmmRegister src);
483   void divps(XmmRegister dst, XmmRegister src);
484 
485   void vmulps(XmmRegister dst, XmmRegister src1, XmmRegister src2);
486   void vmulpd(XmmRegister dst, XmmRegister src1, XmmRegister src2);
487   void vdivps(XmmRegister dst, XmmRegister src1, XmmRegister src2);
488   void vdivpd(XmmRegister dst, XmmRegister src1, XmmRegister src2);
489 
490   void vaddps(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);
491   void vsubps(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);
492   void vsubpd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);
493   void vaddpd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);
494 
495   void vfmadd213ss(XmmRegister acc, XmmRegister left, XmmRegister right);
496   void vfmadd213sd(XmmRegister acc, XmmRegister left, XmmRegister right);
497 
498   void movapd(XmmRegister dst, XmmRegister src);     // move
499   void movapd(XmmRegister dst, const Address& src);  // load aligned
500   void movupd(XmmRegister dst, const Address& src);  // load unaligned
501   void movapd(const Address& dst, XmmRegister src);  // store aligned
502   void movupd(const Address& dst, XmmRegister src);  // store unaligned
503 
504   void vmovapd(XmmRegister dst, XmmRegister src);     // move
505   void vmovapd(XmmRegister dst, const Address& src);  // load aligned
506   void vmovupd(XmmRegister dst, const Address& src);  // load unaligned
507   void vmovapd(const Address& dst, XmmRegister src);  // store aligned
508   void vmovupd(const Address& dst, XmmRegister src);  // store unaligned
509 
510   void movsd(XmmRegister dst, const Address& src);
511   void movsd(const Address& dst, XmmRegister src);
512   void movsd(XmmRegister dst, XmmRegister src);
513 
514   void movhpd(XmmRegister dst, const Address& src);
515   void movhpd(const Address& dst, XmmRegister src);
516 
517   void addsd(XmmRegister dst, XmmRegister src);
518   void addsd(XmmRegister dst, const Address& src);
519   void subsd(XmmRegister dst, XmmRegister src);
520   void subsd(XmmRegister dst, const Address& src);
521   void mulsd(XmmRegister dst, XmmRegister src);
522   void mulsd(XmmRegister dst, const Address& src);
523   void divsd(XmmRegister dst, XmmRegister src);
524   void divsd(XmmRegister dst, const Address& src);
525 
526   void addpd(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
527   void subpd(XmmRegister dst, XmmRegister src);
528   void mulpd(XmmRegister dst, XmmRegister src);
529   void divpd(XmmRegister dst, XmmRegister src);
530 
531   void movdqa(XmmRegister dst, XmmRegister src);     // move
532   void movdqa(XmmRegister dst, const Address& src);  // load aligned
533   void movdqu(XmmRegister dst, const Address& src);  // load unaligned
534   void movdqa(const Address& dst, XmmRegister src);  // store aligned
535   void movdqu(const Address& dst, XmmRegister src);  // store unaligned
536 
537   void vmovdqa(XmmRegister dst, XmmRegister src);     // move
538   void vmovdqa(XmmRegister dst, const Address& src);  // load aligned
539   void vmovdqu(XmmRegister dst, const Address& src);  // load unaligned
540   void vmovdqa(const Address& dst, XmmRegister src);  // store aligned
541   void vmovdqu(const Address& dst, XmmRegister src);  // store unaligned
542 
543   void paddb(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
544   void psubb(XmmRegister dst, XmmRegister src);
545 
546   void vpaddb(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);
547   void vpaddw(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);
548 
549   void paddw(XmmRegister dst, XmmRegister src);
550   void psubw(XmmRegister dst, XmmRegister src);
551   void pmullw(XmmRegister dst, XmmRegister src);
552   void vpmullw(XmmRegister dst, XmmRegister src1, XmmRegister src2);
553 
554   void vpsubb(XmmRegister dst, XmmRegister src1, XmmRegister src2);
555   void vpsubw(XmmRegister dst, XmmRegister src1, XmmRegister src2);
556   void vpsubd(XmmRegister dst, XmmRegister src1, XmmRegister src2);
557 
558   void paddd(XmmRegister dst, XmmRegister src);
559   void psubd(XmmRegister dst, XmmRegister src);
560   void pmulld(XmmRegister dst, XmmRegister src);
561 
562   void vpmulld(XmmRegister dst, XmmRegister src1, XmmRegister src2);
563 
564   void vpaddd(XmmRegister dst, XmmRegister src1, XmmRegister src2);
565 
566   void paddq(XmmRegister dst, XmmRegister src);
567   void psubq(XmmRegister dst, XmmRegister src);
568 
569   void vpaddq(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);
570   void vpsubq(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);
571 
572   void paddusb(XmmRegister dst, XmmRegister src);
573   void paddsb(XmmRegister dst, XmmRegister src);
574   void paddusw(XmmRegister dst, XmmRegister src);
575   void paddsw(XmmRegister dst, XmmRegister src);
576   void psubusb(XmmRegister dst, XmmRegister src);
577   void psubsb(XmmRegister dst, XmmRegister src);
578   void psubusw(XmmRegister dst, XmmRegister src);
579   void psubsw(XmmRegister dst, XmmRegister src);
580 
581   void cvtsi2ss(XmmRegister dst, Register src);
582   void cvtsi2sd(XmmRegister dst, Register src);
583 
584   void cvtss2si(Register dst, XmmRegister src);
585   void cvtss2sd(XmmRegister dst, XmmRegister src);
586 
587   void cvtsd2si(Register dst, XmmRegister src);
588   void cvtsd2ss(XmmRegister dst, XmmRegister src);
589 
590   void cvttss2si(Register dst, XmmRegister src);
591   void cvttsd2si(Register dst, XmmRegister src);
592 
593   void cvtdq2ps(XmmRegister dst, XmmRegister src);
594   void cvtdq2pd(XmmRegister dst, XmmRegister src);
595 
596   void comiss(XmmRegister a, XmmRegister b);
597   void comiss(XmmRegister a, const Address& b);
598   void comisd(XmmRegister a, XmmRegister b);
599   void comisd(XmmRegister a, const Address& b);
600   void ucomiss(XmmRegister a, XmmRegister b);
601   void ucomiss(XmmRegister a, const Address& b);
602   void ucomisd(XmmRegister a, XmmRegister b);
603   void ucomisd(XmmRegister a, const Address& b);
604 
605   void roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm);
606   void roundss(XmmRegister dst, XmmRegister src, const Immediate& imm);
607 
608   void sqrtsd(XmmRegister dst, XmmRegister src);
609   void sqrtss(XmmRegister dst, XmmRegister src);
610 
611   void xorpd(XmmRegister dst, const Address& src);
612   void xorpd(XmmRegister dst, XmmRegister src);
613   void xorps(XmmRegister dst, const Address& src);
614   void xorps(XmmRegister dst, XmmRegister src);
615   void pxor(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
616   void vpxor(XmmRegister dst, XmmRegister src1, XmmRegister src2);
617   void vxorps(XmmRegister dst, XmmRegister src1, XmmRegister src2);
618   void vxorpd(XmmRegister dst, XmmRegister src1, XmmRegister src2);
619 
620   void andpd(XmmRegister dst, XmmRegister src);
621   void andpd(XmmRegister dst, const Address& src);
622   void andps(XmmRegister dst, XmmRegister src);
623   void andps(XmmRegister dst, const Address& src);
624   void pand(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
625   void vpand(XmmRegister dst, XmmRegister src1, XmmRegister src2);
626   void vandps(XmmRegister dst, XmmRegister src1, XmmRegister src2);
627   void vandpd(XmmRegister dst, XmmRegister src1, XmmRegister src2);
628 
629   void andn(Register dst, Register src1, Register src2);  // no addr variant (for now)
630   void andnpd(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
631   void andnps(XmmRegister dst, XmmRegister src);
632   void pandn(XmmRegister dst, XmmRegister src);
633   void vpandn(XmmRegister dst, XmmRegister src1, XmmRegister src2);
634   void vandnps(XmmRegister dst, XmmRegister src1, XmmRegister src2);
635   void vandnpd(XmmRegister dst, XmmRegister src1, XmmRegister src2);
636 
637   void orpd(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
638   void orps(XmmRegister dst, XmmRegister src);
639   void por(XmmRegister dst, XmmRegister src);
640   void vpor(XmmRegister dst, XmmRegister src1, XmmRegister src2);
641   void vorps(XmmRegister dst, XmmRegister src1, XmmRegister src2);
642   void vorpd(XmmRegister dst, XmmRegister src1, XmmRegister src2);
643 
644   void pavgb(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
645   void pavgw(XmmRegister dst, XmmRegister src);
646   void psadbw(XmmRegister dst, XmmRegister src);
647   void pmaddwd(XmmRegister dst, XmmRegister src);
648   void vpmaddwd(XmmRegister dst, XmmRegister src1, XmmRegister src2);
649   void phaddw(XmmRegister dst, XmmRegister src);
650   void phaddd(XmmRegister dst, XmmRegister src);
651   void haddps(XmmRegister dst, XmmRegister src);
652   void haddpd(XmmRegister dst, XmmRegister src);
653   void phsubw(XmmRegister dst, XmmRegister src);
654   void phsubd(XmmRegister dst, XmmRegister src);
655   void hsubps(XmmRegister dst, XmmRegister src);
656   void hsubpd(XmmRegister dst, XmmRegister src);
657 
658   void pminsb(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
659   void pmaxsb(XmmRegister dst, XmmRegister src);
660   void pminsw(XmmRegister dst, XmmRegister src);
661   void pmaxsw(XmmRegister dst, XmmRegister src);
662   void pminsd(XmmRegister dst, XmmRegister src);
663   void pmaxsd(XmmRegister dst, XmmRegister src);
664 
665   void pminub(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
666   void pmaxub(XmmRegister dst, XmmRegister src);
667   void pminuw(XmmRegister dst, XmmRegister src);
668   void pmaxuw(XmmRegister dst, XmmRegister src);
669   void pminud(XmmRegister dst, XmmRegister src);
670   void pmaxud(XmmRegister dst, XmmRegister src);
671 
672   void minps(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
673   void maxps(XmmRegister dst, XmmRegister src);
674   void minpd(XmmRegister dst, XmmRegister src);
675   void maxpd(XmmRegister dst, XmmRegister src);
676 
677   void pcmpeqb(XmmRegister dst, XmmRegister src);
678   void pcmpeqw(XmmRegister dst, XmmRegister src);
679   void pcmpeqd(XmmRegister dst, XmmRegister src);
680   void pcmpeqq(XmmRegister dst, XmmRegister src);
681 
682   void pcmpgtb(XmmRegister dst, XmmRegister src);
683   void pcmpgtw(XmmRegister dst, XmmRegister src);
684   void pcmpgtd(XmmRegister dst, XmmRegister src);
685   void pcmpgtq(XmmRegister dst, XmmRegister src);  // SSE4.2
686 
687   void shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm);
688   void shufps(XmmRegister dst, XmmRegister src, const Immediate& imm);
689   void pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm);
690 
691   void punpcklbw(XmmRegister dst, XmmRegister src);
692   void punpcklwd(XmmRegister dst, XmmRegister src);
693   void punpckldq(XmmRegister dst, XmmRegister src);
694   void punpcklqdq(XmmRegister dst, XmmRegister src);
695 
696   void punpckhbw(XmmRegister dst, XmmRegister src);
697   void punpckhwd(XmmRegister dst, XmmRegister src);
698   void punpckhdq(XmmRegister dst, XmmRegister src);
699   void punpckhqdq(XmmRegister dst, XmmRegister src);
700 
701   void psllw(XmmRegister reg, const Immediate& shift_count);
702   void pslld(XmmRegister reg, const Immediate& shift_count);
703   void psllq(XmmRegister reg, const Immediate& shift_count);
704 
705   void psraw(XmmRegister reg, const Immediate& shift_count);
706   void psrad(XmmRegister reg, const Immediate& shift_count);
707   // no psraq
708 
709   void psrlw(XmmRegister reg, const Immediate& shift_count);
710   void psrld(XmmRegister reg, const Immediate& shift_count);
711   void psrlq(XmmRegister reg, const Immediate& shift_count);
712   void psrldq(XmmRegister reg, const Immediate& shift_count);
713 
714   void flds(const Address& src);
715   void fstps(const Address& dst);
716   void fsts(const Address& dst);
717 
718   void fldl(const Address& src);
719   void fstpl(const Address& dst);
720   void fstl(const Address& dst);
721 
722   void fstsw();
723 
724   void fucompp();
725 
726   void fnstcw(const Address& dst);
727   void fldcw(const Address& src);
728 
729   void fistpl(const Address& dst);
730   void fistps(const Address& dst);
731   void fildl(const Address& src);
732   void filds(const Address& src);
733 
734   void fincstp();
735   void ffree(const Immediate& index);
736 
737   void fsin();
738   void fcos();
739   void fptan();
740   void fprem();
741 
742   void xchgb(ByteRegister dst, ByteRegister src);
743   void xchgb(ByteRegister reg, const Address& address);
744 
745   // Wrappers for `xchgb` that accept `Register` instead of `ByteRegister` (used for testing).
746   void xchgb(Register dst, Register src);
747   void xchgb(Register reg, const Address& address);
748 
749   void xchgw(Register dst, Register src);
750   void xchgw(Register reg, const Address& address);
751 
752   void xchgl(Register dst, Register src);
753   void xchgl(Register reg, const Address& address);
754 
755   void cmpb(const Address& address, const Immediate& imm);
756   void cmpw(const Address& address, const Immediate& imm);
757 
758   void cmpl(Register reg, const Immediate& imm);
759   void cmpl(Register reg0, Register reg1);
760   void cmpl(Register reg, const Address& address);
761 
762   void cmpl(const Address& address, Register reg);
763   void cmpl(const Address& address, const Immediate& imm);
764 
765   void testl(Register reg1, Register reg2);
766   void testl(Register reg, const Immediate& imm);
767   void testl(Register reg1, const Address& address);
768 
769   void testb(const Address& dst, const Immediate& imm);
770   void testl(const Address& dst, const Immediate& imm);
771 
772   void andl(Register dst, const Immediate& imm);
773   void andl(Register dst, Register src);
774   void andl(Register dst, const Address& address);
775   void andw(const Address& address, const Immediate& imm);
776 
777   void orl(Register dst, const Immediate& imm);
778   void orl(Register dst, Register src);
779   void orl(Register dst, const Address& address);
780 
781   void xorl(Register dst, Register src);
782   void xorl(Register dst, const Immediate& imm);
783   void xorl(Register dst, const Address& address);
784 
  // Addition emitters with a register as the first operand.
  void addl(Register dst, Register src);
  void addl(Register reg, const Immediate& imm);
  void addl(Register reg, const Address& address);

  // Addition emitters with a memory first operand, plus the word-sized immediate forms.
  void addl(const Address& address, Register reg);
  void addl(const Address& address, const Immediate& imm);
  void addw(const Address& address, const Immediate& imm);
  void addw(Register reg, const Immediate& imm);

  // ADC emitters (presumably add-with-carry, per the x86 mnemonic; defined elsewhere).
  void adcl(Register dst, Register src);
  void adcl(Register reg, const Immediate& imm);
  void adcl(Register dst, const Address& address);

  // Subtraction emitters. Unlike `addl`, there is no memory-vs-immediate overload here.
  void subl(Register dst, Register src);
  void subl(Register reg, const Immediate& imm);
  void subl(Register reg, const Address& address);
  void subl(const Address& address, Register src);
802 
  // CDQ emitter; takes no operands.
  void cdq();

  // Division emitters taking a single register operand (`idivl` presumably signed,
  // `divl` presumably unsigned, per the x86 mnemonics).
  void idivl(Register reg);
  void divl(Register reg);

  // IMUL emitters in two-operand form, plus one three-operand form with an immediate.
  void imull(Register dst, Register src);
  void imull(Register reg, const Immediate& imm);
  void imull(Register dst, Register src, const Immediate& imm);
  void imull(Register reg, const Address& address);

  // IMUL emitters in one-operand form (register or memory).
  void imull(Register reg);
  void imull(const Address& address);

  // MUL emitters in one-operand form (register or memory).
  void mull(Register reg);
  void mull(const Address& address);

  // SBB emitters (presumably subtract-with-borrow, per the x86 mnemonic).
  void sbbl(Register dst, Register src);
  void sbbl(Register reg, const Immediate& imm);
  void sbbl(Register reg, const Address& address);
  void sbbl(const Address& address, Register src);

  // Increment emitters (register or memory operand).
  void incl(Register reg);
  void incl(const Address& address);

  // Decrement emitters (register or memory operand).
  void decl(Register reg);
  void decl(const Address& address);
829 
  // Shift emitters. Each of `shll`, `shrl` and `sarl` comes in four overloads: the value
  // being shifted is either a register or a memory location, and the shift count is either
  // an immediate or a `shifter` register.
  // NOTE(review): x86 variable shifts take their count from CL, so `shifter` is presumably
  // required to be ECX -- confirm against the definitions.
  void shll(Register reg, const Immediate& imm);
  void shll(Register operand, Register shifter);
  void shll(const Address& address, const Immediate& imm);
  void shll(const Address& address, Register shifter);
  void shrl(Register reg, const Immediate& imm);
  void shrl(Register operand, Register shifter);
  void shrl(const Address& address, const Immediate& imm);
  void shrl(const Address& address, Register shifter);
  void sarl(Register reg, const Immediate& imm);
  void sarl(Register operand, Register shifter);
  void sarl(const Address& address, const Immediate& imm);
  void sarl(const Address& address, Register shifter);
  // SHLD/SHRD emitters: two register operands plus a count given by register or immediate.
  void shld(Register dst, Register src, Register shifter);
  void shld(Register dst, Register src, const Immediate& imm);
  void shrd(Register dst, Register src, Register shifter);
  void shrd(Register dst, Register src, const Immediate& imm);
846 
  // Single-register NEG and NOT emitters. `negl` is also what the heap-reference
  // poisoning helpers in this class call.
  void negl(Register reg);
  void notl(Register reg);

  // ENTER (with an immediate operand) and LEAVE emitters.
  void enter(const Immediate& imm);
  void leave();

  // RET emitters, without and with an immediate operand.
  void ret();
  void ret(const Immediate& imm);

  // Operand-less NOP, INT3 and HLT emitters.
  void nop();
  void int3();
  void hlt();
859 
  // Conditional jump emitters. Targets are given as a `Label` or a `NearLabel`; `jecxz`
  // only accepts a `NearLabel`.
  void j(Condition condition, Label* label);
  void j(Condition condition, NearLabel* label);
  void jecxz(NearLabel* label);

  // Unconditional jump emitters: through a register, through memory, or to a label.
  // `Jump(Label*)` in this class forwards to the `Label*` overload.
  void jmp(Register reg);
  void jmp(const Address& address);
  void jmp(Label* label);
  void jmp(NearLabel* label);
868 
  // Emitters for repeat-prefixed string instructions. Each name spells out the prefix
  // (repne/repe/rep) followed by the string instruction and its operand-size suffix.
  // None of them take explicit operands.
  void repne_scasb();
  void repne_scasw();
  void repe_cmpsb();
  void repe_cmpsw();
  void repe_cmpsl();
  void rep_movsb();
  void rep_movsl();
  void rep_movsw();
877 
  // Prefix emitter that returns an assembler pointer so the prefixed instruction can be
  // chained, as the Lock* helpers in this class do with `lock()->cmpxchgl(...)`.
  X86Assembler* lock();
  // Compare-and-exchange emitters; the first operand is always a memory location.
  void cmpxchgb(const Address& address, ByteRegister reg);
  void cmpxchgw(const Address& address, Register reg);
  void cmpxchgl(const Address& address, Register reg);
  void cmpxchg8b(const Address& address);

  // Exchange-and-add emitters; the first operand is always a memory location.
  void xaddb(const Address& address, ByteRegister reg);
  void xaddw(const Address& address, Register reg);
  void xaddl(const Address& address, Register reg);

  // MFENCE emitter; takes no operands.
  void mfence();

  // Prefix emitters returning an assembler pointer, same shape as `lock()`.
  // NOTE(review): presumably the FS/GS segment-override prefixes -- confirm in the definitions.
  X86Assembler* fs();
  X86Assembler* gs();
892 
  //
  // Macros for high-level operations.
  //

  // Adds the immediate `imm` to `reg`. The instruction selection is in the definition,
  // which is not part of this header.
  void AddImmediate(Register reg, const Immediate& imm);

  // Load a 64-bit integer / double constant into the XMM register `dst`.
  void LoadLongConstant(XmmRegister dst, int64_t value);
  void LoadDoubleConstant(XmmRegister dst, double value);
901 
902   // For testing purpose (Repeat* functions expect Register rather than ByteRegister).
cmpxchgb(const Address & address,Register reg)903   void cmpxchgb(const Address& address, Register reg) {
904     cmpxchgb(address, static_cast<ByteRegister>(reg));
905   }
906 
907   // For testing purpose (Repeat* functions expect Register rather than ByteRegister).
LockCmpxchgb(const Address & address,Register reg)908   void LockCmpxchgb(const Address& address, Register reg) {
909     LockCmpxchgb(address, static_cast<ByteRegister>(reg));
910   }
911 
LockCmpxchgb(const Address & address,ByteRegister reg)912   void LockCmpxchgb(const Address& address, ByteRegister reg) {
913     lock()->cmpxchgb(address, reg);
914   }
915 
LockCmpxchgw(const Address & address,Register reg)916   void LockCmpxchgw(const Address& address, Register reg) {
917     AssemblerBuffer::EnsureCapacity ensured(&buffer_);
918     // We make sure that the operand size override bytecode is emited before the lock bytecode.
919     // We test against clang which enforces this bytecode order.
920     EmitOperandSizeOverride();
921     EmitUint8(0xF0);
922     EmitUint8(0x0F);
923     EmitUint8(0xB1);
924     EmitOperand(reg, address);
925   }
926 
LockCmpxchgl(const Address & address,Register reg)927   void LockCmpxchgl(const Address& address, Register reg) {
928     lock()->cmpxchgl(address, reg);
929   }
930 
LockCmpxchg8b(const Address & address)931   void LockCmpxchg8b(const Address& address) {
932     lock()->cmpxchg8b(address);
933   }
934 
935   // For testing purpose (Repeat* functions expect Register rather than ByteRegister).
LockXaddb(const Address & address,Register reg)936   void LockXaddb(const Address& address, Register reg) {
937     LockXaddb(address, static_cast<ByteRegister>(reg));
938   }
939 
LockXaddb(const Address & address,ByteRegister reg)940   void LockXaddb(const Address& address, ByteRegister reg) {
941     lock()->xaddb(address, reg);
942   }
943 
LockXaddw(const Address & address,Register reg)944   void LockXaddw(const Address& address, Register reg) {
945     AssemblerBuffer::EnsureCapacity ensured(&buffer_);
946     // We make sure that the operand size override bytecode is emited before the lock bytecode.
947     // We test against clang which enforces this bytecode order.
948     EmitOperandSizeOverride();
949     EmitUint8(0xF0);
950     EmitUint8(0x0F);
951     EmitUint8(0xC1);
952     EmitOperand(reg, address);
953   }
954 
LockXaddl(const Address & address,Register reg)955   void LockXaddl(const Address& address, Register reg) {
956     lock()->xaddl(address, reg);
957   }
958 
rdtsc()959   void rdtsc() {
960     AssemblerBuffer::EnsureCapacity ensured(&buffer_);
961     EmitUint8(0x0F);
962     EmitUint8(0x31);
963   }
964 
965   //
966   // Misc. functionality
967   //
PreferredLoopAlignment()968   int PreferredLoopAlignment() { return 16; }
  // Alignment helper taking the requested `alignment` and an `offset`; defined elsewhere.
  // NOTE(review): presumably pads the buffer until `offset` plus the current position is
  // a multiple of `alignment` -- confirm against the definition.
  void Align(int alignment, int offset);
  // Binds `label`; overrides the base-class hook.
  void Bind(Label* label) override;
Jump(Label * label)971   void Jump(Label* label) override {
972     jmp(label);
973   }
  // `NearLabel` counterpart of `Bind(Label*)`; not an override.
  void Bind(NearLabel* label);
975 
976   //
977   // Heap poisoning.
978   //
979 
980   // Poison a heap reference contained in `reg`.
PoisonHeapReference(Register reg)981   void PoisonHeapReference(Register reg) { negl(reg); }
982   // Unpoison a heap reference contained in `reg`.
UnpoisonHeapReference(Register reg)983   void UnpoisonHeapReference(Register reg) { negl(reg); }
984   // Poison a heap reference contained in `reg` if heap poisoning is enabled.
MaybePoisonHeapReference(Register reg)985   void MaybePoisonHeapReference(Register reg) {
986     if (kPoisonHeapReferences) {
987       PoisonHeapReference(reg);
988     }
989   }
990   // Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
MaybeUnpoisonHeapReference(Register reg)991   void MaybeUnpoisonHeapReference(Register reg) {
992     if (kPoisonHeapReferences) {
993       UnpoisonHeapReference(reg);
994     }
995   }
996 
997   // Add a double to the constant area, returning the offset into
998   // the constant area where the literal resides.
AddDouble(double v)999   size_t AddDouble(double v) { return constant_area_.AddDouble(v); }
1000 
1001   // Add a float to the constant area, returning the offset into
1002   // the constant area where the literal resides.
AddFloat(float v)1003   size_t AddFloat(float v)   { return constant_area_.AddFloat(v); }
1004 
1005   // Add an int32_t to the constant area, returning the offset into
1006   // the constant area where the literal resides.
AddInt32(int32_t v)1007   size_t AddInt32(int32_t v) {
1008     return constant_area_.AddInt32(v);
1009   }
1010 
1011   // Add an int32_t to the end of the constant area, returning the offset into
1012   // the constant area where the literal resides.
AppendInt32(int32_t v)1013   size_t AppendInt32(int32_t v) {
1014     return constant_area_.AppendInt32(v);
1015   }
1016 
1017   // Add an int64_t to the constant area, returning the offset into
1018   // the constant area where the literal resides.
AddInt64(int64_t v)1019   size_t AddInt64(int64_t v) { return constant_area_.AddInt64(v); }
1020 
  // Add the contents of the constant area to the assembler buffer.
  // The offsets returned by the Add*/Append* helpers are relative to the constant area,
  // not to the assembler buffer.
  void AddConstantArea();
1023 
1024   // Is the constant area empty? Return true if there are no literals in the constant area.
IsConstantAreaEmpty()1025   bool IsConstantAreaEmpty() const { return constant_area_.IsEmpty(); }
1026 
1027   // Return the current size of the constant area.
ConstantAreaSize()1028   size_t ConstantAreaSize() const { return constant_area_.GetSize(); }
1029 
  // Feature query; defined elsewhere.
  // NOTE(review): presumably reports whether either `has_AVX_` or `has_AVX2_` is set --
  // confirm against the definition.
  bool CpuHasAVXorAVX2FeatureFlag();
1031 
1032  private:
  // Low-level emit helpers. All six are defined inline after the class body, at the
  // bottom of this header.
  inline void EmitUint8(uint8_t value);
  inline void EmitInt32(int32_t value);
  // Emits one byte: 0xC0 + (rm << 3) + reg.
  inline void EmitRegisterOperand(int rm, int reg);
  // Same as EmitRegisterOperand, with `reg` given as an XmmRegister.
  inline void EmitXmmRegisterOperand(int rm, XmmRegister reg);
  inline void EmitFixup(AssemblerFixup* fixup);
  // Emits the byte 0x66.
  inline void EmitOperandSizeOverride();
1039 
  // Out-of-line emit helpers; only the declarations are visible here.
  // `EmitOperand` is what the hand-encoded LockCmpxchgw/LockXaddw use to emit the
  // register + address operand bytes after the opcode.
  void EmitOperand(int rm, const Operand& operand);
  // `is_16_op` defaults to false.
  // NOTE(review): presumably it selects a 16-bit immediate encoding -- confirm in the definition.
  void EmitImmediate(const Immediate& imm, bool is_16_op = false);
  void EmitComplex(
      int rm, const Operand& operand, const Immediate& immediate, bool is_16_op = false);
  // Label helpers.
  void EmitLabel(Label* label, int instruction_size);
  void EmitLabelLink(Label* label);
  void EmitLabelLink(NearLabel* label);

  // Shared helpers for shifts, with the count given by immediate or by `shifter` register.
  void EmitGenericShift(int rm, const Operand& operand, const Immediate& imm);
  void EmitGenericShift(int rm, const Operand& operand, Register shifter);
1050 
  // VEX prefix helpers; only the declarations are visible here. Each returns a uint8_t.
  // NOTE(review): despite the "Emit" prefix these presumably compute and return the prefix
  // byte for the caller to emit -- confirm against the definitions.
  uint8_t EmitVexPrefixByteZero(bool is_twobyte_form);
  uint8_t EmitVexPrefixByteOne(bool R, bool X, bool B, int SET_VEX_M);
  uint8_t EmitVexPrefixByteOne(bool R,
                               X86ManagedRegister operand,
                               int SET_VEX_L,
                               int SET_VEX_PP);
  uint8_t EmitVexPrefixByteTwo(bool W,
                               X86ManagedRegister operand,
                               int SET_VEX_L,
                               int SET_VEX_PP);
  uint8_t EmitVexPrefixByteTwo(bool W,
                               int SET_VEX_L,
                               int SET_VEX_PP);
1064 
  // Helper function to emit a shorter variant of XCHG for when at least one operand is EAX/AX.
  // NOTE(review): the bool result presumably tells the caller whether the short form was
  // emitted -- confirm against the definition.
  bool try_xchg_eax(Register dst, Register src);
1067 
  // Literal pool backing the Add*/Append* helpers and AddConstantArea().
  ConstantArea constant_area_;
  bool has_AVX_;     // x86 256bit SIMD AVX.
  bool has_AVX2_;    // x86 256bit SIMD AVX 2.0.

  // The assembler is neither copyable nor assignable.
  DISALLOW_COPY_AND_ASSIGN(X86Assembler);
1073 };
1074 
EmitUint8(uint8_t value)1075 inline void X86Assembler::EmitUint8(uint8_t value) {
1076   buffer_.Emit<uint8_t>(value);
1077 }
1078 
EmitInt32(int32_t value)1079 inline void X86Assembler::EmitInt32(int32_t value) {
1080   buffer_.Emit<int32_t>(value);
1081 }
1082 
EmitRegisterOperand(int rm,int reg)1083 inline void X86Assembler::EmitRegisterOperand(int rm, int reg) {
1084   CHECK_GE(rm, 0);
1085   CHECK_LT(rm, 8);
1086   buffer_.Emit<uint8_t>(0xC0 + (rm << 3) + reg);
1087 }
1088 
EmitXmmRegisterOperand(int rm,XmmRegister reg)1089 inline void X86Assembler::EmitXmmRegisterOperand(int rm, XmmRegister reg) {
1090   EmitRegisterOperand(rm, static_cast<Register>(reg));
1091 }
1092 
EmitFixup(AssemblerFixup * fixup)1093 inline void X86Assembler::EmitFixup(AssemblerFixup* fixup) {
1094   buffer_.EmitFixup(fixup);
1095 }
1096 
EmitOperandSizeOverride()1097 inline void X86Assembler::EmitOperandSizeOverride() {
1098   EmitUint8(0x66);
1099 }
1100 
1101 }  // namespace x86
1102 }  // namespace art
1103 
1104 #endif  // ART_COMPILER_UTILS_X86_ASSEMBLER_X86_H_
1105