/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_

#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "base/macros.h"
#include "code_generator.h"
#include "driver/compiler_options.h"
#include "nodes.h"
#include "parallel_move_resolver.h"
#include "utils/x86_64/assembler_x86_64.h"

namespace art HIDDEN {
namespace x86_64 {

// Use a local definition to prevent copying mistakes.
static constexpr size_t kX86_64WordSize = static_cast<size_t>(kX86_64PointerSize);

// Some x86_64 instructions require a register to be available as temp.
static constexpr Register TMP = R11;
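// Note: R11 is caller-save in the native x86-64 ABI and is not a callee-save
// register in the managed ABI either, so clobbering it as scratch never
// requires a save/restore.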

static constexpr Register kParameterCoreRegisters[] = { RSI, RDX, RCX, R8, R9 };
static constexpr FloatRegister kParameterFloatRegisters[] =
    { XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7 };
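// Note: managed-code arguments start at RSI; RDI (the first argument register
// in the native convention) carries the current ArtMethod* on entry.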

static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
static constexpr size_t kParameterFloatRegistersLength = arraysize(kParameterFloatRegisters);

static constexpr Register kRuntimeParameterCoreRegisters[] = { RDI, RSI, RDX, RCX };
static constexpr size_t kRuntimeParameterCoreRegistersLength =
    arraysize(kRuntimeParameterCoreRegisters);
static constexpr FloatRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1 };
static constexpr size_t kRuntimeParameterFpuRegistersLength =
    arraysize(kRuntimeParameterFpuRegisters);

// These XMM registers are non-volatile in ART ABI, but volatile in native ABI.
// If the ART ABI changes, this list must be updated.  It is used to ensure that
// these are not clobbered by any direct call to native code (such as math intrinsics).
static constexpr FloatRegister non_volatile_xmm_regs[] = { XMM12, XMM13, XMM14, XMM15 };

#define UNIMPLEMENTED_INTRINSIC_LIST_X86_64(V) \
  V(CRC32Update)                               \
  V(CRC32UpdateBytes)                          \
  V(CRC32UpdateByteBuffer)                     \
  V(FP16ToFloat)                               \
  V(FP16ToHalf)                                \
  V(FP16Floor)                                 \
  V(FP16Ceil)                                  \
  V(FP16Rint)                                  \
  V(FP16Greater)                               \
  V(FP16GreaterEquals)                         \
  V(FP16Less)                                  \
  V(FP16LessEquals)                            \
  V(FP16Compare)                               \
  V(FP16Min)                                   \
  V(FP16Max)                                   \
  V(IntegerRemainderUnsigned)                  \
  V(LongRemainderUnsigned)                     \
  V(StringStringIndexOf)                       \
  V(StringStringIndexOfAfter)                  \
  V(StringBufferAppend)                        \
  V(StringBufferLength)                        \
  V(StringBufferToString)                      \
  V(StringBuilderAppendObject)                 \
  V(StringBuilderAppendString)                 \
  V(StringBuilderAppendCharSequence)           \
  V(StringBuilderAppendCharArray)              \
  V(StringBuilderAppendBoolean)                \
  V(StringBuilderAppendChar)                   \
  V(StringBuilderAppendInt)                    \
  V(StringBuilderAppendLong)                   \
  V(StringBuilderAppendFloat)                  \
  V(StringBuilderAppendDouble)                 \
  V(StringBuilderLength)                       \
  V(StringBuilderToString)                     \
  /* 1.8 */                                    \
  V(MethodHandleInvokeExact)                   \
  V(MethodHandleInvoke)
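
// The list above is an X-macro: each client defines its own V before
// instantiating it. For example, a hypothetical client could declare one
// visitor stub per unimplemented intrinsic:
//
//   #define DECLARE_STUB(Name) void Visit##Name(HInvoke* invoke);
//   UNIMPLEMENTED_INTRINSIC_LIST_X86_64(DECLARE_STUB)
//   #undef DECLARE_STUB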

class InvokeRuntimeCallingConvention : public CallingConvention<Register, FloatRegister> {
 public:
  InvokeRuntimeCallingConvention()
      : CallingConvention(kRuntimeParameterCoreRegisters,
                          kRuntimeParameterCoreRegistersLength,
                          kRuntimeParameterFpuRegisters,
                          kRuntimeParameterFpuRegistersLength,
                          kX86_64PointerSize) {}

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention);
};

class InvokeDexCallingConvention : public CallingConvention<Register, FloatRegister> {
 public:
  InvokeDexCallingConvention() : CallingConvention(
      kParameterCoreRegisters,
      kParameterCoreRegistersLength,
      kParameterFloatRegisters,
      kParameterFloatRegistersLength,
      kX86_64PointerSize) {}

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
};

class CriticalNativeCallingConventionVisitorX86_64 : public InvokeDexCallingConventionVisitor {
 public:
  explicit CriticalNativeCallingConventionVisitorX86_64(bool for_register_allocation)
      : for_register_allocation_(for_register_allocation) {}

  virtual ~CriticalNativeCallingConventionVisitorX86_64() {}

  Location GetNextLocation(DataType::Type type) override;
  Location GetReturnLocation(DataType::Type type) const override;
  Location GetMethodLocation() const override;

  size_t GetStackOffset() const { return stack_offset_; }

 private:
  // The register allocator does not support adjusting the frame size, so we cannot provide final
  // locations of stack arguments for register allocation. We ask the register allocator for any
  // location and move these arguments to the right place after adjusting the SP when generating
  // the call.
  const bool for_register_allocation_;
  size_t gpr_index_ = 0u;
  size_t fpr_index_ = 0u;
  size_t stack_offset_ = 0u;

  DISALLOW_COPY_AND_ASSIGN(CriticalNativeCallingConventionVisitorX86_64);
};

class FieldAccessCallingConventionX86_64 : public FieldAccessCallingConvention {
 public:
  FieldAccessCallingConventionX86_64() {}

  Location GetObjectLocation() const override {
    return Location::RegisterLocation(RSI);
  }
  Location GetFieldIndexLocation() const override {
    return Location::RegisterLocation(RDI);
  }
  Location GetReturnLocation([[maybe_unused]] DataType::Type type) const override {
    return Location::RegisterLocation(RAX);
  }
  Location GetSetValueLocation([[maybe_unused]] DataType::Type type,
                               bool is_instance) const override {
    return is_instance
        ? Location::RegisterLocation(RDX)
        : Location::RegisterLocation(RSI);
  }
  Location GetFpuLocation([[maybe_unused]] DataType::Type type) const override {
    return Location::FpuRegisterLocation(XMM0);
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionX86_64);
};


class InvokeDexCallingConventionVisitorX86_64 : public InvokeDexCallingConventionVisitor {
 public:
  InvokeDexCallingConventionVisitorX86_64() {}
  virtual ~InvokeDexCallingConventionVisitorX86_64() {}

  Location GetNextLocation(DataType::Type type) override;
  Location GetReturnLocation(DataType::Type type) const override;
  Location GetMethodLocation() const override;

 private:
  InvokeDexCallingConvention calling_convention;

  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorX86_64);
};

class CodeGeneratorX86_64;

class ParallelMoveResolverX86_64 : public ParallelMoveResolverWithSwap {
 public:
  ParallelMoveResolverX86_64(ArenaAllocator* allocator, CodeGeneratorX86_64* codegen)
      : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {}

  void EmitMove(size_t index) override;
  void EmitSwap(size_t index) override;
  void SpillScratch(int reg) override;
  void RestoreScratch(int reg) override;

  X86_64Assembler* GetAssembler() const;

 private:
  void Exchange32(CpuRegister reg, int mem);
  void Exchange32(XmmRegister reg, int mem);
  void Exchange64(CpuRegister reg1, CpuRegister reg2);
  void Exchange64(CpuRegister reg, int mem);
  void Exchange64(XmmRegister reg, int mem);
  void Exchange128(XmmRegister reg, int mem);
  void ExchangeMemory32(int mem1, int mem2);
  void ExchangeMemory64(int mem1, int mem2, int num_of_qwords);

  CodeGeneratorX86_64* const codegen_;

  DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverX86_64);
};

class LocationsBuilderX86_64 : public HGraphVisitor {
 public:
  LocationsBuilderX86_64(HGraph* graph, CodeGeneratorX86_64* codegen)
      : HGraphVisitor(graph), codegen_(codegen) {}

#define DECLARE_VISIT_INSTRUCTION(name, super)     \
  void Visit##name(H##name* instr) override;

  FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_X86_64(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(DECLARE_VISIT_INSTRUCTION)

#undef DECLARE_VISIT_INSTRUCTION

  void VisitInstruction(HInstruction* instruction) override {
    LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
               << " (id " << instruction->GetId() << ")";
  }

 private:
  void HandleInvoke(HInvoke* invoke);
  void HandleBitwiseOperation(HBinaryOperation* operation);
  void HandleCondition(HCondition* condition);
  void HandleShift(HBinaryOperation* operation);
  void HandleFieldSet(HInstruction* instruction,
                      const FieldInfo& field_info,
                      WriteBarrierKind write_barrier_kind);
  void HandleFieldGet(HInstruction* instruction);
  bool CpuHasAvxFeatureFlag();
  bool CpuHasAvx2FeatureFlag();

  CodeGeneratorX86_64* const codegen_;
  InvokeDexCallingConventionVisitorX86_64 parameter_visitor_;

  DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86_64);
};

class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator {
 public:
  InstructionCodeGeneratorX86_64(HGraph* graph, CodeGeneratorX86_64* codegen);

#define DECLARE_VISIT_INSTRUCTION(name, super)     \
  void Visit##name(H##name* instr) override;

  FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_X86_64(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(DECLARE_VISIT_INSTRUCTION)

#undef DECLARE_VISIT_INSTRUCTION

  void VisitInstruction(HInstruction* instruction) override {
    LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
               << " (id " << instruction->GetId() << ")";
  }

  X86_64Assembler* GetAssembler() const { return assembler_; }

  // Generate a GC root reference load:
  //
  //   root <- *address
  //
  // while honoring read barriers based on read_barrier_option.
  void GenerateGcRootFieldLoad(HInstruction* instruction,
                               Location root,
                               const Address& address,
                               Label* fixup_label,
                               ReadBarrierOption read_barrier_option);
  void HandleFieldSet(HInstruction* instruction,
                      uint32_t value_index,
                      uint32_t extra_temp_index,
                      DataType::Type field_type,
                      Address field_addr,
                      CpuRegister base,
                      bool is_volatile,
                      bool is_atomic,
                      bool value_can_be_null,
                      bool byte_swap,
                      WriteBarrierKind write_barrier_kind);

  void Bswap(Location value, DataType::Type type, CpuRegister* temp = nullptr);

 private:
  // Generate code for the given suspend check. If not null, `successor`
  // is the block to branch to if the suspend check is not needed, and after
  // the suspend call.
  void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
  void GenerateClassInitializationCheck(SlowPathCode* slow_path, CpuRegister class_reg);
  void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, CpuRegister temp);
  void HandleBitwiseOperation(HBinaryOperation* operation);
  void GenerateRemFP(HRem* rem);
  void DivRemOneOrMinusOne(HBinaryOperation* instruction);
  void DivByPowerOfTwo(HDiv* instruction);
  void RemByPowerOfTwo(HRem* instruction);
  void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
  void GenerateDivRemIntegral(HBinaryOperation* instruction);
  void HandleCondition(HCondition* condition);
  void HandleShift(HBinaryOperation* operation);

  void HandleFieldSet(HInstruction* instruction,
                      const FieldInfo& field_info,
                      bool value_can_be_null,
                      WriteBarrierKind write_barrier_kind);
  void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);

  void GenerateMinMaxInt(LocationSummary* locations, bool is_min, DataType::Type type);
  void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type);
  void GenerateMinMax(HBinaryOperation* minmax, bool is_min);
  void GenerateMethodEntryExitHook(HInstruction* instruction);

  // Generate a heap reference load using one register `out`:
  //
  //   out <- *(out + offset)
  //
  // while honoring heap poisoning and/or read barriers (if any).
  //
  // Location `maybe_temp` is used when generating a read barrier and
  // shall be a register in that case; it may be an invalid location
  // otherwise.
  void GenerateReferenceLoadOneRegister(HInstruction* instruction,
                                        Location out,
                                        uint32_t offset,
                                        Location maybe_temp,
                                        ReadBarrierOption read_barrier_option);
  // Generate a heap reference load using two different registers
  // `out` and `obj`:
  //
  //   out <- *(obj + offset)
  //
  // while honoring heap poisoning and/or read barriers (if any).
  //
  // Location `maybe_temp` is used when generating a Baker's (fast
  // path) read barrier and shall be a register in that case; it may
  // be an invalid location otherwise.
  void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
                                         Location out,
                                         Location obj,
                                         uint32_t offset,
                                         ReadBarrierOption read_barrier_option);

  void PushOntoFPStack(Location source, uint32_t temp_offset,
                       uint32_t stack_adjustment, bool is_float);
  void GenerateCompareTest(HCondition* condition);
  template<class LabelType>
  void GenerateTestAndBranch(HInstruction* instruction,
                             size_t condition_input_index,
                             LabelType* true_target,
                             LabelType* false_target);
  template<class LabelType>
  void GenerateCompareTestAndBranch(HCondition* condition,
                                    LabelType* true_target,
                                    LabelType* false_target);
  template<class LabelType>
  void GenerateFPJumps(HCondition* cond, LabelType* true_label, LabelType* false_label);

  void HandleGoto(HInstruction* got, HBasicBlock* successor);

  bool CpuHasAvxFeatureFlag();
  bool CpuHasAvx2FeatureFlag();

  X86_64Assembler* const assembler_;
  CodeGeneratorX86_64* const codegen_;

  DISALLOW_COPY_AND_ASSIGN(InstructionCodeGeneratorX86_64);
};

// Class for fixups to jump tables.
class JumpTableRIPFixup;

class CodeGeneratorX86_64 : public CodeGenerator {
 public:
  CodeGeneratorX86_64(HGraph* graph,
                      const CompilerOptions& compiler_options,
                      OptimizingCompilerStats* stats = nullptr);
  virtual ~CodeGeneratorX86_64() {}

  void GenerateFrameEntry() override;
  void GenerateFrameExit() override;
  void Bind(HBasicBlock* block) override;
  void MoveConstant(Location destination, int32_t value) override;
  void MoveLocation(Location dst, Location src, DataType::Type dst_type) override;
  void AddLocationAsTemp(Location location, LocationSummary* locations) override;

  size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) override;
  size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) override;
  size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) override;
  size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) override;

  // Generate code to invoke a runtime entry point.
  void InvokeRuntime(QuickEntrypointEnum entrypoint,
                     HInstruction* instruction,
                     uint32_t dex_pc,
                     SlowPathCode* slow_path = nullptr) override;

  // Generate code to invoke a runtime entry point, but do not record
  // PC-related information in a stack map.
  void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
                                           HInstruction* instruction,
                                           SlowPathCode* slow_path);

  void GenerateInvokeRuntime(int32_t entry_point_offset);

  size_t GetWordSize() const override {
    return kX86_64WordSize;
  }

  size_t GetSlowPathFPWidth() const override {
    return GetGraph()->HasSIMD()
        ? GetSIMDRegisterWidth()
        : 1 * kX86_64WordSize;  // 8 bytes == 1 x86_64 word for each spill
  }

  size_t GetCalleePreservedFPWidth() const override {
    return 1 * kX86_64WordSize;
  }

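  // 2 * 8 = 16 bytes, i.e. the full width of a 128-bit XMM register.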
  size_t GetSIMDRegisterWidth() const override {
    return 2 * kX86_64WordSize;
  }

  HGraphVisitor* GetLocationBuilder() override {
    return &location_builder_;
  }

  HGraphVisitor* GetInstructionVisitor() override {
    return &instruction_visitor_;
  }

  X86_64Assembler* GetAssembler() override {
    return &assembler_;
  }

  const X86_64Assembler& GetAssembler() const override {
    return assembler_;
  }

  ParallelMoveResolverX86_64* GetMoveResolver() override {
    return &move_resolver_;
  }

  uintptr_t GetAddressOf(HBasicBlock* block) override {
    return GetLabelOf(block)->Position();
  }

  void SetupBlockedRegisters() const override;
  void DumpCoreRegister(std::ostream& stream, int reg) const override;
  void DumpFloatingPointRegister(std::ostream& stream, int reg) const override;
  void Finalize() override;

  InstructionSet GetInstructionSet() const override {
    return InstructionSet::kX86_64;
  }

  InstructionCodeGeneratorX86_64* GetInstructionCodegen() {
    return down_cast<InstructionCodeGeneratorX86_64*>(GetInstructionVisitor());
  }

  const X86_64InstructionSetFeatures& GetInstructionSetFeatures() const;

  // Emit a write barrier if:
  // A) emit_null_check is false, or
  // B) emit_null_check is true, and value is not null.
  void MaybeMarkGCCard(CpuRegister temp,
                       CpuRegister card,
                       CpuRegister object,
                       CpuRegister value,
                       bool emit_null_check);

  // Emit a write barrier unconditionally.
  void MarkGCCard(CpuRegister temp, CpuRegister card, CpuRegister object);

  // Crash if the card table is not valid. This check is only emitted for the CC GC. We assert
  // `(!clean || !self->is_gc_marking)`: for eliminated write barriers, the card table entry
  // must not be clean while the CC GC is marking.
  void CheckGCCardIsValid(CpuRegister temp, CpuRegister card, CpuRegister object);

  void GenerateMemoryBarrier(MemBarrierKind kind);

  // Helper method to move a value between two locations.
  void Move(Location destination, Location source);
  // Helper method to load a value of non-reference type from memory.
  void LoadFromMemoryNoReference(DataType::Type type, Location dst, Address src);

  Label* GetLabelOf(HBasicBlock* block) const {
    return CommonGetLabelOf<Label>(block_labels_, block);
  }

  void Initialize() override {
    block_labels_ = CommonInitializeLabels<Label>();
  }

  bool NeedsTwoRegisters([[maybe_unused]] DataType::Type type) const override { return false; }

  // Check if the desired_string_load_kind is supported. If it is, return it,
  // otherwise return a fall-back kind that should be used instead.
  HLoadString::LoadKind GetSupportedLoadStringKind(
      HLoadString::LoadKind desired_string_load_kind) override;

  // Check if the desired_class_load_kind is supported. If it is, return it,
  // otherwise return a fall-back kind that should be used instead.
  HLoadClass::LoadKind GetSupportedLoadClassKind(
      HLoadClass::LoadKind desired_class_load_kind) override;

  // Check if the desired_dispatch_info is supported. If it is, return it,
  // otherwise return a fall-back info that should be used instead.
  HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
      ArtMethod* method) override;

  void LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke);
  void GenerateStaticOrDirectCall(
      HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) override;
  void GenerateVirtualCall(
      HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) override;

  void RecordBootImageIntrinsicPatch(uint32_t intrinsic_data);
  void RecordBootImageRelRoPatch(uint32_t boot_image_offset);
  void RecordBootImageMethodPatch(HInvoke* invoke);
  void RecordMethodBssEntryPatch(HInvoke* invoke);
  void RecordBootImageTypePatch(const DexFile& dex_file, dex::TypeIndex type_index);
  void RecordAppImageTypePatch(const DexFile& dex_file, dex::TypeIndex type_index);
  Label* NewTypeBssEntryPatch(HLoadClass* load_class);
  void RecordBootImageStringPatch(HLoadString* load_string);
  Label* NewStringBssEntryPatch(HLoadString* load_string);
  Label* NewMethodTypeBssEntryPatch(HLoadMethodType* load_method_type);
  void RecordBootImageJniEntrypointPatch(HInvokeStaticOrDirect* invoke);
  Label* NewJitRootStringPatch(const DexFile& dex_file,
                               dex::StringIndex string_index,
                               Handle<mirror::String> handle);
  Label* NewJitRootClassPatch(const DexFile& dex_file,
                              dex::TypeIndex type_index,
                              Handle<mirror::Class> handle);

  void LoadBootImageAddress(CpuRegister reg, uint32_t boot_image_reference);
  void LoadIntrinsicDeclaringClass(CpuRegister reg, HInvoke* invoke);
  void LoadClassRootForIntrinsic(CpuRegister reg, ClassRoot class_root);

  void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) override;

  void PatchJitRootUse(uint8_t* code,
                       const uint8_t* roots_data,
                       const PatchInfo<Label>& info,
                       uint64_t index_in_table) const;

  void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) override;

  // Fast path implementation of ReadBarrier::Barrier for a heap
  // reference field load when Baker's read barriers are used.
  void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
                                             Location ref,
                                             CpuRegister obj,
                                             uint32_t offset,
                                             bool needs_null_check);
  // Fast path implementation of ReadBarrier::Barrier for a heap
  // reference array load when Baker's read barriers are used.
  void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
                                             Location ref,
                                             CpuRegister obj,
                                             uint32_t data_offset,
                                             Location index,
                                             bool needs_null_check);
  // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier,
  // GenerateArrayLoadWithBakerReadBarrier and some intrinsics.
  //
  // Load the object reference located at address `src`, held by
  // object `obj`, into `ref`, and mark it if needed.  The base of
  // address `src` must be `obj`.
  //
  // If `always_update_field` is true, the value of the reference is
  // atomically updated in the holder (`obj`).  This operation
  // requires two temporary registers, which must be provided as
  // non-null pointers (`temp1` and `temp2`).
  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
                                                 Location ref,
                                                 CpuRegister obj,
                                                 const Address& src,
                                                 bool needs_null_check,
                                                 bool always_update_field = false,
                                                 CpuRegister* temp1 = nullptr,
                                                 CpuRegister* temp2 = nullptr);

  // Generate a read barrier for a heap reference within `instruction`
  // using a slow path.
  //
  // A read barrier for an object reference read from the heap is
  // implemented as a call to the artReadBarrierSlow runtime entry
  // point, which is passed the values in locations `ref`, `obj`, and
  // `offset`:
  //
  //   mirror::Object* artReadBarrierSlow(mirror::Object* ref,
  //                                      mirror::Object* obj,
  //                                      uint32_t offset);
  //
  // The `out` location contains the value returned by
  // artReadBarrierSlow.
  //
  // When `index` is provided (i.e., when it is different from
  // Location::NoLocation()), the offset value passed to
  // artReadBarrierSlow is adjusted to take `index` into account.
  void GenerateReadBarrierSlow(HInstruction* instruction,
                               Location out,
                               Location ref,
                               Location obj,
                               uint32_t offset,
                               Location index = Location::NoLocation());

  // If read barriers are enabled, generate a read barrier for a heap
  // reference using a slow path. If heap poisoning is enabled, also
  // unpoison the reference in `out`.
  void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
                                    Location out,
                                    Location ref,
                                    Location obj,
                                    uint32_t offset,
                                    Location index = Location::NoLocation());

  // Generate a read barrier for a GC root within `instruction` using
  // a slow path.
  //
  // A read barrier for an object reference GC root is implemented as
  // a call to the artReadBarrierForRootSlow runtime entry point,
  // which is passed the value in location `root`:
  //
  //   mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root);
  //
  // The `out` location contains the value returned by
  // artReadBarrierForRootSlow.
  void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);

  int ConstantAreaStart() const {
    return constant_area_start_;
  }

  Address LiteralDoubleAddress(double v);
  Address LiteralFloatAddress(float v);
  Address LiteralInt32Address(int32_t v);
  Address LiteralInt64Address(int64_t v);

  // Load a 32/64-bit value into a register in the most efficient manner.
  void Load32BitValue(CpuRegister dest, int32_t value);
  void Load64BitValue(CpuRegister dest, int64_t value);
  void Load32BitValue(XmmRegister dest, int32_t value);
  void Load64BitValue(XmmRegister dest, int64_t value);
  void Load32BitValue(XmmRegister dest, float value);
  void Load64BitValue(XmmRegister dest, double value);

  // Compare a register with a 32/64-bit value in the most efficient manner.
  void Compare32BitValue(CpuRegister dest, int32_t value);
  void Compare64BitValue(CpuRegister dest, int64_t value);

  // Compare int values. Supports register locations for `lhs`.
  void GenerateIntCompare(Location lhs, Location rhs);
  void GenerateIntCompare(CpuRegister lhs, Location rhs);

  // Compare long values. Supports only register locations for `lhs`.
  void GenerateLongCompare(Location lhs, Location rhs);

  // Construct address for array access.
  static Address ArrayAddress(CpuRegister obj,
                              Location index,
                              ScaleFactor scale,
                              uint32_t data_offset);

  Address LiteralCaseTable(HPackedSwitch* switch_instr);

  // Store a 64 bit value into a DoubleStackSlot in the most efficient manner.
  void Store64BitValueToStack(Location dest, int64_t value);

  void MoveFromReturnRegister(Location trg, DataType::Type type) override;

  // Assign a 64 bit constant to an address.
  void MoveInt64ToAddress(const Address& addr_low,
                          const Address& addr_high,
                          int64_t v,
                          HInstruction* instruction);

  // Ensure that prior stores complete to memory before subsequent loads.
  // The locked add implementation will avoid serializing device memory, but will
  // touch (but not change) the top of the stack.
  // Pass `force_mfence = true` to ensure ordering of non-temporal stores.
  void MemoryFence(bool force_mfence = false) {
    if (!force_mfence) {
      assembler_.lock()->addl(Address(CpuRegister(RSP), 0), Immediate(0));
    } else {
      assembler_.mfence();
    }
  }
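  // Note: `lock addl $0, (%rsp)` acts as a full barrier (including StoreLoad)
  // and is typically cheaper than an explicit `mfence` on modern x86 cores.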

  void IncreaseFrame(size_t adjustment) override;
  void DecreaseFrame(size_t adjustment) override;

  void GenerateNop() override;
  void GenerateImplicitNullCheck(HNullCheck* instruction) override;
  void GenerateExplicitNullCheck(HNullCheck* instruction) override;
  void MaybeGenerateInlineCacheCheck(HInstruction* instruction, CpuRegister cls);

  void MaybeIncrementHotness(HSuspendCheck* suspend_check, bool is_frame_entry);

  static void BlockNonVolatileXmmRegisters(LocationSummary* locations);

  // When we don't know the proper offset for the value, we use kPlaceholder32BitOffset.
  // The linker will later fix this up to the right value.
  static constexpr int32_t kPlaceholder32BitOffset = 256;

 private:
  template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
  static void EmitPcRelativeLinkerPatches(const ArenaDeque<PatchInfo<Label>>& infos,
                                          ArenaVector<linker::LinkerPatch>* linker_patches);

  // Labels for each block that will be compiled.
  Label* block_labels_;  // Indexed by block id.
  Label frame_entry_label_;
  LocationsBuilderX86_64 location_builder_;
  InstructionCodeGeneratorX86_64 instruction_visitor_;
  ParallelMoveResolverX86_64 move_resolver_;
  X86_64Assembler assembler_;

  // Offset to the start of the constant area in the assembled code.
  // Used for fixups to the constant area.
  int constant_area_start_;

  // PC-relative method patch info for kBootImageLinkTimePcRelative.
  ArenaDeque<PatchInfo<Label>> boot_image_method_patches_;
  // PC-relative method patch info for kBssEntry.
  ArenaDeque<PatchInfo<Label>> method_bss_entry_patches_;
  // PC-relative type patch info for kBootImageLinkTimePcRelative.
  ArenaDeque<PatchInfo<Label>> boot_image_type_patches_;
  // PC-relative type patch info for kAppImageRelRo.
  ArenaDeque<PatchInfo<Label>> app_image_type_patches_;
  // PC-relative type patch info for kBssEntry.
  ArenaDeque<PatchInfo<Label>> type_bss_entry_patches_;
  // PC-relative public type patch info for kBssEntryPublic.
  ArenaDeque<PatchInfo<Label>> public_type_bss_entry_patches_;
  // PC-relative package type patch info for kBssEntryPackage.
  ArenaDeque<PatchInfo<Label>> package_type_bss_entry_patches_;
  // PC-relative String patch info for kBootImageLinkTimePcRelative.
  ArenaDeque<PatchInfo<Label>> boot_image_string_patches_;
  // PC-relative String patch info for kBssEntry.
  ArenaDeque<PatchInfo<Label>> string_bss_entry_patches_;
  // PC-relative MethodType patch info for kBssEntry.
  ArenaDeque<PatchInfo<Label>> method_type_bss_entry_patches_;
  // PC-relative method patch info for kBootImageLinkTimePcRelative+kCallCriticalNative.
  ArenaDeque<PatchInfo<Label>> boot_image_jni_entrypoint_patches_;
  // PC-relative patch info for IntrinsicObjects for the boot image,
  // and for method/type/string patches for kBootImageRelRo otherwise.
  ArenaDeque<PatchInfo<Label>> boot_image_other_patches_;

  // Patches for string literals in JIT compiled code.
  ArenaDeque<PatchInfo<Label>> jit_string_patches_;
  // Patches for class literals in JIT compiled code.
  ArenaDeque<PatchInfo<Label>> jit_class_patches_;

  // Fixups for jump tables need to be handled specially.
  ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_;

  DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86_64);
};

}  // namespace x86_64
}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_