/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_

#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "base/macros.h"
#include "code_generator.h"
#include "driver/compiler_options.h"
#include "nodes.h"
#include "parallel_move_resolver.h"
#include "utils/x86_64/assembler_x86_64.h"

namespace art HIDDEN {
namespace x86_64 {

// Use a local definition to prevent copying mistakes.
static constexpr size_t kX86_64WordSize = static_cast<size_t>(kX86_64PointerSize);

// Some x86_64 instructions require a register to be available as temp.
static constexpr Register TMP = R11;

static constexpr Register kParameterCoreRegisters[] = { RSI, RDX, RCX, R8, R9 };
static constexpr FloatRegister kParameterFloatRegisters[] =
    { XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7 };

static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
static constexpr size_t kParameterFloatRegistersLength = arraysize(kParameterFloatRegisters);

static constexpr Register kRuntimeParameterCoreRegisters[] = { RDI, RSI, RDX, RCX };
static constexpr size_t kRuntimeParameterCoreRegistersLength =
    arraysize(kRuntimeParameterCoreRegisters);
static constexpr FloatRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1 };
static constexpr size_t kRuntimeParameterFpuRegistersLength =
    arraysize(kRuntimeParameterFpuRegisters);
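
// For reference, a worked example of how the managed (dex) parameter registers above
// are consumed, assuming the standard ART calling convention in which the ArtMethod*
// is passed in RDI and core/FP arguments are assigned independently:
//
//   void foo(int a, long b, float c, Object d)
//     a -> RSI, b -> RDX, c -> XMM0, d -> RCX
//
// Core arguments past R9 (and FP arguments past XMM7) are passed on the stack.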

// These XMM registers are non-volatile in ART ABI, but volatile in native ABI.
// If the ART ABI changes, this list must be updated. It is used to ensure that
// these are not clobbered by any direct call to native code (such as math intrinsics).
static constexpr FloatRegister non_volatile_xmm_regs[] = { XMM12, XMM13, XMM14, XMM15 };

#define UNIMPLEMENTED_INTRINSIC_LIST_X86_64(V) \
  V(CRC32Update)                               \
  V(CRC32UpdateBytes)                          \
  V(CRC32UpdateByteBuffer)                     \
  V(FP16ToFloat)                               \
  V(FP16ToHalf)                                \
  V(FP16Floor)                                 \
  V(FP16Ceil)                                  \
  V(FP16Rint)                                  \
  V(FP16Greater)                               \
  V(FP16GreaterEquals)                         \
  V(FP16Less)                                  \
  V(FP16LessEquals)                            \
  V(FP16Compare)                               \
  V(FP16Min)                                   \
  V(FP16Max)                                   \
  V(IntegerRemainderUnsigned)                  \
  V(LongRemainderUnsigned)                     \
  V(StringStringIndexOf)                       \
  V(StringStringIndexOfAfter)                  \
  V(StringBufferAppend)                        \
  V(StringBufferLength)                        \
  V(StringBufferToString)                      \
  V(StringBuilderAppendObject)                 \
  V(StringBuilderAppendString)                 \
  V(StringBuilderAppendCharSequence)           \
  V(StringBuilderAppendCharArray)              \
  V(StringBuilderAppendBoolean)                \
  V(StringBuilderAppendChar)                   \
  V(StringBuilderAppendInt)                    \
  V(StringBuilderAppendLong)                   \
  V(StringBuilderAppendFloat)                  \
  V(StringBuilderAppendDouble)                 \
  V(StringBuilderLength)                       \
  V(StringBuilderToString)                     \
  /* 1.8 */                                    \
  V(MethodHandleInvokeExact)                   \
  V(MethodHandleInvoke)

class InvokeRuntimeCallingConvention : public CallingConvention<Register, FloatRegister> {
 public:
  InvokeRuntimeCallingConvention()
      : CallingConvention(kRuntimeParameterCoreRegisters,
                          kRuntimeParameterCoreRegistersLength,
                          kRuntimeParameterFpuRegisters,
                          kRuntimeParameterFpuRegistersLength,
                          kX86_64PointerSize) {}

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention);
};

class InvokeDexCallingConvention : public CallingConvention<Register, FloatRegister> {
 public:
  InvokeDexCallingConvention() : CallingConvention(
      kParameterCoreRegisters,
      kParameterCoreRegistersLength,
      kParameterFloatRegisters,
      kParameterFloatRegistersLength,
      kX86_64PointerSize) {}

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
};

class CriticalNativeCallingConventionVisitorX86_64 : public InvokeDexCallingConventionVisitor {
 public:
  explicit CriticalNativeCallingConventionVisitorX86_64(bool for_register_allocation)
      : for_register_allocation_(for_register_allocation) {}

  virtual ~CriticalNativeCallingConventionVisitorX86_64() {}

  Location GetNextLocation(DataType::Type type) override;
  Location GetReturnLocation(DataType::Type type) const override;
  Location GetMethodLocation() const override;

  size_t GetStackOffset() const { return stack_offset_; }
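
  // In outline, this visitor hands out locations following the native (System V
  // x86-64) ABI rather than the managed one, since an @CriticalNative call receives
  // no JNIEnv* or jclass. Illustrative example (assuming native argument registers
  // RDI, RSI, RDX, RCX, R8, R9 and XMM0-XMM7):
  //
  //   static native int sum(int a, long b, float c);
  //     a -> RDI, b -> RSI, c -> XMM0, further arguments -> stack
  //
  // GetStackOffset() reports how much outgoing stack space such spilled arguments need.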

 private:
  // Register allocator does not support adjusting frame size, so we cannot provide final locations
  // of stack arguments for register allocation. We ask the register allocator for any location and
  // move these arguments to the right place after adjusting the SP when generating the call.
  const bool for_register_allocation_;
  size_t gpr_index_ = 0u;
  size_t fpr_index_ = 0u;
  size_t stack_offset_ = 0u;

  DISALLOW_COPY_AND_ASSIGN(CriticalNativeCallingConventionVisitorX86_64);
};

class FieldAccessCallingConventionX86_64 : public FieldAccessCallingConvention {
 public:
  FieldAccessCallingConventionX86_64() {}

  Location GetObjectLocation() const override {
    return Location::RegisterLocation(RSI);
  }
  Location GetFieldIndexLocation() const override {
    return Location::RegisterLocation(RDI);
  }
  Location GetReturnLocation([[maybe_unused]] DataType::Type type) const override {
    return Location::RegisterLocation(RAX);
  }
  Location GetSetValueLocation([[maybe_unused]] DataType::Type type,
                               bool is_instance) const override {
    return is_instance
        ? Location::RegisterLocation(RDX)
        : Location::RegisterLocation(RSI);
  }
  Location GetFpuLocation([[maybe_unused]] DataType::Type type) const override {
    return Location::FpuRegisterLocation(XMM0);
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionX86_64);
};


class InvokeDexCallingConventionVisitorX86_64 : public InvokeDexCallingConventionVisitor {
 public:
  InvokeDexCallingConventionVisitorX86_64() {}
  virtual ~InvokeDexCallingConventionVisitorX86_64() {}

  Location GetNextLocation(DataType::Type type) override;
  Location GetReturnLocation(DataType::Type type) const override;
  Location GetMethodLocation() const override;

 private:
  InvokeDexCallingConvention calling_convention;

  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorX86_64);
};

class CodeGeneratorX86_64;

class ParallelMoveResolverX86_64 : public ParallelMoveResolverWithSwap {
 public:
  ParallelMoveResolverX86_64(ArenaAllocator* allocator, CodeGeneratorX86_64* codegen)
      : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {}

  void EmitMove(size_t index) override;
  void EmitSwap(size_t index) override;
  void SpillScratch(int reg) override;
  void RestoreScratch(int reg) override;

  X86_64Assembler* GetAssembler() const;

 private:
  void Exchange32(CpuRegister reg, int mem);
  void Exchange32(XmmRegister reg, int mem);
  void Exchange64(CpuRegister reg1, CpuRegister reg2);
  void Exchange64(CpuRegister reg, int mem);
  void Exchange64(XmmRegister reg, int mem);
  void Exchange128(XmmRegister reg, int mem);
  void ExchangeMemory32(int mem1, int mem2);
  void ExchangeMemory64(int mem1, int mem2, int num_of_qwords);

  CodeGeneratorX86_64* const codegen_;

  DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverX86_64);
};
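
// A note on the resolver above: move cycles are resolved with swaps. A
// register-register swap can be a single instruction, e.g. (illustrative):
//
//   xchgq rax, rcx    // Exchange64(reg1, reg2)
//
// whereas register-memory and memory-memory exchanges are expected to go through a
// temporary (Exchange32/Exchange64/Exchange128, ExchangeMemory*), since `xchg` with a
// memory operand implies a locked bus cycle.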

class LocationsBuilderX86_64 : public HGraphVisitor {
 public:
  LocationsBuilderX86_64(HGraph* graph, CodeGeneratorX86_64* codegen)
      : HGraphVisitor(graph), codegen_(codegen) {}

#define DECLARE_VISIT_INSTRUCTION(name, super) \
  void Visit##name(H##name* instr) override;

  FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_X86_64(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(DECLARE_VISIT_INSTRUCTION)

#undef DECLARE_VISIT_INSTRUCTION

  void VisitInstruction(HInstruction* instruction) override {
    LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
               << " (id " << instruction->GetId() << ")";
  }

 private:
  void HandleInvoke(HInvoke* invoke);
  void HandleBitwiseOperation(HBinaryOperation* operation);
  void HandleCondition(HCondition* condition);
  void HandleShift(HBinaryOperation* operation);
  void HandleFieldSet(HInstruction* instruction,
                      const FieldInfo& field_info,
                      WriteBarrierKind write_barrier_kind);
  void HandleFieldGet(HInstruction* instruction);
  bool CpuHasAvxFeatureFlag();
  bool CpuHasAvx2FeatureFlag();

  CodeGeneratorX86_64* const codegen_;
  InvokeDexCallingConventionVisitorX86_64 parameter_visitor_;

  DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86_64);
};

class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator {
 public:
  InstructionCodeGeneratorX86_64(HGraph* graph, CodeGeneratorX86_64* codegen);

#define DECLARE_VISIT_INSTRUCTION(name, super) \
  void Visit##name(H##name* instr) override;

  FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_X86_64(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(DECLARE_VISIT_INSTRUCTION)

#undef DECLARE_VISIT_INSTRUCTION

  void VisitInstruction(HInstruction* instruction) override {
    LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
               << " (id " << instruction->GetId() << ")";
  }

  X86_64Assembler* GetAssembler() const { return assembler_; }

  // Generate a GC root reference load:
  //
  //   root <- *address
  //
  // while honoring read barriers based on read_barrier_option.
  void GenerateGcRootFieldLoad(HInstruction* instruction,
                               Location root,
                               const Address& address,
                               Label* fixup_label,
                               ReadBarrierOption read_barrier_option);
  void HandleFieldSet(HInstruction* instruction,
                      uint32_t value_index,
                      uint32_t extra_temp_index,
                      DataType::Type field_type,
                      Address field_addr,
                      CpuRegister base,
                      bool is_volatile,
                      bool is_atomic,
                      bool value_can_be_null,
                      bool byte_swap,
                      WriteBarrierKind write_barrier_kind);

  void Bswap(Location value, DataType::Type type, CpuRegister* temp = nullptr);

 private:
  // Generate code for the given suspend check. If not null, `successor`
  // is the block to branch to if the suspend check is not needed, and after
  // the suspend call.
  void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
  void GenerateClassInitializationCheck(SlowPathCode* slow_path, CpuRegister class_reg);
  void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, CpuRegister temp);
  void HandleBitwiseOperation(HBinaryOperation* operation);
  void GenerateRemFP(HRem* rem);
  void DivRemOneOrMinusOne(HBinaryOperation* instruction);
  void DivByPowerOfTwo(HDiv* instruction);
  void RemByPowerOfTwo(HRem* instruction);
  void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
  void GenerateDivRemIntegral(HBinaryOperation* instruction);
  void HandleCondition(HCondition* condition);
  void HandleShift(HBinaryOperation* operation);

  void HandleFieldSet(HInstruction* instruction,
                      const FieldInfo& field_info,
                      bool value_can_be_null,
                      WriteBarrierKind write_barrier_kind);
  void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);

  void GenerateMinMaxInt(LocationSummary* locations, bool is_min, DataType::Type type);
  void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type);
  void GenerateMinMax(HBinaryOperation* minmax, bool is_min);
  void GenerateMethodEntryExitHook(HInstruction* instruction);

  // Generate a heap reference load using one register `out`:
  //
  //   out <- *(out + offset)
  //
  // while honoring heap poisoning and/or read barriers (if any).
  //
  // Location `maybe_temp` is used when generating a read barrier and
  // shall be a register in that case; it may be an invalid location
  // otherwise.
  void GenerateReferenceLoadOneRegister(HInstruction* instruction,
                                        Location out,
                                        uint32_t offset,
                                        Location maybe_temp,
                                        ReadBarrierOption read_barrier_option);
  // Generate a heap reference load using two different registers
  // `out` and `obj`:
  //
  //   out <- *(obj + offset)
  //
  // while honoring heap poisoning and/or read barriers (if any).
  //
  // Location `maybe_temp` is used when generating a Baker's (fast
  // path) read barrier and shall be a register in that case; it may
  // be an invalid location otherwise.
  void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
                                         Location out,
                                         Location obj,
                                         uint32_t offset,
                                         ReadBarrierOption read_barrier_option);

  void PushOntoFPStack(Location source, uint32_t temp_offset,
                       uint32_t stack_adjustment, bool is_float);
  void GenerateCompareTest(HCondition* condition);
  template<class LabelType>
  void GenerateTestAndBranch(HInstruction* instruction,
                             size_t condition_input_index,
                             LabelType* true_target,
                             LabelType* false_target);
  template<class LabelType>
  void GenerateCompareTestAndBranch(HCondition* condition,
                                    LabelType* true_target,
                                    LabelType* false_target);
  template<class LabelType>
  void GenerateFPJumps(HCondition* cond, LabelType* true_label, LabelType* false_label);

  void HandleGoto(HInstruction* got, HBasicBlock* successor);

  bool CpuHasAvxFeatureFlag();
  bool CpuHasAvx2FeatureFlag();

  X86_64Assembler* const assembler_;
  CodeGeneratorX86_64* const codegen_;

  DISALLOW_COPY_AND_ASSIGN(InstructionCodeGeneratorX86_64);
};

// Class for fixups to jump tables.
class JumpTableRIPFixup;

class CodeGeneratorX86_64 : public CodeGenerator {
 public:
  CodeGeneratorX86_64(HGraph* graph,
                      const CompilerOptions& compiler_options,
                      OptimizingCompilerStats* stats = nullptr);
  virtual ~CodeGeneratorX86_64() {}

  void GenerateFrameEntry() override;
  void GenerateFrameExit() override;
  void Bind(HBasicBlock* block) override;
  void MoveConstant(Location destination, int32_t value) override;
  void MoveLocation(Location dst, Location src, DataType::Type dst_type) override;
  void AddLocationAsTemp(Location location, LocationSummary* locations) override;

  size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) override;
  size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) override;
  size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) override;
  size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) override;

  // Generate code to invoke a runtime entry point.
  void InvokeRuntime(QuickEntrypointEnum entrypoint,
                     HInstruction* instruction,
                     uint32_t dex_pc,
                     SlowPathCode* slow_path = nullptr) override;

  // Generate code to invoke a runtime entry point, but do not record
  // PC-related information in a stack map.
  void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
                                           HInstruction* instruction,
                                           SlowPathCode* slow_path);

  void GenerateInvokeRuntime(int32_t entry_point_offset);

  size_t GetWordSize() const override {
    return kX86_64WordSize;
  }

  size_t GetSlowPathFPWidth() const override {
    return GetGraph()->HasSIMD()
        ? GetSIMDRegisterWidth()
        : 1 * kX86_64WordSize;  // 8 bytes == 1 x86_64 word for each spill.
  }

  size_t GetCalleePreservedFPWidth() const override {
    return 1 * kX86_64WordSize;
  }

  size_t GetSIMDRegisterWidth() const override {
    return 2 * kX86_64WordSize;
  }

  HGraphVisitor* GetLocationBuilder() override {
    return &location_builder_;
  }

  HGraphVisitor* GetInstructionVisitor() override {
    return &instruction_visitor_;
  }

  X86_64Assembler* GetAssembler() override {
    return &assembler_;
  }

  const X86_64Assembler& GetAssembler() const override {
    return assembler_;
  }

  ParallelMoveResolverX86_64* GetMoveResolver() override {
    return &move_resolver_;
  }

  uintptr_t GetAddressOf(HBasicBlock* block) override {
    return GetLabelOf(block)->Position();
  }

  void SetupBlockedRegisters() const override;
  void DumpCoreRegister(std::ostream& stream, int reg) const override;
  void DumpFloatingPointRegister(std::ostream& stream, int reg) const override;
  void Finalize() override;

  InstructionSet GetInstructionSet() const override {
    return InstructionSet::kX86_64;
  }

  InstructionCodeGeneratorX86_64* GetInstructionCodegen() {
    return down_cast<InstructionCodeGeneratorX86_64*>(GetInstructionVisitor());
  }

  const X86_64InstructionSetFeatures& GetInstructionSetFeatures() const;
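
  // The card-marking write barrier emitted by MarkGCCard()/MaybeMarkGCCard() below
  // marks the card covering `object` in the thread-local card table, roughly
  // (illustrative; the exact instruction sequence depends on the GC configuration):
  //
  //   card = Thread::Current()->card_table     // loaded via the GS segment
  //   temp = object >> gc::accounting::CardTable::kCardShift
  //   card[temp] = <dirty value>                // single byte store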

  // Emit a write barrier if:
  // A) emit_null_check is false
  // B) emit_null_check is true, and value is not null.
  void MaybeMarkGCCard(CpuRegister temp,
                       CpuRegister card,
                       CpuRegister object,
                       CpuRegister value,
                       bool emit_null_check);

  // Emit a write barrier unconditionally.
  void MarkGCCard(CpuRegister temp, CpuRegister card, CpuRegister object);

  // Crash if the card table is not valid. This check is only emitted for the CC GC. We assert
  // `(!clean || !self->is_gc_marking)`, since the card table should not be set to clean when the
  // CC GC is marking for eliminated write barriers.
  void CheckGCCardIsValid(CpuRegister temp, CpuRegister card, CpuRegister object);

  void GenerateMemoryBarrier(MemBarrierKind kind);

  // Helper method to move a value between two locations.
  void Move(Location destination, Location source);
  // Helper method to load a value of non-reference type from memory.
  void LoadFromMemoryNoReference(DataType::Type type, Location dst, Address src);

  Label* GetLabelOf(HBasicBlock* block) const {
    return CommonGetLabelOf<Label>(block_labels_, block);
  }

  void Initialize() override {
    block_labels_ = CommonInitializeLabels<Label>();
  }

  bool NeedsTwoRegisters([[maybe_unused]] DataType::Type type) const override { return false; }

  // Check if the desired_string_load_kind is supported. If it is, return it,
  // otherwise return a fall-back kind that should be used instead.
  HLoadString::LoadKind GetSupportedLoadStringKind(
      HLoadString::LoadKind desired_string_load_kind) override;

  // Check if the desired_class_load_kind is supported. If it is, return it,
  // otherwise return a fall-back kind that should be used instead.
  HLoadClass::LoadKind GetSupportedLoadClassKind(
      HLoadClass::LoadKind desired_class_load_kind) override;

  // Check if the desired_dispatch_info is supported. If it is, return it,
  // otherwise return a fall-back info that should be used instead.
  HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
      ArtMethod* method) override;

  void LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke);
  void GenerateStaticOrDirectCall(
      HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) override;
  void GenerateVirtualCall(
      HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) override;

  void RecordBootImageIntrinsicPatch(uint32_t intrinsic_data);
  void RecordBootImageRelRoPatch(uint32_t boot_image_offset);
  void RecordBootImageMethodPatch(HInvoke* invoke);
  void RecordMethodBssEntryPatch(HInvoke* invoke);
  void RecordBootImageTypePatch(const DexFile& dex_file, dex::TypeIndex type_index);
  void RecordAppImageTypePatch(const DexFile& dex_file, dex::TypeIndex type_index);
  Label* NewTypeBssEntryPatch(HLoadClass* load_class);
  void RecordBootImageStringPatch(HLoadString* load_string);
  Label* NewStringBssEntryPatch(HLoadString* load_string);
  Label* NewMethodTypeBssEntryPatch(HLoadMethodType* load_method_type);
  void RecordBootImageJniEntrypointPatch(HInvokeStaticOrDirect* invoke);
  Label* NewJitRootStringPatch(const DexFile& dex_file,
                               dex::StringIndex string_index,
                               Handle<mirror::String> handle);
  Label* NewJitRootClassPatch(const DexFile& dex_file,
                              dex::TypeIndex type_index,
                              Handle<mirror::Class> handle);

  void LoadBootImageAddress(CpuRegister reg, uint32_t boot_image_reference);
  void LoadIntrinsicDeclaringClass(CpuRegister reg, HInvoke* invoke);
  void LoadClassRootForIntrinsic(CpuRegister reg, ClassRoot class_root);

  void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) override;

  void PatchJitRootUse(uint8_t* code,
                       const uint8_t* roots_data,
                       const PatchInfo<Label>& info,
                       uint64_t index_in_table) const;

  void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) override;

  // Fast path implementation of ReadBarrier::Barrier for a heap
  // reference field load when Baker's read barriers are used.
  void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
                                             Location ref,
                                             CpuRegister obj,
                                             uint32_t offset,
                                             bool needs_null_check);
  // Fast path implementation of ReadBarrier::Barrier for a heap
  // reference array load when Baker's read barriers are used.
  void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
                                             Location ref,
                                             CpuRegister obj,
                                             uint32_t data_offset,
                                             Location index,
                                             bool needs_null_check);
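
  // In outline, the Baker read barrier fast path emitted by these helpers loads the
  // reference, then checks the read barrier state recorded in the holder's lock word
  // and only calls out to the runtime when the holder is gray, roughly (illustrative;
  // the actual checks and slow-path plumbing live in the .cc file):
  //
  //   ref = *(obj + offset);
  //   if (obj->GetLockWord().ReadBarrierState() == ReadBarrier::GrayState()) {
  //     ref = ReadBarrier::Mark(ref);  // slow path
  //   }
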
  // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier,
  // GenerateArrayLoadWithBakerReadBarrier and some intrinsics.
  //
  // Load the object reference located at address `src`, held by
  // object `obj`, into `ref`, and mark it if needed. The base of
  // address `src` must be `obj`.
  //
  // If `always_update_field` is true, the value of the reference is
  // atomically updated in the holder (`obj`). This operation
  // requires two temporary registers, which must be provided as
  // non-null pointers (`temp1` and `temp2`).
  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
                                                 Location ref,
                                                 CpuRegister obj,
                                                 const Address& src,
                                                 bool needs_null_check,
                                                 bool always_update_field = false,
                                                 CpuRegister* temp1 = nullptr,
                                                 CpuRegister* temp2 = nullptr);

  // Generate a read barrier for a heap reference within `instruction`
  // using a slow path.
  //
  // A read barrier for an object reference read from the heap is
  // implemented as a call to the artReadBarrierSlow runtime entry
  // point, which is passed the values in locations `ref`, `obj`, and
  // `offset`:
  //
  //   mirror::Object* artReadBarrierSlow(mirror::Object* ref,
  //                                      mirror::Object* obj,
  //                                      uint32_t offset);
  //
  // The `out` location contains the value returned by
  // artReadBarrierSlow.
  //
  // When `index` is provided (i.e., when it is different from
  // Location::NoLocation()), the offset value passed to
  // artReadBarrierSlow is adjusted to take `index` into account.
  void GenerateReadBarrierSlow(HInstruction* instruction,
                               Location out,
                               Location ref,
                               Location obj,
                               uint32_t offset,
                               Location index = Location::NoLocation());

  // If read barriers are enabled, generate a read barrier for a heap
  // reference using a slow path. If heap poisoning is enabled, also
  // unpoison the reference in `out`.
  void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
                                    Location out,
                                    Location ref,
                                    Location obj,
                                    uint32_t offset,
                                    Location index = Location::NoLocation());

  // Generate a read barrier for a GC root within `instruction` using
  // a slow path.
  //
  // A read barrier for an object reference GC root is implemented as
  // a call to the artReadBarrierForRootSlow runtime entry point,
  // which is passed the value in location `root`:
  //
  //   mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root);
  //
  // The `out` location contains the value returned by
  // artReadBarrierForRootSlow.
  void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);

  int ConstantAreaStart() const {
    return constant_area_start_;
  }

  Address LiteralDoubleAddress(double v);
  Address LiteralFloatAddress(float v);
  Address LiteralInt32Address(int32_t v);
  Address LiteralInt64Address(int64_t v);

  // Load a 32/64-bit value into a register in the most efficient manner.
  void Load32BitValue(CpuRegister dest, int32_t value);
  void Load64BitValue(CpuRegister dest, int64_t value);
  void Load32BitValue(XmmRegister dest, int32_t value);
  void Load64BitValue(XmmRegister dest, int64_t value);
  void Load32BitValue(XmmRegister dest, float value);
  void Load64BitValue(XmmRegister dest, double value);

  // Compare a register with a 32/64-bit value in the most efficient manner.
  void Compare32BitValue(CpuRegister dest, int32_t value);
  void Compare64BitValue(CpuRegister dest, int64_t value);

  // Compare int values. Supports register locations for `lhs`.
  void GenerateIntCompare(Location lhs, Location rhs);
  void GenerateIntCompare(CpuRegister lhs, Location rhs);

  // Compare long values. Supports only register locations for `lhs`.
  void GenerateLongCompare(Location lhs, Location rhs);
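
  // For ArrayAddress() below: with a variable index the resulting operand has the
  // form `[obj + data_offset + index * (1 << scale)]`, while a constant index is
  // expected to be folded into the displacement, e.g. `[obj + data_offset + 16]`
  // for element 4 of an int array (illustrative).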

  // Construct address for array access.
  static Address ArrayAddress(CpuRegister obj,
                              Location index,
                              ScaleFactor scale,
                              uint32_t data_offset);

  Address LiteralCaseTable(HPackedSwitch* switch_instr);

  // Store a 64 bit value into a DoubleStackSlot in the most efficient manner.
  void Store64BitValueToStack(Location dest, int64_t value);

  void MoveFromReturnRegister(Location trg, DataType::Type type) override;

  // Assign a 64 bit constant to an address.
  void MoveInt64ToAddress(const Address& addr_low,
                          const Address& addr_high,
                          int64_t v,
                          HInstruction* instruction);

  // Ensure that prior stores complete to memory before subsequent loads.
  // The locked add implementation will avoid serializing device memory, but will
  // touch (but not change) the top of the stack.
  // Pass `force_mfence = true` when ordering of non-temporal stores (or device memory)
  // is required; this emits a full `mfence` instead of the locked add.
  void MemoryFence(bool force_mfence = false) {
    if (!force_mfence) {
      assembler_.lock()->addl(Address(CpuRegister(RSP), 0), Immediate(0));
    } else {
      assembler_.mfence();
    }
  }

  void IncreaseFrame(size_t adjustment) override;
  void DecreaseFrame(size_t adjustment) override;

  void GenerateNop() override;
  void GenerateImplicitNullCheck(HNullCheck* instruction) override;
  void GenerateExplicitNullCheck(HNullCheck* instruction) override;
  void MaybeGenerateInlineCacheCheck(HInstruction* instruction, CpuRegister cls);

  void MaybeIncrementHotness(HSuspendCheck* suspend_check, bool is_frame_entry);

  static void BlockNonVolatileXmmRegisters(LocationSummary* locations);

  // When we don't know the proper offset for the value, we use kPlaceholder32BitOffset.
  // We will fix this up in the linker later to have the right value.
  static constexpr int32_t kPlaceholder32BitOffset = 256;

 private:
  template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
  static void EmitPcRelativeLinkerPatches(const ArenaDeque<PatchInfo<Label>>& infos,
                                          ArenaVector<linker::LinkerPatch>* linker_patches);

  // Labels for each block that will be compiled.
  Label* block_labels_;  // Indexed by block id.
  Label frame_entry_label_;
  LocationsBuilderX86_64 location_builder_;
  InstructionCodeGeneratorX86_64 instruction_visitor_;
  ParallelMoveResolverX86_64 move_resolver_;
  X86_64Assembler assembler_;

  // Offset to the start of the constant area in the assembled code.
  // Used for fixups to the constant area.
  int constant_area_start_;

  // PC-relative method patch info for kBootImageLinkTimePcRelative.
  ArenaDeque<PatchInfo<Label>> boot_image_method_patches_;
  // PC-relative method patch info for kBssEntry.
  ArenaDeque<PatchInfo<Label>> method_bss_entry_patches_;
  // PC-relative type patch info for kBootImageLinkTimePcRelative.
  ArenaDeque<PatchInfo<Label>> boot_image_type_patches_;
  // PC-relative type patch info for kAppImageRelRo.
  ArenaDeque<PatchInfo<Label>> app_image_type_patches_;
  // PC-relative type patch info for kBssEntry.
  ArenaDeque<PatchInfo<Label>> type_bss_entry_patches_;
  // PC-relative public type patch info for kBssEntryPublic.
  ArenaDeque<PatchInfo<Label>> public_type_bss_entry_patches_;
  // PC-relative package type patch info for kBssEntryPackage.
  ArenaDeque<PatchInfo<Label>> package_type_bss_entry_patches_;
  // PC-relative String patch info for kBootImageLinkTimePcRelative.
  ArenaDeque<PatchInfo<Label>> boot_image_string_patches_;
  // PC-relative String patch info for kBssEntry.
  ArenaDeque<PatchInfo<Label>> string_bss_entry_patches_;
  // PC-relative MethodType patch info for kBssEntry.
  ArenaDeque<PatchInfo<Label>> method_type_bss_entry_patches_;
  // PC-relative method patch info for kBootImageLinkTimePcRelative+kCallCriticalNative.
  ArenaDeque<PatchInfo<Label>> boot_image_jni_entrypoint_patches_;
  // PC-relative patch info for IntrinsicObjects for the boot image,
  // and for method/type/string patches for kBootImageRelRo otherwise.
  ArenaDeque<PatchInfo<Label>> boot_image_other_patches_;

  // Patches for string literals in JIT compiled code.
  ArenaDeque<PatchInfo<Label>> jit_string_patches_;
  // Patches for class literals in JIT compiled code.
  ArenaDeque<PatchInfo<Label>> jit_class_patches_;

  // Fixups for jump tables need to be handled specially.
  ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_;

  DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86_64);
};

}  // namespace x86_64
}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_