/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_

#include "arch/instruction_set.h"
#include "arch/instruction_set_features.h"
#include "base/arena_containers.h"
#include "base/arena_object.h"
#include "base/bit_field.h"
#include "base/bit_utils.h"
#include "base/enums.h"
#include "globals.h"
#include "graph_visualizer.h"
#include "locations.h"
#include "memory_region.h"
#include "nodes.h"
#include "optimizing_compiler_stats.h"
#include "read_barrier_option.h"
#include "stack_map_stream.h"
#include "string_reference.h"
#include "utils/label.h"
#include "utils/type_reference.h"

namespace art {

// Binary encoding of 2^32 for type double.
static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000);
// Binary encoding of 2^31 for type double.
static int64_t constexpr k2Pow31EncodingForDouble = INT64_C(0x41E0000000000000);

// Minimum value for a primitive integer.
static int32_t constexpr kPrimIntMin = 0x80000000;
// Minimum value for a primitive long.
static int64_t constexpr kPrimLongMin = INT64_C(0x8000000000000000);

// Maximum value for a primitive integer.
static int32_t constexpr kPrimIntMax = 0x7fffffff;
// Maximum value for a primitive long.
static int64_t constexpr kPrimLongMax = INT64_C(0x7fffffffffffffff);

static constexpr ReadBarrierOption kCompilerReadBarrierOption =
    kEmitCompilerReadBarrier ? kWithReadBarrier : kWithoutReadBarrier;

class Assembler;
class CodeGenerator;
class CompilerDriver;
class CompilerOptions;
class LinkerPatch;
class ParallelMoveResolver;

class CodeAllocator {
 public:
  CodeAllocator() {}
  virtual ~CodeAllocator() {}

  virtual uint8_t* Allocate(size_t size) = 0;

 private:
  DISALLOW_COPY_AND_ASSIGN(CodeAllocator);
};

class SlowPathCode : public DeletableArenaObject<kArenaAllocSlowPaths> {
 public:
  explicit SlowPathCode(HInstruction* instruction) : instruction_(instruction) {
    for (size_t i = 0; i < kMaximumNumberOfExpectedRegisters; ++i) {
      saved_core_stack_offsets_[i] = kRegisterNotSaved;
      saved_fpu_stack_offsets_[i] = kRegisterNotSaved;
    }
  }

  virtual ~SlowPathCode() {}

  virtual void EmitNativeCode(CodeGenerator* codegen) = 0;

  // Save live core and floating-point caller-save registers and
  // update the stack mask in `locations` for registers holding object
  // references.
  virtual void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
  // Restore live core and floating-point caller-save registers.
  virtual void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);

  bool IsCoreRegisterSaved(int reg) const {
    return saved_core_stack_offsets_[reg] != kRegisterNotSaved;
  }

  bool IsFpuRegisterSaved(int reg) const {
    return saved_fpu_stack_offsets_[reg] != kRegisterNotSaved;
  }

  uint32_t GetStackOffsetOfCoreRegister(int reg) const {
    return saved_core_stack_offsets_[reg];
  }

  uint32_t GetStackOffsetOfFpuRegister(int reg) const {
    return saved_fpu_stack_offsets_[reg];
  }

  virtual bool IsFatal() const { return false; }

  virtual const char* GetDescription() const = 0;

  Label* GetEntryLabel() { return &entry_label_; }
  Label* GetExitLabel() { return &exit_label_; }

  HInstruction* GetInstruction() const {
    return instruction_;
  }

  uint32_t GetDexPc() const {
    return instruction_ != nullptr ? instruction_->GetDexPc() : kNoDexPc;
  }

 protected:
  static constexpr size_t kMaximumNumberOfExpectedRegisters = 32;
  static constexpr uint32_t kRegisterNotSaved = -1;
  // The instruction where this slow path is happening.
  HInstruction* instruction_;
  uint32_t saved_core_stack_offsets_[kMaximumNumberOfExpectedRegisters];
  uint32_t saved_fpu_stack_offsets_[kMaximumNumberOfExpectedRegisters];

 private:
  Label entry_label_;
  Label exit_label_;

  DISALLOW_COPY_AND_ASSIGN(SlowPathCode);
};

class InvokeDexCallingConventionVisitor {
 public:
  virtual Location GetNextLocation(Primitive::Type type) = 0;
  virtual Location GetReturnLocation(Primitive::Type type) const = 0;
  virtual Location GetMethodLocation() const = 0;

 protected:
  InvokeDexCallingConventionVisitor() {}
  virtual ~InvokeDexCallingConventionVisitor() {}

  // The current index for core registers.
  uint32_t gp_index_ = 0u;
  // The current index for floating-point registers.
  uint32_t float_index_ = 0u;
  // The current stack index.
  uint32_t stack_index_ = 0u;

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
};

class FieldAccessCallingConvention {
 public:
  virtual Location GetObjectLocation() const = 0;
  virtual Location GetFieldIndexLocation() const = 0;
  virtual Location GetReturnLocation(Primitive::Type type) const = 0;
  virtual Location GetSetValueLocation(Primitive::Type type, bool is_instance) const = 0;
  virtual Location GetFpuLocation(Primitive::Type type) const = 0;
  virtual ~FieldAccessCallingConvention() {}

 protected:
  FieldAccessCallingConvention() {}

 private:
  DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConvention);
};

class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
 public:
  // Compiles the graph to executable instructions.
  void Compile(CodeAllocator* allocator);
  static std::unique_ptr<CodeGenerator> Create(HGraph* graph,
                                               InstructionSet instruction_set,
                                               const InstructionSetFeatures& isa_features,
                                               const CompilerOptions& compiler_options,
                                               OptimizingCompilerStats* stats = nullptr);
  virtual ~CodeGenerator() {}

  // Get the graph. This is the outermost graph, never the graph of a method being inlined.
  HGraph* GetGraph() const { return graph_; }

  HBasicBlock* GetNextBlockToEmit() const;
  HBasicBlock* FirstNonEmptyBlock(HBasicBlock* block) const;
  bool GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const;

  size_t GetStackSlotOfParameter(HParameterValue* parameter) const {
    // Note that this follows the current calling convention.
    return GetFrameSize()
        + static_cast<size_t>(InstructionSetPointerSize(GetInstructionSet()))  // Art method
        + parameter->GetIndex() * kVRegSize;
  }
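  // Illustrative sketch (not part of the original header): for a hypothetical 64-bit
  // target with a 96-byte frame, the parameter with index 2 would live at
  // 96 (frame size) + 8 (ArtMethod* slot) + 2 * kVRegSize (4 bytes each) = 112 bytes
  // from the stack pointer. The concrete numbers are assumptions for the example only.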

  virtual void Initialize() = 0;
  virtual void Finalize(CodeAllocator* allocator);
  virtual void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches);
  virtual void GenerateFrameEntry() = 0;
  virtual void GenerateFrameExit() = 0;
  virtual void Bind(HBasicBlock* block) = 0;
  virtual void MoveConstant(Location destination, int32_t value) = 0;
  virtual void MoveLocation(Location dst, Location src, Primitive::Type dst_type) = 0;
  virtual void AddLocationAsTemp(Location location, LocationSummary* locations) = 0;

  virtual Assembler* GetAssembler() = 0;
  virtual const Assembler& GetAssembler() const = 0;
  virtual size_t GetWordSize() const = 0;
  virtual size_t GetFloatingPointSpillSlotSize() const = 0;
  virtual uintptr_t GetAddressOf(HBasicBlock* block) = 0;
  void InitializeCodeGeneration(size_t number_of_spill_slots,
                                size_t maximum_safepoint_spill_size,
                                size_t number_of_out_slots,
                                const ArenaVector<HBasicBlock*>& block_order);
  // Backends can override this as necessary. For most, no special alignment is required.
  virtual uint32_t GetPreferredSlotsAlignment() const { return 1; }

  uint32_t GetFrameSize() const { return frame_size_; }
  void SetFrameSize(uint32_t size) { frame_size_ = size; }
  uint32_t GetCoreSpillMask() const { return core_spill_mask_; }
  uint32_t GetFpuSpillMask() const { return fpu_spill_mask_; }

  size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; }
  size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; }
  virtual void SetupBlockedRegisters() const = 0;

  virtual void ComputeSpillMask() {
    core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
    DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
    fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
  }

  static uint32_t ComputeRegisterMask(const int* registers, size_t length) {
    uint32_t mask = 0;
    for (size_t i = 0, e = length; i < e; ++i) {
      mask |= (1 << registers[i]);
    }
    return mask;
  }
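  // Illustrative sketch (not part of the original header): given register numbers
  // {0, 3, 5}, ComputeRegisterMask returns (1 << 0) | (1 << 3) | (1 << 5) = 0b101001 = 0x29.
  // Backends typically use this to build the core/FPU callee-save masks passed to the
  // CodeGenerator constructor from their register lists.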

  virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0;
  virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0;
  virtual InstructionSet GetInstructionSet() const = 0;

  const CompilerOptions& GetCompilerOptions() const { return compiler_options_; }

  void MaybeRecordStat(MethodCompilationStat compilation_stat, size_t count = 1) const;

  // Saves the register in the stack. Returns the size taken on stack.
  virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
  // Restores the register from the stack. Returns the size taken on stack.
  virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) = 0;

  virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;
  virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;

  virtual bool NeedsTwoRegisters(Primitive::Type type) const = 0;
  // Returns whether we should split long moves in parallel moves.
  virtual bool ShouldSplitLongMoves() const { return false; }

  size_t GetNumberOfCoreCalleeSaveRegisters() const {
    return POPCOUNT(core_callee_save_mask_);
  }

  size_t GetNumberOfCoreCallerSaveRegisters() const {
    DCHECK_GE(GetNumberOfCoreRegisters(), GetNumberOfCoreCalleeSaveRegisters());
    return GetNumberOfCoreRegisters() - GetNumberOfCoreCalleeSaveRegisters();
  }

  bool IsCoreCalleeSaveRegister(int reg) const {
    return (core_callee_save_mask_ & (1 << reg)) != 0;
  }

  bool IsFloatingPointCalleeSaveRegister(int reg) const {
    return (fpu_callee_save_mask_ & (1 << reg)) != 0;
  }

  uint32_t GetSlowPathSpills(LocationSummary* locations, bool core_registers) const {
    DCHECK(locations->OnlyCallsOnSlowPath() ||
           (locations->Intrinsified() && locations->CallsOnMainAndSlowPath() &&
            !locations->HasCustomSlowPathCallingConvention()));
    uint32_t live_registers = core_registers
        ? locations->GetLiveRegisters()->GetCoreRegisters()
        : locations->GetLiveRegisters()->GetFloatingPointRegisters();
    if (locations->HasCustomSlowPathCallingConvention()) {
      // Save only the live registers that the custom calling convention wants us to save.
      uint32_t caller_saves = core_registers
          ? locations->GetCustomSlowPathCallerSaves().GetCoreRegisters()
          : locations->GetCustomSlowPathCallerSaves().GetFloatingPointRegisters();
      return live_registers & caller_saves;
    } else {
      // Default ABI, we need to spill non-callee-save live registers.
      uint32_t callee_saves = core_registers ? core_callee_save_mask_ : fpu_callee_save_mask_;
      return live_registers & ~callee_saves;
    }
  }
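  // Illustrative sketch (not part of the original header), using made-up masks: if the
  // live core registers at the slow path are 0b0110 and the callee-save mask is 0b0100,
  // the default-ABI branch returns 0b0110 & ~0b0100 = 0b0010, i.e. only the live
  // caller-save register has to be spilled around the runtime call.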

  size_t GetNumberOfSlowPathSpills(LocationSummary* locations, bool core_registers) const {
    return POPCOUNT(GetSlowPathSpills(locations, core_registers));
  }

  size_t GetStackOffsetOfShouldDeoptimizeFlag() const {
    DCHECK(GetGraph()->HasShouldDeoptimizeFlag());
    DCHECK_GE(GetFrameSize(), FrameEntrySpillSize() + kShouldDeoptimizeFlagSize);
    return GetFrameSize() - FrameEntrySpillSize() - kShouldDeoptimizeFlagSize;
  }

  // Record native to dex mapping for a suspend point.  Required by runtime.
  void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr);
  // Check whether we have already recorded mapping at this PC.
  bool HasStackMapAtCurrentPc();
  // Record extra stack maps if we support native debugging.
  void MaybeRecordNativeDebugInfo(HInstruction* instruction,
                                  uint32_t dex_pc,
                                  SlowPathCode* slow_path = nullptr);

  bool CanMoveNullCheckToUser(HNullCheck* null_check);
  void MaybeRecordImplicitNullCheck(HInstruction* instruction);
  LocationSummary* CreateThrowingSlowPathLocations(
      HInstruction* instruction, RegisterSet caller_saves = RegisterSet::Empty());
  void GenerateNullCheck(HNullCheck* null_check);
  virtual void GenerateImplicitNullCheck(HNullCheck* null_check) = 0;
  virtual void GenerateExplicitNullCheck(HNullCheck* null_check) = 0;

  // Records a stack map which the runtime might use to set catch phi values
  // during exception delivery.
  // TODO: Replace with a catch-entering instruction that records the environment.
  void RecordCatchBlockInfo();

  // TODO: Avoid creating the `std::unique_ptr` here.
  void AddSlowPath(SlowPathCode* slow_path) {
    slow_paths_.push_back(std::unique_ptr<SlowPathCode>(slow_path));
  }

  void BuildStackMaps(MemoryRegion region, const DexFile::CodeItem& code_item);
  size_t ComputeStackMapsSize();
  size_t GetNumberOfJitRoots() const {
    return jit_string_roots_.size() + jit_class_roots_.size();
  }

  // Fills the `literals` array with literals collected during code generation.
  // Also emits literal patches.
  void EmitJitRoots(uint8_t* code,
                    Handle<mirror::ObjectArray<mirror::Object>> roots,
                    const uint8_t* roots_data,
                    Handle<mirror::DexCache> outer_dex_cache)
      REQUIRES_SHARED(Locks::mutator_lock_);

  bool IsLeafMethod() const {
    return is_leaf_;
  }

  void MarkNotLeaf() {
    is_leaf_ = false;
    requires_current_method_ = true;
  }

  void SetRequiresCurrentMethod() {
    requires_current_method_ = true;
  }

  bool RequiresCurrentMethod() const {
    return requires_current_method_;
  }

  // Clears the spill slots taken by loop phis in the `LocationSummary` of the
  // suspend check. This is called when the code generator generates code
  // for the suspend check at the back edge (instead of where the suspend check
  // is, which is the loop entry). At this point, the spill slots for the phis
  // have not been written to.
  void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check) const;

  bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; }
  bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; }

  bool IsBlockedCoreRegister(size_t i) { return blocked_core_registers_[i]; }
  bool IsBlockedFloatingPointRegister(size_t i) { return blocked_fpu_registers_[i]; }

  // Helper that returns the pointer offset of an index in an object array.
  // Note: this method assumes we always have the same pointer size, regardless
  // of the architecture.
  static size_t GetCacheOffset(uint32_t index);
  // Pointer variant for ArtMethod and ArtField arrays.
  size_t GetCachePointerOffset(uint32_t index);

  // Helper that returns the offset of the array's length field.
  // Note: Besides the normal arrays, we also use the HArrayLength for
  // accessing the String's `count` field in String intrinsics.
  static uint32_t GetArrayLengthOffset(HArrayLength* array_length);

  // Helper that returns the offset of the array's data.
  // Note: Besides the normal arrays, we also use the HArrayGet for
  // accessing the String's `value` field in String intrinsics.
  static uint32_t GetArrayDataOffset(HArrayGet* array_get);

  // Return the entry point offset for ReadBarrierMarkRegX, where X is `reg`.
  template <PointerSize pointer_size>
  static int32_t GetReadBarrierMarkEntryPointsOffset(size_t reg) {
    // The entry point list defines 30 ReadBarrierMarkRegX entry points.
    DCHECK_LT(reg, 30u);
    // The ReadBarrierMarkRegX entry points are ordered by increasing
    // register number in Thread::tls_Ptr_.quick_entrypoints.
    return QUICK_ENTRYPOINT_OFFSET(pointer_size, pReadBarrierMarkReg00).Int32Value()
        + static_cast<size_t>(pointer_size) * reg;
  }
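  // Illustrative sketch (not part of the original header): on a 64-bit target
  // (PointerSize::k64), the offset for reg 5 is the offset of pReadBarrierMarkReg00
  // plus 8 * 5, since consecutive entry points are laid out one pointer apart.
  // The concrete base offset depends on the Thread layout and is not assumed here.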

  void EmitParallelMoves(Location from1,
                         Location to1,
                         Primitive::Type type1,
                         Location from2,
                         Location to2,
                         Primitive::Type type2);

  static bool StoreNeedsWriteBarrier(Primitive::Type type, HInstruction* value) {
    // Check that null value is not represented as an integer constant.
    DCHECK(type != Primitive::kPrimNot || !value->IsIntConstant());
    return type == Primitive::kPrimNot && !value->IsNullConstant();
  }
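  // Illustrative sketch (not part of the original header): storing a possibly non-null
  // reference (kPrimNot) into an object field or array requires a GC write barrier,
  // whereas storing a primitive value or a known null constant does not, which is
  // exactly what the predicate above encodes.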

  // Performs checks pertaining to an InvokeRuntime call.
  void ValidateInvokeRuntime(QuickEntrypointEnum entrypoint,
                             HInstruction* instruction,
                             SlowPathCode* slow_path);

  // Performs checks pertaining to an InvokeRuntimeWithoutRecordingPcInfo call.
  static void ValidateInvokeRuntimeWithoutRecordingPcInfo(HInstruction* instruction,
                                                          SlowPathCode* slow_path);

  void AddAllocatedRegister(Location location) {
    allocated_registers_.Add(location);
  }

  bool HasAllocatedRegister(bool is_core, int reg) const {
    return is_core
        ? allocated_registers_.ContainsCoreRegister(reg)
        : allocated_registers_.ContainsFloatingPointRegister(reg);
  }

  void AllocateLocations(HInstruction* instruction);

  // Tells whether the stack frame of the compiled method is
  // considered "empty", that is either actually having a size of zero,
  // or just containing the saved return address register.
  bool HasEmptyFrame() const {
    return GetFrameSize() == (CallPushesPC() ? GetWordSize() : 0);
  }
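  // Illustrative sketch (not part of the original header): on x86/x86-64 the `call`
  // instruction pushes the return PC, so an "empty" frame still occupies GetWordSize()
  // bytes; on architectures where the return address lives in a register (e.g. ARM),
  // an empty frame really has size zero.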

  static int32_t GetInt32ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else {
      DCHECK(constant->IsFloatConstant());
      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
    }
  }

  static int64_t GetInt64ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else if (constant->IsFloatConstant()) {
      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
    } else if (constant->IsLongConstant()) {
      return constant->AsLongConstant()->GetValue();
    } else {
      DCHECK(constant->IsDoubleConstant());
      return bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
    }
  }
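  // Illustrative sketch (not part of the original header): these helpers reinterpret the
  // bits of a floating-point constant rather than converting its value. For example, a
  // double constant 2.0 yields bit_cast<int64_t, double>(2.0) = 0x4000000000000000, the
  // raw IEEE-754 encoding a backend can embed directly in the generated code.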

  size_t GetFirstRegisterSlotInSlowPath() const {
    return first_register_slot_in_slow_path_;
  }

  uint32_t FrameEntrySpillSize() const {
    return GetFpuSpillSize() + GetCoreSpillSize();
  }

  virtual ParallelMoveResolver* GetMoveResolver() = 0;

  static void CreateCommonInvokeLocationSummary(
      HInvoke* invoke, InvokeDexCallingConventionVisitor* visitor);

  void GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invoke);

  void CreateUnresolvedFieldLocationSummary(
      HInstruction* field_access,
      Primitive::Type field_type,
      const FieldAccessCallingConvention& calling_convention);

  void GenerateUnresolvedFieldAccess(
      HInstruction* field_access,
      Primitive::Type field_type,
      uint32_t field_index,
      uint32_t dex_pc,
      const FieldAccessCallingConvention& calling_convention);

  // TODO: This overlaps a bit with MoveFromReturnRegister. Refactor for a better design.
  static void CreateLoadClassLocationSummary(HLoadClass* cls,
                                             Location runtime_type_index_location,
                                             Location runtime_return_location,
                                             bool code_generator_supports_read_barrier = false);

  static void CreateSystemArrayCopyLocationSummary(HInvoke* invoke);

  void SetDisassemblyInformation(DisassemblyInformation* info) { disasm_info_ = info; }
  DisassemblyInformation* GetDisassemblyInformation() const { return disasm_info_; }

  virtual void InvokeRuntime(QuickEntrypointEnum entrypoint,
                             HInstruction* instruction,
                             uint32_t dex_pc,
                             SlowPathCode* slow_path) = 0;

  // Check if the desired_string_load_kind is supported. If it is, return it,
  // otherwise return a fall-back kind that should be used instead.
  virtual HLoadString::LoadKind GetSupportedLoadStringKind(
      HLoadString::LoadKind desired_string_load_kind) = 0;

  // Check if the desired_class_load_kind is supported. If it is, return it,
  // otherwise return a fall-back kind that should be used instead.
  virtual HLoadClass::LoadKind GetSupportedLoadClassKind(
      HLoadClass::LoadKind desired_class_load_kind) = 0;

  static LocationSummary::CallKind GetLoadStringCallKind(HLoadString* load) {
    switch (load->GetLoadKind()) {
      case HLoadString::LoadKind::kBssEntry:
        DCHECK(load->NeedsEnvironment());
        return LocationSummary::kCallOnSlowPath;
      case HLoadString::LoadKind::kDexCacheViaMethod:
        DCHECK(load->NeedsEnvironment());
        return LocationSummary::kCallOnMainOnly;
      case HLoadString::LoadKind::kJitTableAddress:
        DCHECK(!load->NeedsEnvironment());
        return kEmitCompilerReadBarrier
            ? LocationSummary::kCallOnSlowPath
            : LocationSummary::kNoCall;
      default:
        DCHECK(!load->NeedsEnvironment());
        return LocationSummary::kNoCall;
    }
  }

  // Check if the desired_dispatch_info is supported. If it is, return it,
  // otherwise return a fall-back info that should be used instead.
  virtual HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
      HInvokeStaticOrDirect* invoke) = 0;

  // Generate a call to a static or direct method.
  virtual void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) = 0;
  // Generate a call to a virtual method.
  virtual void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) = 0;

  // Copy the result of a call into the given target.
  virtual void MoveFromReturnRegister(Location trg, Primitive::Type type) = 0;

  virtual void GenerateNop() = 0;

  uint32_t GetReferenceSlowFlagOffset() const;
  uint32_t GetReferenceDisableFlagOffset() const;

 protected:
  // Patch info used for recording locations of required linker patches and their targets,
  // i.e. target method, string, type or code identified by their dex file and index.
  template <typename LabelType>
  struct PatchInfo {
    PatchInfo(const DexFile& target_dex_file, uint32_t target_index)
        : dex_file(target_dex_file), index(target_index) { }

    const DexFile& dex_file;
    uint32_t index;
    LabelType label;
  };

  CodeGenerator(HGraph* graph,
                size_t number_of_core_registers,
                size_t number_of_fpu_registers,
                size_t number_of_register_pairs,
                uint32_t core_callee_save_mask,
                uint32_t fpu_callee_save_mask,
                const CompilerOptions& compiler_options,
                OptimizingCompilerStats* stats)
      : frame_size_(0),
        core_spill_mask_(0),
        fpu_spill_mask_(0),
        first_register_slot_in_slow_path_(0),
        allocated_registers_(RegisterSet::Empty()),
        blocked_core_registers_(graph->GetArena()->AllocArray<bool>(number_of_core_registers,
                                                                    kArenaAllocCodeGenerator)),
        blocked_fpu_registers_(graph->GetArena()->AllocArray<bool>(number_of_fpu_registers,
                                                                   kArenaAllocCodeGenerator)),
        number_of_core_registers_(number_of_core_registers),
        number_of_fpu_registers_(number_of_fpu_registers),
        number_of_register_pairs_(number_of_register_pairs),
        core_callee_save_mask_(core_callee_save_mask),
        fpu_callee_save_mask_(fpu_callee_save_mask),
        stack_map_stream_(graph->GetArena()),
        block_order_(nullptr),
        jit_string_roots_(StringReferenceValueComparator(),
                          graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
        jit_class_roots_(TypeReferenceValueComparator(),
                         graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
        disasm_info_(nullptr),
        stats_(stats),
        graph_(graph),
        compiler_options_(compiler_options),
        slow_paths_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
        current_slow_path_(nullptr),
        current_block_index_(0),
        is_leaf_(true),
        requires_current_method_(false) {
    slow_paths_.reserve(8);
  }

  virtual HGraphVisitor* GetLocationBuilder() = 0;
  virtual HGraphVisitor* GetInstructionVisitor() = 0;

  // Returns the location of the first spilled entry for floating point registers,
  // relative to the stack pointer.
  uint32_t GetFpuSpillStart() const {
    return GetFrameSize() - FrameEntrySpillSize();
  }

  uint32_t GetFpuSpillSize() const {
    return POPCOUNT(fpu_spill_mask_) * GetFloatingPointSpillSlotSize();
  }

  uint32_t GetCoreSpillSize() const {
    return POPCOUNT(core_spill_mask_) * GetWordSize();
  }

  virtual bool HasAllocatedCalleeSaveRegisters() const {
    // We check the core registers against 1 because it always comprises the return PC.
    return (POPCOUNT(allocated_registers_.GetCoreRegisters() & core_callee_save_mask_) != 1)
        || (POPCOUNT(allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_) != 0);
  }

  bool CallPushesPC() const {
    InstructionSet instruction_set = GetInstructionSet();
    return instruction_set == kX86 || instruction_set == kX86_64;
  }

  // Arm64 has its own type for a label, so we need to templatize these methods
  // to share the logic.

  template <typename LabelType>
  LabelType* CommonInitializeLabels() {
    // We use raw array allocations instead of ArenaVector<> because Labels are
    // non-constructible and non-movable and as such cannot be held in a vector.
    size_t size = GetGraph()->GetBlocks().size();
    LabelType* labels = GetGraph()->GetArena()->AllocArray<LabelType>(size,
                                                                      kArenaAllocCodeGenerator);
    for (size_t i = 0; i != size; ++i) {
      new(labels + i) LabelType();
    }
    return labels;
  }

  template <typename LabelType>
  LabelType* CommonGetLabelOf(LabelType* raw_pointer_to_labels_array, HBasicBlock* block) const {
    block = FirstNonEmptyBlock(block);
    return raw_pointer_to_labels_array + block->GetBlockId();
  }

  SlowPathCode* GetCurrentSlowPath() {
    return current_slow_path_;
  }

  // Emit the patches associated with JIT roots. Only applies to JIT compiled code.
  virtual void EmitJitRootPatches(uint8_t* code ATTRIBUTE_UNUSED,
                                  const uint8_t* roots_data ATTRIBUTE_UNUSED) {
    DCHECK_EQ(jit_string_roots_.size(), 0u);
    DCHECK_EQ(jit_class_roots_.size(), 0u);
  }

  // Frame size required for this method.
  uint32_t frame_size_;
  uint32_t core_spill_mask_;
  uint32_t fpu_spill_mask_;
  uint32_t first_register_slot_in_slow_path_;

  // Registers that were allocated during linear scan.
  RegisterSet allocated_registers_;

  // Arrays used when doing register allocation to know which
  // registers we can allocate. `SetupBlockedRegisters` updates the
  // arrays.
  bool* const blocked_core_registers_;
  bool* const blocked_fpu_registers_;
  size_t number_of_core_registers_;
  size_t number_of_fpu_registers_;
  size_t number_of_register_pairs_;
  const uint32_t core_callee_save_mask_;
  const uint32_t fpu_callee_save_mask_;

  StackMapStream stack_map_stream_;

  // The order to use for code generation.
  const ArenaVector<HBasicBlock*>* block_order_;

  // Maps a StringReference (dex_file, string_index) to the index in the literal table.
  // Entries are initially added with a 0 index, and `EmitJitRoots` will compute all the
  // indices.
  ArenaSafeMap<StringReference, uint32_t, StringReferenceValueComparator> jit_string_roots_;

  // Maps a ClassReference (dex_file, type_index) to the index in the literal table.
  // Entries are initially added with a pointer in the handle zone, and `EmitJitRoots`
  // will compute all the indices.
  ArenaSafeMap<TypeReference, uint64_t, TypeReferenceValueComparator> jit_class_roots_;

  DisassemblyInformation* disasm_info_;

 private:
  size_t GetStackOffsetOfSavedRegister(size_t index);
  void GenerateSlowPaths();
  void BlockIfInRegister(Location location, bool is_out = false) const;
  void EmitEnvironment(HEnvironment* environment, SlowPathCode* slow_path);

  OptimizingCompilerStats* stats_;

  HGraph* const graph_;
  const CompilerOptions& compiler_options_;

  ArenaVector<std::unique_ptr<SlowPathCode>> slow_paths_;

  // The current slow-path that we're generating code for.
  SlowPathCode* current_slow_path_;

  // The current block index in `block_order_` of the block
  // we are generating code for.
  size_t current_block_index_;

  // Whether the method is a leaf method.
  bool is_leaf_;

  // Whether an instruction in the graph accesses the current method.
  // TODO: Rename: this actually indicates that some instruction in the method
  // needs the environment including a valid stack frame.
  bool requires_current_method_;

  friend class OptimizingCFITest;

  DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
};

template <typename C, typename F>
class CallingConvention {
 public:
  CallingConvention(const C* registers,
                    size_t number_of_registers,
                    const F* fpu_registers,
                    size_t number_of_fpu_registers,
                    PointerSize pointer_size)
      : registers_(registers),
        number_of_registers_(number_of_registers),
        fpu_registers_(fpu_registers),
        number_of_fpu_registers_(number_of_fpu_registers),
        pointer_size_(pointer_size) {}

  size_t GetNumberOfRegisters() const { return number_of_registers_; }
  size_t GetNumberOfFpuRegisters() const { return number_of_fpu_registers_; }

  C GetRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_registers_);
    return registers_[index];
  }

  F GetFpuRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_fpu_registers_);
    return fpu_registers_[index];
  }

  size_t GetStackOffsetOf(size_t index) const {
    // We still reserve the space for parameters passed by registers.
    // Add space for the method pointer.
    return static_cast<size_t>(pointer_size_) + index * kVRegSize;
  }
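  // Illustrative sketch (not part of the original header): with a 64-bit method pointer
  // (PointerSize::k64) and kVRegSize of 4, the vreg with index 3 would live at stack
  // offset 8 + 3 * 4 = 20, regardless of whether earlier arguments were actually passed
  // in registers.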

 private:
  const C* registers_;
  const size_t number_of_registers_;
  const F* fpu_registers_;
  const size_t number_of_fpu_registers_;
  const PointerSize pointer_size_;

  DISALLOW_COPY_AND_ASSIGN(CallingConvention);
};

/**
 * A templated class SlowPathGenerator with a templated method NewSlowPath()
 * that can be used by any code generator to share equivalent slow-paths with
 * the objective of reducing generated code size.
 *
 * InstructionType:  instruction that requires SlowPathCodeType
 * SlowPathCodeType: subclass of SlowPathCode, with constructor SlowPathCodeType(InstructionType *)
 */
template <typename InstructionType>
class SlowPathGenerator {
  static_assert(std::is_base_of<HInstruction, InstructionType>::value,
                "InstructionType is not a subclass of art::HInstruction");

 public:
  SlowPathGenerator(HGraph* graph, CodeGenerator* codegen)
      : graph_(graph),
        codegen_(codegen),
        slow_path_map_(std::less<uint32_t>(), graph->GetArena()->Adapter(kArenaAllocSlowPaths)) {}

  // Creates and adds a new slow-path, if needed, or returns existing one otherwise.
  // Templating the method (rather than the whole class) on the slow-path type enables
  // keeping this code at a generic, non architecture-specific place.
  //
  // NOTE: This approach assumes each InstructionType only generates one SlowPathCodeType.
  //       To relax this requirement, we would need some RTTI on the stored slow-paths,
  //       or template the class as a whole on SlowPathType.
  template <typename SlowPathCodeType>
  SlowPathCodeType* NewSlowPath(InstructionType* instruction) {
    static_assert(std::is_base_of<SlowPathCode, SlowPathCodeType>::value,
                  "SlowPathCodeType is not a subclass of art::SlowPathCode");
    static_assert(std::is_constructible<SlowPathCodeType, InstructionType*>::value,
                  "SlowPathCodeType is not constructible from InstructionType*");
    // Iterate over potential candidates for sharing. Currently, only same-typed
    // slow-paths with exactly the same dex-pc are viable candidates.
    // TODO: pass dex-pc/slow-path-type to run-time to allow even more sharing?
    const uint32_t dex_pc = instruction->GetDexPc();
    auto iter = slow_path_map_.find(dex_pc);
    if (iter != slow_path_map_.end()) {
      auto candidates = iter->second;
      for (const auto& it : candidates) {
        InstructionType* other_instruction = it.first;
        SlowPathCodeType* other_slow_path = down_cast<SlowPathCodeType*>(it.second);
        // Determine if the instructions allow for slow-path sharing.
        if (HaveSameLiveRegisters(instruction, other_instruction) &&
            HaveSameStackMap(instruction, other_instruction)) {
          // Can share: reuse existing one.
          return other_slow_path;
        }
      }
    } else {
      // First time this dex-pc is seen.
      iter = slow_path_map_.Put(dex_pc, {{}, {graph_->GetArena()->Adapter(kArenaAllocSlowPaths)}});
    }
    // Cannot share: create and add new slow-path for this particular dex-pc.
    SlowPathCodeType* slow_path = new (graph_->GetArena()) SlowPathCodeType(instruction);
    iter->second.emplace_back(std::make_pair(instruction, slow_path));
    codegen_->AddSlowPath(slow_path);
    return slow_path;
  }
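  // Illustrative usage sketch (not part of the original header); the backend types and
  // assembler macro below are hypothetical and shown only to indicate the intended flow:
  //
  //   SlowPathGenerator<HDeoptimize> deopt_slow_paths_(graph, codegen);
  //   ...
  //   // For two HDeoptimize instructions with the same dex-pc, live registers and
  //   // environment, the second call returns the slow path created by the first.
  //   BackendDeoptSlowPath* path =
  //       deopt_slow_paths_.NewSlowPath<BackendDeoptSlowPath>(deoptimize);
  //   // Branch to path->GetEntryLabel() in the backend's assembler.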

 private:
  // Tests if both instructions have same set of live physical registers. This ensures
  // the slow-path has exactly the same preamble on saving these registers to stack.
  bool HaveSameLiveRegisters(const InstructionType* i1, const InstructionType* i2) const {
    const uint32_t core_spill = ~codegen_->GetCoreSpillMask();
    const uint32_t fpu_spill = ~codegen_->GetFpuSpillMask();
    RegisterSet* live1 = i1->GetLocations()->GetLiveRegisters();
    RegisterSet* live2 = i2->GetLocations()->GetLiveRegisters();
    return (((live1->GetCoreRegisters() & core_spill) ==
             (live2->GetCoreRegisters() & core_spill)) &&
            ((live1->GetFloatingPointRegisters() & fpu_spill) ==
             (live2->GetFloatingPointRegisters() & fpu_spill)));
  }

  // Tests if both instructions have the same stack map. This ensures the interpreter
  // will find exactly the same dex-registers at the same entries.
  bool HaveSameStackMap(const InstructionType* i1, const InstructionType* i2) const {
    DCHECK(i1->HasEnvironment());
    DCHECK(i2->HasEnvironment());
    // We conservatively test if the two instructions find exactly the same instructions
    // and location in each dex-register. This guarantees they will have the same stack map.
    HEnvironment* e1 = i1->GetEnvironment();
    HEnvironment* e2 = i2->GetEnvironment();
    if (e1->GetParent() != e2->GetParent() || e1->Size() != e2->Size()) {
      return false;
    }
    for (size_t i = 0, sz = e1->Size(); i < sz; ++i) {
      if (e1->GetInstructionAt(i) != e2->GetInstructionAt(i) ||
          !e1->GetLocationAt(i).Equals(e2->GetLocationAt(i))) {
        return false;
      }
    }
    return true;
  }

  HGraph* const graph_;
  CodeGenerator* const codegen_;

  // Map from dex-pc to vector of already existing instruction/slow-path pairs.
  ArenaSafeMap<uint32_t, ArenaVector<std::pair<InstructionType*, SlowPathCode*>>> slow_path_map_;

  DISALLOW_COPY_AND_ASSIGN(SlowPathGenerator);
};

class InstructionCodeGenerator : public HGraphVisitor {
 public:
  InstructionCodeGenerator(HGraph* graph, CodeGenerator* codegen)
      : HGraphVisitor(graph),
        deopt_slow_paths_(graph, codegen) {}

 protected:
  // Add slow-path generator for each instruction/slow-path combination that desires sharing.
  // TODO: under current regime, only deopt sharing makes sense; extend later.
  SlowPathGenerator<HDeoptimize> deopt_slow_paths_;
};

}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_